In [1]:
from pathlib import Path
import cv2
from PIL import Image
import random

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler
import tensorflow as tf
import tensorflow.keras.layers as tfl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without a CUDA device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    Args:
        batch: Iterable of samples, each sample itself an iterable of fields.

    Returns:
        A tuple with one entry per field; entry ``i`` is a tuple holding field
        ``i`` of every sample. An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [4]:
class TrafficData(Dataset):
    """Map-style dataset yielding one resized RGB image per unique ``Path`` in ``df``.

    Each item is ``(image, target, image_path)``: ``image`` is the decoded image
    scaled to the range [0, 1], ``target`` holds the ``ClassId`` value(s) of all
    rows in ``df`` that share the path, and ``image_path`` is the path value
    taken from ``df``.
    """

    def __init__(self, df, image_dir, transforms=None):
        """Store the index frame, the image root and the optional transform.

        Args:
            df: DataFrame with at least the columns ``Path`` and ``ClassId``.
            image_dir: Directory that the ``Path`` values are joined onto.
            transforms: Optional callable invoked as ``transforms(image=image)``.
                NOTE(review): the keyword call follows the albumentations-style
                convention the original ``**`` call hinted at — confirm against
                the transform pipeline actually used.
        """
        self.image_ids = df['Path'].unique()
        self.df = df
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index, size=(100, 100)):
        """Load, convert, resize and scale the image at position ``index``.

        Args:
            index: Position into the array of unique image paths.
            size: Target size forwarded to ``cv2.resize`` as ``dsize``.

        Returns:
            Tuple ``(image, target, image_path)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        image_path = self.image_ids[index]
        records = self.df[self.df['Path'] == image_path]

        full_path = f'{self.image_dir}/{image_path}'
        image = cv2.imread(full_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with a message naming the file.
            raise FileNotFoundError(f'Could not read image: {full_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, tuple(size))
        image = image.astype(float) / 255.0

        target = records['ClassId'].values

        if self.transforms:
            # The image is an array, not a mapping, so it must be passed as a
            # keyword argument rather than unpacked with ``**``.
            transformed = self.transforms(image=image)
            # Dict-returning pipelines put the result under 'image'; any other
            # return value is taken to be the transformed image itself.
            image = transformed['image'] if isinstance(transformed, dict) else transformed

        return image, target, image_path

    def __len__(self) -> int:
        """Return the number of unique image paths."""
        return self.image_ids.shape[0]

    @staticmethod
    def create_dataset(df, dir, transform=None):
        """Build a ``TrafficData`` from an index frame and an image root.

        Args:
            df: DataFrame with ``Path`` and ``ClassId`` columns.
            dir: Directory the ``Path`` values are relative to.
            transform: Optional transform forwarded to the dataset.

        Returns:
            The constructed ``TrafficData`` instance.
        """
        return TrafficData(df, dir, transform)

    @staticmethod
    def loader(dataset, batch_size, num_workers=0, shuffle=True):
        """Wrap ``dataset`` in a ``DataLoader`` that uses the notebook's ``collate_fn``.

        Args:
            dataset: Dataset to iterate over.
            batch_size: Number of samples per batch.
            num_workers: Worker process count passed to ``DataLoader``.
            shuffle: Whether to reshuffle every epoch; defaults to ``True`` to
                keep the previous behaviour. Pass ``False`` for evaluation data
                when a stable order is wanted.

        Returns:
            The configured ``DataLoader``.
        """
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn,
        )

In [5]:
# Dataset root directory. It is handed to the datasets as `image_dir`, so the
# `Path` values stored in the CSV indexes are resolved relative to it.
path: Path = Path("Data_images")

In [6]:
# Disabled one-off job, kept as a string literal so it does not execute (the
# cell only echoes the string). The code inside walks every class folder under
# Train/, records one (ClassId, Path) row per image and writes the result to
# Data_images/Train_data.csv.
# Original note: "6 minutes" — presumably the run time of the job.
'''
df_train = pd.DataFrame(columns=['ClassId', 'Path'])

train_path = path / 'Train'
for folder in train_path.glob("*"):
    #print(folder)
    class_id = int(str(folder)[len(str(train_path)) + 1:])
    for im in folder.glob("*"):
        #print(p)
        df_train = pd.concat([df_train, pd.DataFrame({'ClassId': [class_id], 'Path': [str(im)[len(str(path)) + 1:]]})], ignore_index=True)
        #df_train.loc[df_train.shape[0]] = [class_id, str(im)[len(str(path)) + 1:]]

df_train.to_csv("Data_images/Train_data.csv")
df_train.head()
'''

'\ndf_train = pd.DataFrame(columns=[\'ClassId\', \'Path\'])\n\ntrain_path = path / \'Train\'\nfor folder in train_path.glob("*"):\n    #print(folder)\n    class_id = int(str(folder)[len(str(train_path)) + 1:])\n    for im in folder.glob("*"):\n        #print(p)\n        df_train = pd.concat([df_train, pd.DataFrame({\'ClassId\': [class_id], \'Path\': [str(im)[len(str(path)) + 1:]]})], ignore_index=True)\n        #df_train.loc[df_train.shape[0]] = [class_id, str(im)[len(str(path)) + 1:]]\n\ndf_train.to_csv("Data_images/Train_data.csv")\ndf_train.head()\n'

In [24]:
# Disabled one-off job, kept as a string literal so it does not execute. The
# code inside reads Test_data.csv, keeps only the rows whose image file exists
# under Test/, and writes the filtered frame to Test_data_cleaned.csv.
# NOTE(review): it writes "Test_data_cleaned.csv" to the working directory,
# while the loading cell reads it from inside the dataset folder — confirm the
# file was moved there, or align the two paths before re-enabling this.
'''
df_test_messy = pd.read_csv('Data_Images/Test_data.csv')
df_test = pd.DataFrame(columns=['ClassId', 'Path'])

test_path = path / 'Test'
for pic in test_path.glob("*"):
    name = str(pic)[len(str(test_path)) + 1:]
    row = df_test_messy[df_test_messy['Path'] == f'Test/{name}']
    df_test = pd.concat([df_test, row])

df_test.to_csv("Test_data_cleaned.csv")
df_test.head()
'''

KeyboardInterrupt: 

In [8]:
#print(df_test_messy.shape[0])
#print(df_test.shape[0])

In [30]:
# Read both CSV indexes from the dataset root held in `path`. The previous
# literals mixed "Data_Images" and "Data_images", which only resolves to the
# same folder on case-insensitive filesystems.
df_train = pd.read_csv(path / "Train_data.csv")
df_test = pd.read_csv(path / "Test_data_cleaned.csv")

# Keep only the label and the image path; this also drops any extra columns
# stored in the CSV files (such as a saved index column).
df_train = df_train[["ClassId", "Path"]]
df_test = df_test[["ClassId", "Path"]]

In [22]:
# Samples per batch, shared by the training and the test loader.
BATCH_SIZE = 16

# Training split: dataset first, then its loader.
train_data = TrafficData.create_dataset(df=df_train, dir=path)
train_data_loader = TrafficData.loader(dataset=train_data, batch_size=BATCH_SIZE)

# Test split, built the same way.
test_data = TrafficData.create_dataset(df=df_test, dir=path)
test_data_loader = TrafficData.loader(dataset=test_data, batch_size=BATCH_SIZE)

In [11]:
# Pull a single batch from the test loader as a quick smoke check, then split
# it into its three per-field tuples.
first_test_batch = next(iter(test_data_loader))
images, targets, image_ids = first_test_batch