In [1]:
# All required python standard libraries
import os
import time

In [2]:
# All torch related imports 
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from torch import nn, optim

In [3]:
# using cv2 to read an image
import cv2

In [4]:
# All sci-kit related imports 
import pandas as pd
import numpy as np

In [5]:
train_directory = os.path.join(os.getcwd(),'guides\\isolated-dataset-csv\\IsolatedTrain.csv')
test_directory = os.path.join(os.getcwd(),'guides\\isolated-dataset-csv\\IsolatedTest.csv')

In [6]:
BATCH_SIZE = 64

In [7]:
train_csv = pd.read_csv(train_directory, usecols=["labels","directory"])
test_csv = pd.read_csv(test_directory, usecols=["labels","directory"])

In [8]:
def csv_preprocessor(base_dir:str, directory:str):
    return os.path.join(base_dir,directory).replace("\\","/")

In [9]:
def return_all_image_list_from_processed_csv(csv_file):
    ### This returns the entire list full of images to be loaded into cpu
    ###
    ###
    ALL_IMAGES = []
    start = time.time()
    for i, items in tq(enumerate(train_csv.iloc[:,1])):
        image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(image,(224,224))
        ALL_IMAGES.append(resized)
    
    print("Tt took us approximately {} seconds".format(time.time()-start))  
    return ALL_IMAGES

In [10]:
x = train_csv["directory"].map(lambda x: csv_preprocessor(base_dir=str(os.getcwd()), directory=str(x)))

In [11]:
image_path = csv_preprocessor( base_dir = os.getcwd() , directory = train_csv.iloc[0,1])

In [12]:
ALL_IMAGES = []

In [13]:
from tqdm.notebook import tqdm as tq

In [14]:
start = time.time()
for i, items in tq(enumerate(train_csv.iloc[:,1])):
    image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(image,(224,224))
    ALL_IMAGES.append(resized)
print("Tt took us approximately {} seconds".format(time.time()-start))    

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…


Tt took us approximately 11.201149702072144 seconds


In [15]:
start = time.time()
NP_ALLIMAGES = np.array(ALL_IMAGES)
print("Tt took us approximately {} seconds, with shape {} with totally {} Bytes".format(time.time()-start, NP_ALLIMAGES.shape, NP_ALLIMAGES.nbytes))  

Tt took us approximately 19.438267946243286 seconds, with shape (34439, 224, 224, 3) with totally 5184033792 Bytes


In [19]:
class IsolatedCharacterDataset(Dataset):
    def __init__(self, csv_dir_path,  transforms=None, custom_transform=None ):
        ### complete dataset path
        self.dataset_csv = pd.read_csv(csv_dir_path, usecols=["labels","directory"])  
        self.dataset_csv["directory"] = self.dataset_csv["directory"].map(lambda x: csv_preprocessor(base_dir=str(os.getcwd()), directory=str(x)))
        self.dataset_csv_numpy = self.dataset_csv.to_numpy()
        self.ALL_IMAGES = return_all_image_list_from_processed_csv(csv_file=self.dataset_csv)
        
        ### labels
        self.labels = self.dataset_csv_numpy[:,0]
        ### images directories
        self.image_directories = self.dataset_csv_numpy[:,1]
        ### transformations to apply on images
        self.transforms = transforms
        
    def __getitem__(self, index):
        # convert labels to tensor 
        label = torch.tensor(self.labels[index])
        
        # load single image from list of all preloaded images
        image = self.ALL_IMAGES[index]
        
        if self.transforms:
            ## apply transforms 
            image = self.transforms(image)
            image = image.float()
        label = label.long().subtract(1)
        return image, label 
    
    def __len__(self):
        rows,_ = self.dataset_csv_numpy.shape
        return rows

In [20]:
DATA_NORMALIZER = transforms.Compose([transforms.ToTensor(),transforms.Resize(224),transforms.CenterCrop(224),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),])

In [21]:
TRAIN_DATASET = IsolatedCharacterDataset(csv_dir_path= train_directory,transforms=DATA_NORMALIZER)
TRAIN_LOADER = DataLoader(dataset=TRAIN_DATASET, batch_size=BATCH_SIZE,num_workers=0,shuffle= True)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…


Tt took us approximately 13.851259469985962 seconds


In [32]:
for x in range(1):
    start = time.time()
    for i,data in enumerate(TRAIN_LOADER):
        load_start = time.time()
        image, label = data
        print("---> batch no  {}, time taken {}".format(i, time.time()-load_start ))
    print("pass {} time taken {}".format(x, -start + time.time()))

---> batch no  0, time taken 0.0
---> batch no  1, time taken 0.0029916763305664062
---> batch no  2, time taken 0.0029916763305664062
---> batch no  3, time taken 0.002991199493408203
---> batch no  4, time taken 0.001989603042602539
---> batch no  5, time taken 0.0029921531677246094
---> batch no  6, time taken 0.002991914749145508
---> batch no  7, time taken 0.001994609832763672
---> batch no  8, time taken 0.002991199493408203
---> batch no  9, time taken 0.002991914749145508
---> batch no  10, time taken 0.0029916763305664062
---> batch no  11, time taken 0.002991914749145508
---> batch no  12, time taken 0.0029916763305664062
---> batch no  13, time taken 0.001994609832763672
---> batch no  14, time taken 0.0019943714141845703
---> batch no  15, time taken 0.002991914749145508
---> batch no  16, time taken 0.002991199493408203
---> batch no  17, time taken 0.002991914749145508
---> batch no  18, time taken 0.002991914749145508
---> batch no  19, time taken 0.002991914749145508
-

---> batch no  159, time taken 0.0029921531677246094
---> batch no  160, time taken 0.003991842269897461
---> batch no  161, time taken 0.001994609832763672
---> batch no  162, time taken 0.0019941329956054688
---> batch no  163, time taken 0.001994609832763672
---> batch no  164, time taken 0.0029916763305664062
---> batch no  165, time taken 0.002991914749145508
---> batch no  166, time taken 0.0029916763305664062
---> batch no  167, time taken 0.001994609832763672
---> batch no  168, time taken 0.0019948482513427734
---> batch no  169, time taken 0.0029914379119873047
---> batch no  170, time taken 0.0019943714141845703
---> batch no  171, time taken 0.0019943714141845703
---> batch no  172, time taken 0.002991199493408203
---> batch no  173, time taken 0.001994609832763672
---> batch no  174, time taken 0.0029916763305664062
---> batch no  175, time taken 0.0029921531677246094
---> batch no  176, time taken 0.0029916763305664062
---> batch no  177, time taken 0.0019943714141845703


---> batch no  315, time taken 0.0019941329956054688
---> batch no  316, time taken 0.002991199493408203
---> batch no  317, time taken 0.002991199493408203
---> batch no  318, time taken 0.0019941329956054688
---> batch no  319, time taken 0.0019941329956054688
---> batch no  320, time taken 0.0029916763305664062
---> batch no  321, time taken 0.002991199493408203
---> batch no  322, time taken 0.0029916763305664062
---> batch no  323, time taken 0.002991914749145508
---> batch no  324, time taken 0.001994609832763672
---> batch no  325, time taken 0.002991914749145508
---> batch no  326, time taken 0.002991199493408203
---> batch no  327, time taken 0.001994609832763672
---> batch no  328, time taken 0.0029921531677246094
---> batch no  329, time taken 0.0019948482513427734
---> batch no  330, time taken 0.0029914379119873047
---> batch no  331, time taken 0.0029916763305664062
---> batch no  332, time taken 0.0029914379119873047
---> batch no  333, time taken 0.002991199493408203
--

---> batch no  471, time taken 0.0029916763305664062
---> batch no  472, time taken 0.002991199493408203
---> batch no  473, time taken 0.001994609832763672
---> batch no  474, time taken 0.001994609832763672
---> batch no  475, time taken 0.002991914749145508
---> batch no  476, time taken 0.0029916763305664062
---> batch no  477, time taken 0.0029916763305664062
---> batch no  478, time taken 0.0019948482513427734
---> batch no  479, time taken 0.0029914379119873047
---> batch no  480, time taken 0.002991914749145508
---> batch no  481, time taken 0.0019948482513427734
---> batch no  482, time taken 0.002991914749145508
---> batch no  483, time taken 0.0029914379119873047
---> batch no  484, time taken 0.0029914379119873047
---> batch no  485, time taken 0.002992391586303711
---> batch no  486, time taken 0.0029914379119873047
---> batch no  487, time taken 0.001994609832763672
---> batch no  488, time taken 0.0019941329956054688
---> batch no  489, time taken 0.001995086669921875
--

In [27]:
for x in range(1):
    start = time.time()
    for i,data in enumerate(TRAIN_LOADER):
        load_start = time.time()
        image, label = data
        print("---> batch no  {}, time taken {}".format(i, time.time()-load_start ))
    print("pass {} time taken {}".format(x, -start + time.time()))

---> batch no  0, time taken 0.0
---> batch no  1, time taken 0.0029914379119873047
---> batch no  2, time taken 0.003990650177001953
---> batch no  3, time taken 0.0029909610748291016
---> batch no  4, time taken 0.00299072265625
---> batch no  5, time taken 0.0029914379119873047
---> batch no  6, time taken 0.002978086471557617
---> batch no  7, time taken 0.0029914379119873047
---> batch no  8, time taken 0.0029904842376708984
---> batch no  9, time taken 0.002991199493408203
---> batch no  10, time taken 0.0029914379119873047
---> batch no  11, time taken 0.002991914749145508
---> batch no  12, time taken 0.002992391586303711
---> batch no  13, time taken 0.0019941329956054688
---> batch no  14, time taken 0.0029909610748291016
---> batch no  15, time taken 0.002991914749145508
---> batch no  16, time taken 0.0029916763305664062
---> batch no  17, time taken 0.0019943714141845703
---> batch no  18, time taken 0.003988742828369141
---> batch no  19, time taken 0.0029909610748291016


---> batch no  163, time taken 0.002991914749145508
---> batch no  164, time taken 0.0029916763305664062
---> batch no  165, time taken 0.002990245819091797
---> batch no  166, time taken 0.0019931793212890625
---> batch no  167, time taken 0.0029900074005126953
---> batch no  168, time taken 0.002992868423461914
---> batch no  169, time taken 0.0035657882690429688
---> batch no  170, time taken 0.0029916763305664062
---> batch no  171, time taken 0.002991914749145508
---> batch no  172, time taken 0.002991914749145508
---> batch no  173, time taken 0.00299072265625
---> batch no  174, time taken 0.002991914749145508
---> batch no  175, time taken 0.002991914749145508
---> batch no  176, time taken 0.002991199493408203
---> batch no  177, time taken 0.002992391586303711
---> batch no  178, time taken 0.0029916763305664062
---> batch no  179, time taken 0.00299072265625
---> batch no  180, time taken 0.0029935836791992188
---> batch no  181, time taken 0.0029931068420410156
---> batch n

---> batch no  322, time taken 0.003988742828369141
---> batch no  323, time taken 0.002991914749145508
---> batch no  324, time taken 0.0019948482513427734
---> batch no  325, time taken 0.002991914749145508
---> batch no  326, time taken 0.00299072265625
---> batch no  327, time taken 0.002991914749145508
---> batch no  328, time taken 0.0029914379119873047
---> batch no  329, time taken 0.0029931068420410156
---> batch no  330, time taken 0.0029916763305664062
---> batch no  331, time taken 0.0029921531677246094
---> batch no  332, time taken 0.0019941329956054688
---> batch no  333, time taken 0.003989458084106445
---> batch no  334, time taken 0.0029935836791992188
---> batch no  335, time taken 0.002991914749145508
---> batch no  336, time taken 0.002991914749145508
---> batch no  337, time taken 0.0029914379119873047
---> batch no  338, time taken 0.0019941329956054688
---> batch no  339, time taken 0.001993894577026367
---> batch no  340, time taken 0.002991199493408203
---> ba

---> batch no  480, time taken 0.0029916763305664062
---> batch no  481, time taken 0.002991914749145508
---> batch no  482, time taken 0.002991914749145508
---> batch no  483, time taken 0.0019941329956054688
---> batch no  484, time taken 0.0039899349212646484
---> batch no  485, time taken 0.0029916763305664062
---> batch no  486, time taken 0.0019941329956054688
---> batch no  487, time taken 0.003988981246948242
---> batch no  488, time taken 0.002991914749145508
---> batch no  489, time taken 0.002991199493408203
---> batch no  490, time taken 0.002991914749145508
---> batch no  491, time taken 0.0029909610748291016
---> batch no  492, time taken 0.002991914749145508
---> batch no  493, time taken 0.0019941329956054688
---> batch no  494, time taken 0.0019941329956054688
---> batch no  495, time taken 0.0019943714141845703
---> batch no  496, time taken 0.0019948482513427734
---> batch no  497, time taken 0.002992391586303711
---> batch no  498, time taken 0.0029921531677246094
-

In [28]:
DATA_NORMALIZER_2 = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),])

In [None]:
TRAIN_DATASET_2 = IsolatedCharacterDataset(csv_dir_path= train_directory,transforms=DATA_NORMALIZER_2)
TRAIN_LOADER_2 = DataLoader(dataset=TRAIN_DATASET, batch_size=BATCH_SIZE,num_workers=0,shuffle= True)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

In [19]:
for x in range(1):
    start = time.time()
    for i,data in enumerate(TRAIN_LOADER_2):
        load_start = time.time()
        image, label = data
        print("---> batch no  {}, time taken {}".format(i, time.time()-load_start ))
    print("pass {} time taken {}".format(x, -start + time.time()))

---> batch no  0, time taken 0.0
---> batch no  1, time taken 0.0029916763305664062
---> batch no  2, time taken 0.001994609832763672
---> batch no  3, time taken 0.0019941329956054688
---> batch no  4, time taken 0.0029914379119873047
---> batch no  5, time taken 0.0019943714141845703
---> batch no  6, time taken 0.001994609832763672
---> batch no  7, time taken 0.0029909610748291016
---> batch no  8, time taken 0.002991914749145508
---> batch no  9, time taken 0.002991914749145508
---> batch no  10, time taken 0.0029914379119873047
---> batch no  11, time taken 0.0029916763305664062
---> batch no  12, time taken 0.002991199493408203
---> batch no  13, time taken 0.0029914379119873047
---> batch no  14, time taken 0.002991914749145508
---> batch no  15, time taken 0.0019941329956054688
---> batch no  16, time taken 0.0029926300048828125
---> batch no  17, time taken 0.0029914379119873047
---> batch no  18, time taken 0.002991914749145508
---> batch no  19, time taken 0.002991437911987

---> batch no  159, time taken 0.001994609832763672
---> batch no  160, time taken 0.0029916763305664062
---> batch no  161, time taken 0.0019941329956054688
---> batch no  162, time taken 0.002991914749145508
---> batch no  163, time taken 0.002991914749145508
---> batch no  164, time taken 0.001993894577026367
---> batch no  165, time taken 0.0019943714141845703
---> batch no  166, time taken 0.0029921531677246094
---> batch no  167, time taken 0.002991914749145508
---> batch no  168, time taken 0.0019941329956054688
---> batch no  169, time taken 0.002991199493408203
---> batch no  170, time taken 0.002991914749145508
---> batch no  171, time taken 0.002991199493408203
---> batch no  172, time taken 0.0019948482513427734
---> batch no  173, time taken 0.0029916763305664062
---> batch no  174, time taken 0.002991914749145508
---> batch no  175, time taken 0.0019943714141845703
---> batch no  176, time taken 0.0029921531677246094
---> batch no  177, time taken 0.0019941329956054688
--

---> batch no  315, time taken 0.002991199493408203
---> batch no  316, time taken 0.002991199493408203
---> batch no  317, time taken 0.0029921531677246094
---> batch no  318, time taken 0.0019943714141845703
---> batch no  319, time taken 0.0029921531677246094
---> batch no  320, time taken 0.0039899349212646484
---> batch no  321, time taken 0.002991199493408203
---> batch no  322, time taken 0.0019941329956054688
---> batch no  323, time taken 0.002991914749145508
---> batch no  324, time taken 0.0029914379119873047
---> batch no  325, time taken 0.0029921531677246094
---> batch no  326, time taken 0.002991914749145508
---> batch no  327, time taken 0.0029921531677246094
---> batch no  328, time taken 0.0029900074005126953
---> batch no  329, time taken 0.0029916763305664062
---> batch no  330, time taken 0.0019941329956054688
---> batch no  331, time taken 0.0029914379119873047
---> batch no  332, time taken 0.001994609832763672
---> batch no  333, time taken 0.002991199493408203


---> batch no  471, time taken 0.0019943714141845703
---> batch no  472, time taken 0.001994609832763672
---> batch no  473, time taken 0.002991914749145508
---> batch no  474, time taken 0.0019941329956054688
---> batch no  475, time taken 0.0019943714141845703
---> batch no  476, time taken 0.0029914379119873047
---> batch no  477, time taken 0.0019941329956054688
---> batch no  478, time taken 0.0029916763305664062
---> batch no  479, time taken 0.0019943714141845703
---> batch no  480, time taken 0.002992391586303711
---> batch no  481, time taken 0.002991199493408203
---> batch no  482, time taken 0.0019941329956054688
---> batch no  483, time taken 0.0029909610748291016
---> batch no  484, time taken 0.002992391586303711
---> batch no  485, time taken 0.00299072265625
---> batch no  486, time taken 0.0019893646240234375
---> batch no  487, time taken 0.002991914749145508
---> batch no  488, time taken 0.002991914749145508
---> batch no  489, time taken 0.002989053726196289
---> b