In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch, torch.nn as nn, torch.functional as F, torch.utils.data as data, torchnet as tnt
import torchvision, numpy as np, os
import matplotlib.pyplot as plt, fastai
from fastai.conv_learner import ConvnetBuilder
from fastai.model import resnet34
import nvvl, time
import sys, os
from tqdm import tqdm

In [3]:
#Adding layer.freeze and layer.unfreeze capability to nn.Module layers

def freeze(self):
    for param in self.parameters(): param.requires_grad=False
def unfreeze(self):
    for param in self.parameters(): param.requires_grad=True

nn.Module.freeze=freeze
nn.Module.unfreeze=unfreeze

In [4]:
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"); device

device(type='cuda', index=0)

In [5]:
PATH="/home/sufiyan/Common_data/mtp2/dataset/OLD/100_vids/"
PATH_L_Vids=PATH+"LEFT/"
PATH_L=PATH+"LEFT_data/"
PATH_L_L=PATH+"LEFT_labels/"
PATH_R_Vids=PATH+"RIGHT/"
PATH_R=PATH+"RIGHT_data/"
PATH_R_L=PATH+"RIGHT_labels/"

In [6]:
#string_to_int and int_to_string mapping
stoi={"w":0, "a":1, "d":2}; itos={0:"Clear", 1:"Left", 2:"Right"}

In [7]:
leftVids =[PATH_L+fileName for fileName in os.listdir(PATH_L) if os.path.isfile(PATH_L+fileName)]
rightVids=[PATH_R+fileName for fileName in os.listdir(PATH_R) if os.path.isfile(PATH_R+fileName)]

In [8]:
class myVidDataset(torch.utils.data.Dataset):
    def __init__(self, npArray, labelList):
        assert isinstance(npArray, np.ndarray), "npArray in myVidDataset should be a numpy array of Frames"
        assert len(npArray)==len(labelList), f"Length of Labels {len(labelList)} not equal to len \
                                                    of npArray {len(npArray)}"
        self.frames, self.labels = npArray,labelList
        
    def __getitem__(self, idx):
        x=self.frames[idx]
        x=np.squeeze(x)     #extra 1 dimention removed if any
        if len(x.shape)==2 : x=np.dstack((x,x,x)) #id image is 1 channel, make it 3 channel
        x=np.rollaxis(x, 2)   #Make the images channel first
        x=x[:,:224,:224]
        return (x, self.labels[idx])
    
    def __len__(self): return len(self.frames)
    @classmethod
    def fromPath(cls, videoPath, skipStart=100, skipEnd=100):
        """frames to skip at start and end to reduce class imbalance"""
        vid=np.load(videoPath)[skipStart:-skipEnd]
        labels=cls.get_labels(videoPath)[skipStart:-skipEnd]
        return cls(vid, labels)
    
    #returns the list of labels for the passed videoPath
    @staticmethod
    def get_labels(fileName):
        vidName=fileName.split("/")[-1]
        labelName=".".join(vidName.split(".")[:-1]+["npy"])
        if fileName.split("/")[-2]=="RIGHT_data":
            labels=list(np.load(PATH_R_L+labelName))
        elif fileName.split("/")[-2]=="LEFT_data":
            labels=list(np.load(PATH_L_L+labelName))
        else: raise ValueError(f"Passed videoFile {fileName} has non recognizable parent folder")
        return [stoi[label] for label in labels]
    
class dataLoaderGetter(object):
    def __init__(self, startIndex=0, endIndex=55):
        """get one concat dataset of left and right video each
        Returns a new dataloader of 2 new video each from left and right each time"""
        self.len=endIndex-startIndex
        self.startIndex,self.endIndex = startIndex,endIndex
        
    def __iter__(self):
        for i, (l,r) in enumerate(tqdm([item for item in zip(leftVids,rightVids)], total=self.len)):
            if self.startIndex<=i and i<self.endIndex:
                set_L,set_R=myVidDataset.fromPath(l), myVidDataset.fromPath(r)
                dataset=torch.utils.data.ConcatDataset([set_L,set_R]) 
                yield torch.utils.data.DataLoader(dataset=dataset, batch_size=200, 
                                                  shuffle=True, pin_memory=False, num_workers=4, drop_last=True)
    def __len__(self):
        return self.len

In [9]:
#loads resnet34 pretrained on ImageNet using fastai library, Addaptive Pool 
#Followed by Fully connected layers of 500 and c
model=ConvnetBuilder(resnet34, c=3, is_multi=True, is_reg=False, pretrained=True).model
loss=nn.CrossEntropyLoss
optim=torch.optim.Adam

In [10]:
#freezing the pretrained layers
for i, layer in enumerate(model.children()):
    if i<=11: layer.freeze()
    else: layer.unfreeze()

In [11]:
for layer in model.children():
    for param in layer.parameters():
        if param.requires_grad: print(layer); break

Linear(in_features=1024, out_features=512, bias=True)
BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Linear(in_features=512, out_features=3, bias=True)


In [12]:
class modelLearner(nn.Module):
    """modelLearner class takes model, loss function and learning rate.
    Given each sample (x,y), it trains on it
    call epochEnded at the end of each epoch,
            passing parentLearnerClassObject that has trainLoader as its attribute

"""
    def __init__(self,model, loss_fn, lr, optim, modelName, Train=True, is_multi=True, classes=3, *args, **kwargs):
        super().__init__()
        self.loss=loss_fn().to(device)
        self.lr=lr
        self.model=model.to(device)
        self.optim=optim(self.model.parameters(), self.lr)
        self.modelName=modelName
        self.args,self.kwargs=args,kwargs
        self.train_epoch_loss=0     #Add loss here for each batch and reset at end of epoch
        self.test_epoch_loss=0      #same as above for test
        self.num_samples_seen=0
        self.Train=Train            #Training Mode Flag
        self.train_loss_list=[]     #to be updated at the end of  each epoch
        self.test_loss_list=[]
        self.is_multi=is_multi
        if is_multi: self.confusion_matrix=tnt.meter.ConfusionMeter(classes)
        if isinstance(self.loss, nn.MSELoss): self.loss_name="MSE "
        else: self.loss_name="CE "
        self.to(device)
        
        
    def forward(self, x, y):
        y_pred = self.model(x)
        if self.Train==True:
            if isinstance(self.loss, nn.CrossEntropyLoss): #Handeling specific requirements of CE Loss
                y=y.view(self.parentLearner.trainLoader.batch_size)
                y=y.long()
            loss = self.loss(y_pred, y)
            self.num_samples_seen= self.num_samples_seen + x.shape[0]
            self.train_epoch_loss += loss.item()
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()
        else: #Test Loop
            if isinstance(self.loss, nn.CrossEntropyLoss): #Handeling specific requirements of CE Loss
                y=y.view(self.parentLearner.validLoader.batch_size)
                y=y.long()
            loss = self.loss(y_pred, y)
            self.confusion_matrix.add(y_pred.data, y.data)
            self.test_epoch_loss+= loss.item()


    def setTest(self):   self.Train=False
    def setTrain(self):  self.Train=True
    def save(self): self.model.save_state_dict(f"saved_models/{self.modelName}_lr{self.lr}/\
    loss_{self.loss_name}_epoch_{len(self.train_loss_list)}.pt")
    
        #setParent will give the modelLearner access the higherlevel class attribures like trainLoader's length
    #and batch size, currentEpoch, etc
    def setParent(self, parentLearner): self.parentLearner=parentLearner
    def trainEpochEnded(self): 
        try:    self.train_loss_list.append(self.train_epoch_loss/
                                            (len(self.parentLearner.trainLoader)*self.parentLearner.num_trainLoader+1))
        except: self.train_loss_list.append(self.train_epoch_loss/len(self.parentLearner.trainLoader))

        self.train_epoch_loss=0  #reset total_average_loss at the end of each epoch
        try: 
            epochs=self.parentLearner.epochsDone
            printEvery=self.parentLearner.printEvery
            if epochs%printEvery==0:    
                print(f"lr: {self.lr}      trainLoss: {self.train_loss_list[-1]}")
        except: print(f"lr: {self.lr}      trainLoss: {self.train_loss_list[-1]}")
    def testEpochEnded(self):
        try:    self.test_loss_list.append(self.test_epoch_loss/
                                           (len(self.parentLearner.validLoader)*self.parentLearner.num_validLoader+1))
        except: self.test_loss_list.append(self.test_epoch_loss/len(self.parentLearner.validLoader))
        self.test_epoch_loss=0
        try: 
            epochs=self.parentLearner.epochsDone
            printEvery=self.parentLearner.printEvery
            if epochs%printEvery==0:
                print(f"lr: {self.lr}      {self.loss_name}testLoss: {self.test_loss_list[-1]}")
        except: print(f"testLoss: {self.loss_name}{self.test_loss_list[-1]}")

In [13]:
class ParallelLearner(nn.Module):
    """ParallelLearner takes list of modelLearners to be trained parallel on the same data samples
    from the passed trainLoader object. epochs are the number of epochs to be trained for.
    """
    def __init__(self, listOfLearners, epochs, trainLoaderGetter=None, trainLoader=None, printEvery=10, validLoader=None, validLoaderGetter=None, *args, **kwargs):
        super().__init__()
        self.learners=listOfLearners
        self.trainLoader=trainLoader
        self.trainLoaderGetter=trainLoaderGetter
        self.epochs=epochs
        self.args,self.kwargs = args,kwargs
        self.validLoader=validLoader           #trainLoader for test set
        self.validLoaderGetter=validLoaderGetter
        self.epochsDone=0  #epoch counter
        self.printEvery=printEvery #print every n epochs
        try: [learner.setParent(self) for learner in self.learners] #set self as parent of all modelLearners
        except: print("Couldn't set ParallelLearner as parent of modelLearners!!!")
    
    
    def train(self):
        startTime=time.time()
        for t in range(self.epochs):
            [learner.setTrain() for learner in self.learners] #set all modelLearners to Train Mode
            for self.num_trainLoader, self.trainLoader in enumerate(self.trainLoaderGetter()):
                for idx, (x,y) in enumerate(self.trainLoader):
#                     x = x.view(self.trainLoader.batch_size,28*28).to(device)
#                     y = y.view(self.trainLoader.batch_size, 1).float().to(device)
                    x = x.float().to(device)
                    y = y.float().to(device)
                    [learner(x,y) for learner in self.learners]
            self.epochsDone+=1
            if self.epochsDone%self.printEvery==0:
                print()
                print("*"*50)
                print(f"Epoch: {t}   Time Elapsed: {time.time()-startTime}")
            [learner.trainEpochEnded() for learner in self.learners]
            if (not self.validLoaderGetter is None): #This part runs only when validLoaderGetter is provided
                [learner.setTest() for learner in self.learners] #Set all modelLearners to Test Model
                for self.num_validLoader, self.validLoader in enumerate(self.validLoaderGetter()):
                    for idx, (x,y) in enumerate(self.validLoader):
                        x = x.float().to(device)
                        y = y.float().to(device)
                        [learner(x,y) for learner in self.learners]
                [learner.testEpochEnded() for learner in self.learners]
        #Pass self to all learners defined above so they can use self.trainLoader to calculate it's total_loss before resetting epoch_loss
    
    
    def plotLoss(self, title, listOfLabelsForTrain, listOfLabelsForTest=None, xlabel="Epochs", ylabel="Loss", save=False):
        """Parameters:
        listOfLabelsForTrain: Labels for the train epoch loss for each modelLearner
        listOfLabelsForTest : Labels for the test epoch loss for each modelLearner, \
                              to be provided if validLoader was used to calculate loss on validation dataset.
        """
        assert len(listOfLabelsForTrain)==len(self.learners), "Provide Description for all Learners to Plot"
        import matplotlib.pyplot as plt
        import os
        plt.switch_backend('agg')
        x=range(1,self.epochsDone+1)
        for i,learner in enumerate(self.learners):
            plt.plot(x, learner.train_loss_list, label=listOfLabelsForTrain[i])
        if (not (listOfLabelsForTest is None)) and (not (self.validLoader is None)):
            assert len(listOfLabelsForTest)==len(self.learners), \
                        "length of ListOfLabelsForTest is not same as num of learners"
            for i,learner in enumerate(self.learners):
                plt.plot(x, learner.test_loss_list, label=listOfLabelsForTest[i])
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.legend()
        os.makedirs("plots", exist_ok=True)
        plt.savefig(os.path.join("plots", title+".png"))

In [21]:
from functools import partial
mainLearner=ParallelLearner([modelLearner(model, lr=0.01,loss_fn=nn.CrossEntropyLoss, optim=optim, modelName="onVivekData")],
                            epochs=20,
                            trainLoaderGetter=dataLoaderGetter,
                            printEvery=10,
                            validLoaderGetter=partial(dataLoaderGetter, startIndex=56, endIndex=63))

In [22]:
mainLearner.train()

62it [00:44,  1.38it/s]                        
62it [00:04, 12.86it/s]              
62it [00:44,  1.38it/s]                        
62it [00:04, 12.78it/s]              
62it [00:44,  1.39it/s]                        
62it [00:04, 12.78it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.76it/s]              
62it [00:44,  1.39it/s]                        
62it [00:04, 12.81it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.49it/s]              
62it [00:45,  1.38it/s]                        
62it [00:04, 12.73it/s]              
62it [00:45,  1.38it/s]                        
62it [00:04, 12.93it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.88it/s]              
62it [00:45,  1.37it/s]                        
  0%|          | 0/7 [00:00<?, ?it/s]


**************************************************
Epoch: 9   Time Elapsed: 494.1451003551483
lr: 0.01      trainLoss: 0.6508806014279707


62it [00:04, 12.70it/s]              
  0%|          | 0/55 [00:00<?, ?it/s]

lr: 0.01      CE testLoss: 0.7518012577837164


62it [00:45,  1.38it/s]                        
62it [00:04, 12.70it/s]              
62it [00:44,  1.38it/s]                        
62it [00:04, 12.59it/s]              
62it [00:44,  1.38it/s]                        
62it [00:04, 12.79it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.85it/s]              
62it [00:44,  1.38it/s]                        
62it [00:04, 12.86it/s]              
62it [00:45,  1.38it/s]                        
62it [00:04, 12.88it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.87it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.83it/s]              
62it [00:45,  1.37it/s]                        
62it [00:04, 12.73it/s]              
62it [00:45,  1.37it/s]                        
  0%|          | 0/7 [00:00<?, ?it/s]


**************************************************
Epoch: 19   Time Elapsed: 993.5646302700043
lr: 0.01      trainLoss: 0.6224833546428505


62it [00:04, 12.81it/s]              

lr: 0.01      CE testLoss: 0.7630406672304327





In [16]:
learner=mainLearner.learners[0]

In [17]:
learner.confusion_matrix.conf

array([[2073,    0,    0],
       [ 169,    0,   13],
       [ 144,    0,    1]], dtype=int32)

In [18]:
mainLearner.plotLoss("myLossFor5Epoch", ["train"], ["valid"])

In [23]:
learner.model

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, ker

In [None]:
testGetter = dataLoaderGetter(56,57)
dataLoader=next(iter(testGetter))
print(dataLoader)

In [None]:
xy=next(iter(dataLoader))

In [None]:
x,y=xy
x = x.float().to(device)
y = y.float().to(device)

In [None]:
y_pred=learner.model(x)

In [None]:
y_pred