In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import math
print(torch.__version__)
plt.ion()   # interactive mode

1.4.0


In [2]:
# Reference:  https://medium.com/@vivekvscool/image-classification-cnn-with-pytorch-5b2cb9ef9476 
# Image classification — CNN with PyTorch
#Transformation for image
transform_ori = transforms.Compose([transforms.RandomHorizontalFlip(),    #flipping the image horizontally
                                    transforms.RandomResizedCrop(64),
                                    transforms.ToTensor(), 
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])  #normalize the image
                                    # transforms.RandomResizedCrop(64),   #create 64x64 image
                                    # transforms.ToTensor(),                 #convert the image to a Tensor
#Load our dataset
train_dataset = datasets.ImageFolder(root = './alldata/',
                                     transform = transform_ori)
# test_dataset = datasets.ImageFolder(root = './test/',
#                                     transform = transform_ori)


#Make the dataset iterable
batch_size = 100
train_load = torch.utils.data.DataLoader(dataset = train_dataset, 
                                         batch_size = batch_size,
                                         shuffle = True)      #Shuffle to create a mixed batches of 100 of cat & dog images


In [3]:
print(len(train_dataset[0]))
print(len(train_dataset[0][0]))

2
3


In [4]:
print ('There are {} images in the training set'.format (len(train_dataset)))

There are 9470 images in the training set


In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        # sub sampling - extract features
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3,stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(8)        #Batch normalization
        self.relu = nn.ReLU()                 #RELU Activation
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)   #Maxpooling reduces the size by kernel size. 64/2 = 32
        
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)    #Size now is 32/2 = 16
        
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)    #Size now is 16/2 = 8
        
        # fully connected - classification
        #Flatten the feature maps. You have 32 feature mapsfrom cnn2. Each of the feature is of size 16x16 --> 32*16*16 = 8192
        self.fc1 = nn.Linear(in_features=4096, out_features=4000)   #Flattened image is fed into linear NN and reduced to half size
        self.droput = nn.Dropout(p=0.5)                    #Dropout used to reduce overfitting
        self.fc2 = nn.Linear(in_features=4000, out_features=2000)
        self.droput = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(in_features=2000, out_features=500)
        self.droput = nn.Dropout(p=0.5)
        self.fc4 = nn.Linear(in_features=500, out_features=50)
        self.droput = nn.Dropout(p=0.5)
        self.fc5 = nn.Linear(in_features=50, out_features=2)   
        self.softmax = nn.Softmax(dim = 1)
        
    def forward(self,x):
        out = self.cnn1(x)
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)
        out = self.cnn3(out)
        out = self.batchnorm3(out)
        out = self.relu(out)
        out = self.maxpool3(out)
        #Flattening is done here with .view() -> (batch_size, 32*16*16) = (100, 8192)
        out = out.view(-1,4096)   #-1 will automatically update the batchsize as 100; 8192 flattens 32,16,16
        #Then we forward through our fully connected layer 
        out = self.fc1(out)
        out = self.relu(out)
#         out = self.droput(out)
        out = self.fc2(out)
        out = self.relu(out)
#         out = self.droput(out)
        out = self.fc3(out)
        out = self.relu(out)
#         out = self.droput(out)
        out = self.fc4(out)
        out = self.relu(out)
#         out = self.droput(out)
        out = self.fc5(out)
        out = self.softmax(out)
        return out

In [6]:
model = CNN()
CUDA = torch.cuda.is_available()
if CUDA:
    model = model.cuda()    
loss_fn = nn.CrossEntropyLoss()        
optimizer = torch.optim.Adam(model.parameters(), lr = 0.00006) # tune!

In [7]:
import time
from torch.autograd import Variable
num_epochs = 40

#Define the lists to store the results of loss and accuracy
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []

#Training
for epoch in range(num_epochs): 
    #Reset these below variables to 0 at the begining of every epoch
    start = time.time()
    correct = 0
    iterations = 0
    iter_loss = 0.0
    # change learning rate 
    model.train()                   # Put the network into training mode
    
    for i, (inputs, labels) in enumerate(train_load):
#         print ('train ' + str(iterations) )
#         print ('inputs' + str(inputs))
#         print ('labels' + str(labels))
        # Convert torch tensor to Variable
        inputs = Variable(inputs)
        labels = Variable(labels)

#         # If we have GPU, shift the data to GPU
#         CUDA = torch.cuda.is_available()
#         if CUDA:
#             inputs = inputs.cuda()
#             labels = labels.cuda()
        
        optimizer.zero_grad()            # Clear off the gradient in (w = w - gradient)
        outputs = model(inputs)         
        loss = loss_fn(outputs, labels)  
        iter_loss += loss.data.item()      # Accumulate the loss, loss.data[0]
        loss.backward()                 # Backpropagation 
        optimizer.step()                # Update the weights
        
        # Record the correct predictions for training data 
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum()
        iterations += 1
#     print ('train loss is '+ str(iter_loss))
    # Record the training loss
    train_loss.append(iter_loss/iterations)
    # Record the training accuracy
    train_accuracy.append((100 * correct / len(train_dataset)))
    
    print ('Finish training.')
    stop = time.time()
    
    print ('Epoch {}/{}, Training Loss: {:.3f}, Training Accuracy: {:.3f}, Time: {}s'
           .format(epoch+1, num_epochs, train_loss[-1], train_accuracy[-1], stop-start))

Finish training.
Epoch 1/40, Training Loss: 0.559, Training Accuracy: 73.000, Time: 239.70128273963928s
Finish training.
Epoch 2/40, Training Loss: 0.464, Training Accuracy: 84.000, Time: 238.4029688835144s
Finish training.
Epoch 3/40, Training Loss: 0.415, Training Accuracy: 89.000, Time: 248.81012892723083s
Finish training.
Epoch 4/40, Training Loss: 0.401, Training Accuracy: 90.000, Time: 261.5527422428131s
Finish training.
Epoch 5/40, Training Loss: 0.394, Training Accuracy: 91.000, Time: 253.10553526878357s
Finish training.
Epoch 6/40, Training Loss: 0.396, Training Accuracy: 91.000, Time: 252.01732087135315s
Finish training.
Epoch 7/40, Training Loss: 0.380, Training Accuracy: 92.000, Time: 258.9493169784546s
Finish training.
Epoch 8/40, Training Loss: 0.389, Training Accuracy: 92.000, Time: 269.2552580833435s
Finish training.
Epoch 9/40, Training Loss: 0.382, Training Accuracy: 92.000, Time: 259.0132648944855s
Finish training.
Epoch 10/40, Training Loss: 0.379, Training Accuracy

In [8]:
modelPATH = './NoTestAllTrain_cnn_model_96.pt'
torch.save(model, modelPATH)

  "type " + obj.__name__ + ". It won't be checked "


In [12]:
import cv2
from scipy import stats
from PIL import Image

In [14]:
cap = cv2.VideoCapture('../../Downloads/fila1outpy.avi')
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
out = cv2.VideoWriter('fila1write_model2.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))
font = cv2.FONT_HERSHEY_SIMPLEX 
minidict = {0:'NOTseeP',1:'seeP'}
i = 0
ans_list = []
result = 0
frame_interval = 10
while True:
# while(cap.isOpened()):
    ret, frame = cap.read()
    if ret == True: 
        left_right_image = np.split(frame, 2, axis=1)
        left_img = left_right_image[0]
        right_img = left_right_image[1]
        left_img_pil = Image.fromarray(left_img)
        right_img_pil = Image.fromarray(right_img)
    #     print (left_img_pil)
    #     print (type(left_img_pil))
#         cv2.imshow('frame',frame)
        left_img_pil2tensor = transform_ori(left_img_pil)
        right_img_pil2tensor = transform_ori(right_img_pil)
        left_img_4d = left_img_pil2tensor.unsqueeze(0)
        right_img_4d = right_img_pil2tensor.unsqueeze(0)
        left_inputs = Variable(left_img_4d)
        right_inputs = Variable(right_img_4d)
        left_outputs = model(left_inputs) 
        right_outputs = model(right_inputs)  
        _, left_predicted = torch.max(left_outputs, 1)
        _, right_predicted = torch.max(right_outputs, 1)
#         print ([left_predicted,right_predicted])
        left_ans = left_predicted.numpy()[0]
        right_ans = right_predicted.numpy()[0]
        ans_list.append(left_ans)
        ans_list.append(right_ans)
#         cv2.putText(frame,str(left_ans), (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4) # left 
#         cv2.putText(frame,str(right_ans), (1500, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4) # right

        if i % frame_interval == frame_interval-1 and i >= frame_interval-1:
            result = stats.mode(ans_list)[0][0]
#             print (str(result))
            cv2.putText(frame,minidict[result], (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4)
            ans_list = []
        elif i < frame_interval-1:
            if left_ans == right_ans:
                cv2.putText(frame,minidict[left_ans], (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4)
            else:
                cv2.putText(frame,minidict[0], (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4)
        else:
            cv2.putText(frame,minidict[result], (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4)
        cv2.putText(frame,str(i), (1230, 50), font, 1, (0, 255, 255), 2, cv2.LINE_4)
        out.write(frame)
        i += 1
    #     break
    # if cv2.waitKey(1) & 0xFF == ord('q'):
    #     break
    else:
        break
cap.release()
out.release()
cv2.destroyAllWindows()