In [1]:
import os
import pickle
import torch
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
import torch.utils.data as data1
from torch.utils import data
import torch.nn.functional as F
import torchvision.models as models
import matplotlib.pyplot as plt
from torch.autograd import Variable
from sklearn.metrics import accuracy_score
import torchvision.transforms as transforms
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorboardX import SummaryWriter

In [2]:
from torch.utils import data
class Dataload_3D_CNN(data.Dataset):
    """Dataset of frame folders: every sub-directory of ``data_path`` is one sample.

    A sample is returned as ``(frames, label)``. ``frames`` stacks every image
    found in the sample folder along a new leading dimension; ``label`` is ``0``
    when the folder name contains ``'orig'`` and ``1`` otherwise.
    """

    def __init__(self, data_path, transform=None):
        """Index the sample folders under ``data_path``.

        Args:
            data_path: Root directory holding one sub-directory per sample.
            transform: Optional callable applied to each opened PIL image
                before it is converted to a tensor.
        """
        self.transform = transform
        self.folders = data_path
        # os.listdir() returns entries in arbitrary order, so list once and
        # sort: index -> sample stays stable across calls and across DataLoader
        # workers. Stray files (e.g. notebook checkpoints) are not samples.
        self._sample_dirs = sorted(
            (name for name in os.listdir(data_path)
             if os.path.isdir(os.path.join(data_path, name))),
            key=self._natural_key,
        )

    @staticmethod
    def _natural_key(name):
        """Sort key that orders embedded numbers numerically ('2' before '10')."""
        import re  # local import keeps this class self-contained
        # A capturing split alternates text (even index) and digits (odd index).
        return [int(tok) if i % 2 else tok
                for i, tok in enumerate(re.split(r'(\d+)', name))]

    def __len__(self):
        """Return the number of sample folders found at construction time."""
        return len(self._sample_dirs)

    def read_images(self, data_path, use_transform):
        """Load every image in ``data_path`` and stack them into one tensor.

        Args:
            data_path: Folder containing the frames of a single sample.
            use_transform: Optional callable applied to each PIL image.

        Returns:
            Tensor of the frames stacked along dim 0, in natural file-name order.

        Raises:
            RuntimeError: If the folder contains no files.
        """
        frames = []
        # Frame order is the temporal axis of the clip, so it must be
        # deterministic rather than whatever order the filesystem reports.
        for file_name in sorted(os.listdir(data_path), key=self._natural_key):
            # The context manager releases the file handle; the pixel data is
            # copied out (np.array) before the file is closed.
            with Image.open(os.path.join(data_path, file_name)) as raw:
                frame = raw if use_transform is None else use_transform(raw)
                # np.array copies, giving torch a writable buffer.
                frames.append(torch.from_numpy(np.array(frame)))

        if not frames:
            raise RuntimeError("no frames found in {}".format(data_path))
        return torch.stack(frames, dim=0)

    def __getitem__(self, index):
        """Return ``(frames, label)`` for the sample at ``index``."""
        folder_name = self._sample_dirs[index]
        X = self.read_images(os.path.join(self.folders, folder_name), self.transform)

        # Look at the sample folder's own name only: matching against the full
        # path would label everything 0 if the root path contained 'orig'.
        y = 0 if 'orig' in folder_name else 1
        return X, torch.tensor(y, dtype=torch.long)

In [3]:
# Per-frame preprocessing applied to each PIL image by the dataset:
# Resize(256) followed by CenterCrop(256). No ToTensor/Normalize step is
# applied here.
TRANSFORM_IMG = transforms.Compose(
    [transforms.Resize(256), transforms.CenterCrop(256)]
)

In [4]:
# Root folders of the two splits; each is handed to Dataload_3D_CNN as its
# data_path.
train_path = '/home/chinmay/datatset/train/'
val_path = '/home/chinmay/datatset/val/'

# Both splits share the same per-frame preprocessing.
train_data = Dataload_3D_CNN(train_path, transform=TRANSFORM_IMG)
val_data = Dataload_3D_CNN(val_path, transform=TRANSFORM_IMG)

In [5]:
# Training hyper-parameters.
epochs, batch_size = 40, 8
learning_rate = 1e-4
log_interval = 10  # train() prints a progress line every `log_interval` batches

# 2D frame size setting (earlier values tried: 128 and 256).
# NOTE(review): not referenced by the other cells visible here, which resize
# and crop frames to 256 - confirm whether this is still needed.
img_x = img_y = 96

In [6]:
# Detect devices
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU
print("Is use_cuda", use_cuda)

# DataLoader settings. batch_size and shuffle apply on every device: leaving
# them out on a CPU-only machine would silently fall back to the DataLoader
# defaults (batch_size=1, no shuffling). Worker processes and pinned memory are
# only requested when a GPU is present.
params = {'batch_size': batch_size, 'shuffle': True}
if use_cuda:
    params.update({'num_workers': 4, 'pin_memory': True})

train_loader = data1.DataLoader(train_data, **params)
# Validation does not need shuffling; a fixed order keeps runs comparable.
valid_loader = data1.DataLoader(val_data, **dict(params, shuffle=False))

Is use_cuda True


In [7]:
def find_median(numpy_array=()):
    """Majority-vote label and vote share for each row of per-frame predictions.

    Despite the name, this computes the mode (most frequent value) of each
    row, not the median.

    Args:
        numpy_array: Iterable of 1-D arrays of non-negative integer class ids
            (the input domain of ``np.bincount``), one array per frame set.

    Returns:
        Tuple ``(labels, confidences)``:
        ``labels`` is a LongTensor with the most frequent class of each row
        (the smallest class id wins ties); ``confidences`` is a FloatTensor
        with the fraction of frames in the row that voted for that class.
        The confidence of individual frame predictions is not considered.

    Raises:
        ValueError: If a row contains no predictions.
    """
    labels = []
    confidence_scores = []
    for frame_preds in numpy_array:
        counts = np.bincount(frame_preds)
        if counts.size == 0:
            raise ValueError("find_median received an empty prediction row")
        winner = int(np.argmax(counts))
        labels.append(winner)
        # Frames are treated as independent votes: confidence is simply the
        # share of frames that agree with the winning class.
        confidence_scores.append(float(counts[winner]) / float(counts.sum()))
    return (torch.tensor(labels, dtype=torch.long),
            torch.tensor(confidence_scores, dtype=torch.float))

In [8]:
# Output locations: one TensorBoard writer per split, plus the directory that
# validation() writes its checkpoint files into.
writer_train = SummaryWriter('/home/chinmay/training-results/conv3D_refined_f2f/train')
writer_test = SummaryWriter('/home/chinmay/training-results/conv3D_refined_f2f/test')
save_model_path = "/home/chinmay/model_weights/conv3D_f2f/"

# NOTE(review): train()/validation() call F.cross_entropy directly, so this
# module object is not used by the cells visible here.
loss_fn = nn.CrossEntropyLoss()

# 3D CNN parameters
fc_hidden1 = fc_hidden2 = 256
dropout = 0.0  # dropout probability

# First frame, last frame and stride used when picking frames from a video
begin_frame, end_frame, skip_frame = 1, 10, 1

def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Run one training epoch and return the mean batch loss and accuracy.

    Args:
        log_interval: Print a progress line every ``log_interval`` batches.
        model: Network to train; called as ``model(X)`` and expected to return
            scores of shape (batch, number of classes).
        device: ``torch.device`` that each batch is moved to.
        train_loader: Iterable yielding ``(X, y)`` batches; ``y`` holds class
            indices as required by ``F.cross_entropy``.
        optimizer: Optimizer stepping the model's parameters.
        epoch: Zero-based epoch index, used only in the progress message.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` averaged over the batches.
    """
    # set model as training mode
    model.train()

    losses = []
    scores = []
    N_count = 0   # counting total trained sample in one epoch
    for batch_idx, (X, y) in enumerate(train_loader):
        # Honour the `device` argument instead of hard-coding .cuda(), so the
        # same loop also runs on a CPU-only machine.
        X, y = X.to(device), y.to(device)
        N_count += X.size(0)

        optimizer.zero_grad()
        output = model(X)  # output size = (batch, number of classes)
        loss = F.cross_entropy(output, y)
        losses.append(loss.item())

        # Batch accuracy, computed with torch so that a final batch holding a
        # single sample is handled like any other.
        y_pred = torch.max(output, 1)[1]  # index of the highest score per sample
        step_score = (y_pred == y).float().mean().item()
        scores.append(step_score)

        loss.backward()
        optimizer.step()

        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accu: {:.2f}%'.format(
                epoch + 1, N_count, len(train_loader.dataset), 100. * (batch_idx + 1) / len(train_loader), loss.item(), 100 * step_score))

    return np.mean(losses), np.mean(scores)

In [9]:
def validation(model, device, optimizer, test_loader, epoch=None, save_dir=None):
    """Evaluate ``model`` on ``test_loader`` and write a checkpoint.

    Args:
        model: Network to evaluate; called as ``model(X)``.
        device: ``torch.device`` that each batch is moved to.
        optimizer: Optimizer whose state is saved next to the model weights.
        test_loader: Iterable yielding ``(X, y)`` batches.
        epoch: Zero-based epoch index used in the checkpoint file names. When
            omitted, a module-level ``epoch`` variable is used if one exists
            (this is how the notebook's training loop calls the function).
        save_dir: Directory for the checkpoint files. When omitted, a
            module-level ``save_model_path`` is used if one exists.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)`` over the whole loader.

    Raises:
        RuntimeError: If ``test_loader`` yields no batches.
    """
    # set model as testing mode
    model.eval()

    batch_labels = []
    batch_preds = []
    test_loss = []
    with torch.no_grad():
        for X, y in test_loader:
            # Honour the `device` argument instead of hard-coding .cuda().
            X, y = X.to(device), y.to(device)
            output = model(X)

            test_loss.append(F.cross_entropy(output, y).item())
            batch_labels.append(y)
            batch_preds.append(output.max(1)[1])  # index of the highest score

    if not batch_labels:
        raise RuntimeError("validation loader yielded no batches")

    test_loss = np.mean(test_loss)

    # Accuracy over every sample seen, computed with torch.
    all_y = torch.cat(batch_labels, dim=0)
    all_y_pred = torch.cat(batch_preds, dim=0)
    test_score = (all_y_pred == all_y).float().mean().item()

    # show information
    print('\nTest set ({:d} samples): Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(len(all_y), test_loss, 100* test_score))

    # Legacy fallback: the notebook loop does not pass these explicitly and
    # relies on its own module-level `epoch` / `save_model_path`.
    if epoch is None:
        epoch = globals().get('epoch')
    if save_dir is None:
        save_dir = globals().get('save_model_path')

    # A checkpoint is written on every call (not only for the best score).
    # It is skipped when no epoch index or target directory is known.
    if epoch is not None and save_dir is not None:
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(), os.path.join(save_dir, 'cnn3d_f2f{}.pth'.format(epoch + 1)))  # model weights
        torch.save(optimizer.state_dict(), os.path.join(save_dir, 'cnn3d_f2f_epoch{}.pth'.format(epoch + 1)))      # optimizer state
        print("Epoch {} model saved!".format(epoch + 1))

    return test_loss, test_score

In [10]:
# create model
# NOTE(review): img_dim=256 presumably matches the 256x256 crop produced by
# TRANSFORM_IMG and frames=10 the number of frames per sample folder - confirm
# against Res3D.C3D.
from Res3D import C3D
cnn3d = C3D(img_dim=256, frames=10, dropout=0.4)

# Move the model to the detected device instead of hard-coding .cuda(), so the
# notebook is consistent with the `device` passed to train()/validation().
cnn3d.to(device)
# Parallelize model to multiple GPUs
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs!")
    cnn3d = nn.DataParallel(cnn3d)

optimizer = torch.optim.Adam(cnn3d.parameters(), lr=learning_rate)   # optimize all cnn parameters


# Per-epoch history of the training run (one entry appended per epoch below).
epoch_train_losses = []
epoch_train_scores = []
epoch_test_losses = []
epoch_test_scores = []


# start training
# The loop variable must stay named `epoch`: validation() reads it as a global
# when naming the checkpoint files.
for epoch in range(epochs):
    # train, test model
    train_losses, train_scores = train(log_interval, cnn3d, device, train_loader, optimizer, epoch)
    epoch_test_loss, epoch_test_score = validation(cnn3d, device, optimizer, valid_loader)

    # Keep the curves in memory as well as in TensorBoard.
    epoch_train_losses.append(train_losses)
    epoch_train_scores.append(train_scores)
    epoch_test_losses.append(epoch_test_loss)
    epoch_test_scores.append(epoch_test_score)

    # Log to TensorBoard with 1-based epoch numbers.
    writer_train.add_scalar('loss', train_losses, epoch + 1)
    writer_train.add_scalar('score', train_scores, epoch + 1)
    writer_test.add_scalar('loss', epoch_test_loss, epoch + 1)
    writer_test.add_scalar('score', epoch_test_score, epoch + 1)

    torch.cuda.empty_cache()



Test set (300 samples): Average loss: 0.6945, Accuracy: 50.67%

Epoch 1 model saved!

Test set (300 samples): Average loss: 0.6950, Accuracy: 52.67%

Epoch 2 model saved!

Test set (300 samples): Average loss: 0.7155, Accuracy: 56.33%

Epoch 3 model saved!

Test set (300 samples): Average loss: 0.6173, Accuracy: 68.33%

Epoch 4 model saved!

Test set (300 samples): Average loss: 0.5703, Accuracy: 72.67%

Epoch 5 model saved!

Test set (300 samples): Average loss: 0.5591, Accuracy: 73.00%

Epoch 6 model saved!

Test set (300 samples): Average loss: 0.5720, Accuracy: 69.00%

Epoch 7 model saved!



Test set (300 samples): Average loss: 0.5012, Accuracy: 74.33%

Epoch 8 model saved!

Test set (300 samples): Average loss: 0.5562, Accuracy: 69.67%

Epoch 9 model saved!

Test set (300 samples): Average loss: 0.5047, Accuracy: 76.33%

Epoch 10 model saved!

Test set (300 samples): Average loss: 0.6283, Accuracy: 70.00%

Epoch 11 model saved!

Test set (300 samples): Average loss: 0.5657, Accuracy: 73.67%

Epoch 12 model saved!


Process Process-100:
Process Process-99:
Process Process-98:
Process Process-97:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/home/chinmay/anaconda3/lib/python3.5/sit

KeyboardInterrupt: 