In [1]:
import os
import numpy as np

In [2]:
!ls accident_frames/final/

train  val


In [3]:
!ls -latrh accident_frames | wc -l

3697


In [4]:
import torch

In [5]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Learning rate parameters
BASE_LR = 0.001
EPOCH_DECAY = 30 # number of epochs after which the Learning rate is decayed exponentially.
DECAY_WEIGHT = 0.1 # factor by which the learning rate is reduced.


# DATASET INFO
NUM_CLASSES = 2 # set the number of classes in your dataset

# DATALOADER PROPERTIES
BATCH_SIZE = 10 # Set as high as possible. If you keep it too high, you'll get an out of memory error.


### GPU SETTINGS
CUDA_DEVICE = 0 # Enter device ID of your gpu if you want to run on gpu. Otherwise neglect.
GPU_MODE = 1 # set to 1 if want to run on gpu.


# SETTINGS FOR DISPLAYING ON TENSORBOARD
USE_TENSORBOARD = 0 #if you want to use tensorboard set this to 1.
TENSORBOARD_SERVER = "YOUR TENSORBOARD SERVER ADDRESS HERE" # If you set.
EXP_NAME = "fine_tuning_experiment" # if using tensorboard, enter name of experiment you want it to be displayed as.

## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file.

if False:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        cc.remove_experiment(EXP_NAME)
    except:
        pass
    foo = cc.create_experiment(EXP_NAME)


## If you want to use the GPU, set GPU_MODE TO 1 in config file

use_gpu = True
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count=0


In [6]:

### SECTION 2 - data loading and shuffling/augmentation/normalization : all handled by torch automatically.

# This is a little hard to understand initially, so I'll explain in detail here!

# For training, the data gets transformed by undergoing augmentation and normalization. 
# The RandomSizedCrop basically takes a crop of an image at various scales between 0.01 to 0.8 times the size of the image and resizes it to given number
# Horizontal flip is a common technique in computer vision to augment the size of your data set. Firstly, it increases the number of times the network gets
# to see the same thing, and secondly it adds rotational invariance to your networks learning.


# Just normalization for validation, no augmentation. 

# You might be curious where these numbers came from? For the most part, they were used in popular architectures like the AlexNet paper. 
# It is important to normalize your dataset by calculating the mean and standard deviation of your dataset images and making your data unit normed. However,
# it takes a lot of computation to do so, and some papers have shown that it doesn't matter too much if they are slightly off. So, people just use imagenet
# dataset's mean and standard deviation to normalize their dataset approximately. These numbers are imagenet mean and standard deviation!

# If you want to read more, transforms is a function from torchvision, and you can go read more here - http://pytorch.org/docs/master/torchvision/transforms.html
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

#data_dir, model_name = ("/home/dd/arcelormittalvideos/accident_frames/10sfinal", "resnet18_10_ahead")
data_dir, model_name = ("/home/dd/arcelormittalvideos/accident_frames/final", "resnet18_this_frame")


dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
         for x in ['train', 'val']}
dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=BATCH_SIZE,
                                               shuffle=True, num_workers=4)
                for x in ['train', 'val']}
dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
dset_classes = dsets['train'].classes


In [30]:


### SECTION 3 : Writing the functions that do training and validation phase. 

# These functions basically do forward propogation, back propogation, loss calculation, update weights of model, and save best model!


## The below function will train the model. Here's a short basic outline - 

# For the number of specified epoch's, the function goes through a train and a validation phase. Hence the nested for loop. 

# In both train and validation phase, the loaded data is forward propogated through the model (architecture defined ahead). 
# In PyTorch, the data loader is basically an iterator. so basically there's a get_element function which gets called everytime 
# the program iterates over data loader. So, basically, get_item on dset_loader below gives data, which contains 2 tensors - input and target. 
# target is the class number. Class numbers are assigned by going through the train/val folder and reading folder names in alphabetical order.
# So in our case cats would be first, dogs second and humans third class.

# Forward prop is as simple as calling model() function and passing in the input. 

# Variables are basically wrappers on top of PyTorch tensors and all that they do is keep a track of every process that tensor goes through.
# The benefit of this is, that you don't need to write the equations for backpropogation, because the history of computations has been tracked
# and pytorch can automatically differentiate it! Thus, 2 things are SUPER important. ALWAYS check for these 2 things. 
# 1) NEVER overwrite a pytorch variable, as all previous history will be lost and autograd won't work.
# 2) Variables can only undergo operations that are differentiable.

def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=100):
    since = time.time()

    best_model = model
    best_acc = 0.0

    leave = False
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                mode='train'
                optimizer = lr_scheduler(optimizer, epoch)
                model.train()  # Set model to training mode
            else:
                model.eval()
                mode='val'

            running_loss = 0.0
            running_corrects = 0

            counter=0
            # Iterate over data.
            for data in dset_loaders[phase]:
                inputs, labels = data
                # wrap them in Variable
                if use_gpu:
                    try:
                        inputs, labels = inputs.float().cuda(), labels.long().cuda()
                    except Exception as e:
                        print(e)
                        
                        print("Could not be converted to GPU!!!")
                        print(inputs,labels)
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically.
                optimizer.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                
                loss = criterion(outputs, labels)
                # print('loss done')                
                # Just so that you can keep track that something's happening and don't feel like the program isn't running.
                # if counter%10==0:
                #     print("Reached iteration ",counter)
                counter+=1

                # backward + optimize only if in training phase
                if phase == 'train':
                    # print('loss backward')
                    loss.backward()
                    # print('done loss backward')
                    optimizer.step()
                    # print('done optim')
                # print evaluation statistics
                try:
                    # running_loss += loss.data[0]
                    running_loss += loss.item()
                    # print(labels.data)
                    # print(preds)
                    running_corrects += torch.sum(preds == labels.data)
                    # print('running correct =',running_corrects)
                except:
                    print('unexpected error, could not calculate loss or do a sum.')
            print('trying epoch loss')
            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects.item() / float(dset_sizes[phase])
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))


            # deep copy the model
            if phase == 'val':
                if USE_TENSORBOARD:
                    foo.add_scalar_value('epoch_loss',epoch_loss,step=epoch)
                    foo.add_scalar_value('epoch_acc',epoch_acc,step=epoch)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model = copy.deepcopy(model)
                    print('new best accuracy = ',best_acc)
                if epoch_acc >= 0.98:
                    print("Sufficient accuracy, breaking")
                    leave = True
                    break
        if leave: break
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    print('returning and looping back')
    return best_model

# This function changes the learning rate over the training model.
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer


### SECTION 4 : DEFINING MODEL ARCHITECTURE.

# We use Resnet18 here. If you have more computational power, feel free to swap it with Resnet50, Resnet100 or Resnet152.
# Since we are doing fine-tuning, or transfer learning we will use the pretrained net weights. In the last line, the number of classes has been specified.
# Set the number of classes in the config file by setting the right value for NUM_CLASSES.

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, NUM_CLASSES)


criterion = nn.CrossEntropyLoss()

if use_gpu:
    criterion.cuda()
    model_ft.cuda()
    
#print(model_ft)

optimizer_ft = optim.RMSprop(model_ft.parameters(), lr=0.0001)



# Run the functions and save the best model in the function model_ft.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=100)

# Save model


Epoch 0/99
----------
LR is set to 0.001
trying epoch loss
train Loss: 0.0791 Acc: 0.5571
trying epoch loss
val Loss: 0.0658 Acc: 0.6108
new best accuracy =  0.6108108108108108
Epoch 1/99
----------
trying epoch loss
train Loss: 0.0675 Acc: 0.6138
trying epoch loss
val Loss: 0.0684 Acc: 0.6324
new best accuracy =  0.6324324324324324
Epoch 2/99
----------
trying epoch loss
train Loss: 0.0668 Acc: 0.6007
trying epoch loss
val Loss: 0.1942 Acc: 0.5405
Epoch 3/99
----------
trying epoch loss
train Loss: 0.0637 Acc: 0.6469
trying epoch loss
val Loss: 0.0566 Acc: 0.7622
new best accuracy =  0.7621621621621621
Epoch 4/99
----------
trying epoch loss
train Loss: 0.0632 Acc: 0.6556
trying epoch loss
val Loss: 0.0577 Acc: 0.6757
Epoch 5/99
----------
trying epoch loss
train Loss: 0.0615 Acc: 0.6775
trying epoch loss
val Loss: 0.1329 Acc: 0.2541
Epoch 6/99
----------
trying epoch loss
train Loss: 0.0605 Acc: 0.6762
trying epoch loss
val Loss: 0.3536 Acc: 0.5946
Epoch 7/99
----------
trying epoch 

trying epoch loss
train Loss: 0.0256 Acc: 0.8971
trying epoch loss
val Loss: 0.0851 Acc: 0.7514
Epoch 68/99
----------
trying epoch loss
train Loss: 0.0232 Acc: 0.8977
trying epoch loss
val Loss: 0.0825 Acc: 0.6919
Epoch 69/99
----------
trying epoch loss
train Loss: 0.0248 Acc: 0.8877
trying epoch loss
val Loss: 0.0777 Acc: 0.7459
Epoch 70/99
----------
trying epoch loss
train Loss: 0.0241 Acc: 0.8989
trying epoch loss
val Loss: 0.0758 Acc: 0.7351
Epoch 71/99
----------
trying epoch loss
train Loss: 0.0243 Acc: 0.8996
trying epoch loss
val Loss: 0.0757 Acc: 0.7027
Epoch 72/99
----------
trying epoch loss
train Loss: 0.0224 Acc: 0.9033
trying epoch loss
val Loss: 0.0811 Acc: 0.7135
Epoch 73/99
----------
trying epoch loss
train Loss: 0.0243 Acc: 0.8915
trying epoch loss
val Loss: 0.0830 Acc: 0.7135
Epoch 74/99
----------
trying epoch loss
train Loss: 0.0240 Acc: 0.8939
trying epoch loss
val Loss: 0.0831 Acc: 0.6811
Epoch 75/99
----------
trying epoch loss
train Loss: 0.0249 Acc: 0.8927

In [27]:
#torch.save(model_ft "models/resnet18_10_ahead_perfect_window_of_40")
torch.save(model_ft, "models/{}".format(model_name))

In [9]:
!ls models

resnet18_0.988		resnet18_10_ahead_perfect_window_of_20
resnet18_10_ahead	resnet18_10_ahead_perfect_window_of_40
resnet18_10_ahead_0.81	resnet18_this_frame


## TRYING ON IMAGES

In [16]:
import lime
from lime import lime_image
from PIL import Image
import requests
from io import BytesIO

#url = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b6/Cologne_Germany_Industrial-work-with-Personal-Protective-Equipment-04.jpg/1024px-Cologne_Germany_Industrial-work-with-Personal-Protective-Equipment-04.jpg"
#response = requests.get(url)
#img = Image.open(BytesIO(response.content))
#plt.imshow(img)
#plt.show()

In [11]:
!ls accident_frames

10sfinal		    H3mc1RuzETg.mp4_1373_0.jpg
10sfinal20		    H3mc1RuzETg.mp4_1374_0.jpg
10sfinal40		    H3mc1RuzETg.mp4_1375_0.jpg
3EdQq5iAGYs.mp4_100_0.jpg   H3mc1RuzETg.mp4_1376_0.jpg
3EdQq5iAGYs.mp4_10_0.jpg    H3mc1RuzETg.mp4_1377_0.jpg
3EdQq5iAGYs.mp4_101_0.jpg   H3mc1RuzETg.mp4_1378_0.jpg
3EdQq5iAGYs.mp4_102_0.jpg   H3mc1RuzETg.mp4_1379_0.jpg
3EdQq5iAGYs.mp4_103_0.jpg   H3mc1RuzETg.mp4_1380_0.jpg
3EdQq5iAGYs.mp4_104_0.jpg   H3mc1RuzETg.mp4_1381_0.jpg
3EdQq5iAGYs.mp4_105_0.jpg   H3mc1RuzETg.mp4_1382_0.jpg
3EdQq5iAGYs.mp4_106_0.jpg   H3mc1RuzETg.mp4_1383_0.jpg
3EdQq5iAGYs.mp4_107_0.jpg   H3mc1RuzETg.mp4_1384_0.jpg
3EdQq5iAGYs.mp4_108_0.jpg   H3mc1RuzETg.mp4_1385_0.jpg
3EdQq5iAGYs.mp4_109_0.jpg   H3mc1RuzETg.mp4_1386_0.jpg
3EdQq5iAGYs.mp4_1_0.jpg     H3mc1RuzETg.mp4_1387_0.jpg
3EdQq5iAGYs.mp4_110_0.jpg   H3mc1RuzETg.mp4_1388_0.jpg
3EdQq5iAGYs.mp4_11_0.jpg    H3mc1RuzETg.mp4_1389_0.jpg
3EdQq5iAGYs.mp4_111_0.jpg   H3mc1RuzETg.mp4_1390_0.jpg
3EdQq5iAGYs.mp4_112_0.jpg   H3

In [31]:
#model_ft(x)[0].argmax()
#model_ft =  torch.load("models/resnet18_this_frame")
#model_ft =  torch.load("models/resnet18_this_frame")
#model_ft.load_state_dict(torch.load("models/{}".format("resnet18_10_ahead_perfect_window_of_40")))

model_ft.cpu()
loader = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

def image_loader(image_name):
    """load image, returns cuda tensor"""
    if image_name.startswith("http"):
        url = image_name
        response = requests.get(url)
        image = Image.open(BytesIO(response.content))
    else:
        image = Image.open(image_name)
    image = loader(image).float()
    image = Variable(image, requires_grad=True)
    image = image.unsqueeze(0)  #this is for VGG, may not be needed for ResNet
    return image.cpu()  #assumes that you're using GPU

def load_and_apply(x):
    return model_ft( Variable(loader(x).float(), requires_grad=True).unsqueeze(0))
    
image = image_loader("/home/dd/Dropbox/to_show/before.jpg")
print(model_ft(image).exp())
image = image_loader("/home/dd/Dropbox/to_show/accident.jpg")
print(model_ft(image).exp())
image = image_loader("/home/dd/Dropbox/to_show/after.jpg")
print(model_ft(image).exp())

tensor([[ 0.2604,  3.2817]])
tensor([[ 0.8960,  1.1078]])
tensor([[  0.0098,  81.0585]])


## Load pretrained show and tell model