# Check GPU

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, 
and then re-execute this cell.


# Run this command once and restart the runtime

In [2]:
# Install the `av` package into the notebook runtime with pip (the recorded
# run below resolved av-8.0.2). No cell shown in this notebook imports `av`
# directly; per the heading, the runtime is restarted after installing.
!pip install av

Collecting av
[?25l  Downloading https://files.pythonhosted.org/packages/9e/62/9a992be76f8e13ce0e3a24a838191b546805545116f9fc869bd11bd21b5f/av-8.0.2-cp36-cp36m-manylinux2010_x86_64.whl (36.9MB)
[K     |████████████████████████████████| 36.9MB 1.3MB/s 
[?25hInstalling collected packages: av
Successfully installed av-8.0.2


# Mount drive


In [3]:
import os
import io
from google.colab import drive

# Attach Google Drive to the Colab VM at this mount point.
DRIVE_MOUNT = '/content/drive'
drive.mount(DRIVE_MOUNT)

# Path of the project's data directory inside the shared drive.
# This only builds the path string; nothing is created on disk here.
DATA_FOLDER = os.path.join(
    DRIVE_MOUNT,
    'Shared drives',
    'CIS680 Final Project',
    'data',
)

Mounted at /content/drive


# Load Dataset

In [4]:
import matplotlib.pyplot as plt
import torch
from torch.nn import functional as F
from torchvision import transforms
from torch import nn, Tensor
# from dataset import *
import random
import torchvision
import torchvision.models as models
import torch.optim as optim
import numpy as np

# Number of pre-batched ".pt" files available for each split.
train_size = 138
test_size = 41

# File-name prefixes; the batch index and ".pt" are appended when loading.
train_path = DATA_FOLDER + "/dataset_3/train/train_b"
test_path = DATA_FOLDER + "/dataset_3/test/test_b"

# Despite the names these are plain Python lists holding every loaded batch.
train_loader = []
test_loader = []

# Load the train split first, then the test split, reporting progress
# every tenth file.
for split_name, split_prefix, split_size, split_batches in (
        ("train", train_path, train_size, train_loader),
        ("test", test_path, test_size, test_loader)):
    for batch in range(split_size):
        batch_path = split_prefix + str(batch) + ".pt"
        split_batches.append(torch.load(batch_path))
        if batch % 10 == 0:
            print(split_name + ": " + str(batch) + "/" + str(split_size))


train: 0/138
train: 10/138
train: 20/138
train: 30/138
train: 40/138
train: 50/138
train: 60/138
train: 70/138
train: 80/138
train: 90/138
train: 100/138
train: 110/138
train: 120/138
train: 130/138
test: 0/41
test: 10/41
test: 20/41
test: 30/41
test: 40/41


# Spatial Model

In [5]:
class SpatialStream(torch.nn.Module):
    """Spatial stream: a pretrained torchvision ResNet-50 with a new head.

    The backbone's ``fc`` layer is replaced by
    Linear(2048, 1024) -> ReLU -> Dropout -> Linear(1024, num_classes)
    -> ReLU -> Dropout -> Softmax, so ``forward`` returns values normalised
    over the class dimension rather than raw logits.

    Args:
        device: only stored on the instance; the module is not moved here.
        num_classes: size of the output layer.
        dropout_probability: ``p`` used by both Dropout layers in the head.
        train_resnet: value written to ``requires_grad`` of every backbone
            parameter. The replacement head is created afterwards, so its
            parameters are trainable regardless of this flag.
    """

    def __init__(self,
                 device='cuda',
                 num_classes=51,
                 dropout_probability=0.5,
                 train_resnet=True):

        # Initialize the stream layers
        super().__init__()
        self.device = device
        self.num_classes = num_classes

        # Spatial Backbone
        self.spatial = models.resnet50(pretrained=True)
        for param in self.spatial.parameters():
            param.requires_grad = train_resnet  # False: Freezes the weights of the pre-trained model

        # Add to Spatial Backbone.
        # Softmax is given an explicit dim: the implicit choice is deprecated
        # and warns on every forward pass. For the 2-D (batch, classes)
        # output of the head, dim=1 is what the implicit rule selected, so
        # results and state_dict keys are unchanged.
        self.spatial.fc = nn.Sequential(nn.Linear(2048, 1024),
                                nn.ReLU(),
                                nn.Dropout(p=dropout_probability),
                                nn.Linear(1024, self.num_classes),
                                nn.ReLU(),
                                nn.Dropout(p=dropout_probability),
                                nn.Softmax(dim=1))

    def forward(self, X):
        """Run the backbone and head on ``X`` and return the per-class scores."""
        return self.spatial(X)

    def compute_loss(self, output, labels):
        """Return the mean cross-entropy between ``output`` and ``labels``.

        NOTE(review): ``nn.CrossEntropyLoss`` applies log-softmax itself, so
        passing it the already-softmaxed result of ``forward`` normalises
        twice. Left unchanged so behaviour stays consistent with checkpoints
        trained with this definition - confirm before changing.
        """
        criterion = nn.CrossEntropyLoss()
        return criterion(output, labels)

# Temporal Model

In [6]:
class TemporalStream(torch.nn.Module):
    """Temporal stream: a pretrained torchvision ResNet-50 with a new head.

    The backbone keeps its original first convolution (the 4-channel
    variant is commented out), and all backbone parameters are trainable.
    The ``fc`` layer is replaced by
    Linear(2048, 1024) -> ReLU -> Dropout -> Linear(1024, num_classes)
    -> ReLU -> Dropout -> Softmax, so ``forward`` returns values normalised
    over the class dimension rather than raw logits.

    Args:
        device: only stored on the instance; the module is not moved here.
        num_classes: size of the output layer.
        dropout_probability: ``p`` used by both Dropout layers in the head.
    """

    def __init__(self,
                 device='cuda',
                 num_classes=51,
                 dropout_probability=0.5):

        # Initialize the stream layers
        super().__init__()
        self.device = device
        self.num_classes = num_classes

        # Backbone
        self.temporal = models.resnet50(pretrained=True)
        for param in self.temporal.parameters():
            param.requires_grad = True

        #self.temporal.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3)

        # Softmax is given an explicit dim: the implicit choice is deprecated
        # and warns on every forward pass. For the 2-D (batch, classes)
        # output of the head, dim=1 is what the implicit rule selected, so
        # results and state_dict keys are unchanged.
        self.temporal.fc = nn.Sequential(nn.Linear(2048, 1024),
                                nn.ReLU(),
                                nn.Dropout(p=dropout_probability),
                                nn.Linear(1024, self.num_classes),
                                nn.ReLU(),
                                nn.Dropout(p=dropout_probability),
                                nn.Softmax(dim=1))

    def forward(self, X):
        """Run the backbone and head on ``X`` and return the per-class scores."""
        return self.temporal(X)

    def compute_loss(self, output, target):
        """Return the mean cross-entropy between ``output`` and ``target``.

        NOTE(review): ``nn.CrossEntropyLoss`` applies log-softmax itself, so
        passing it the already-softmaxed result of ``forward`` normalises
        twice. Left unchanged so behaviour stays consistent with checkpoints
        trained with this definition - confirm before changing.
        """
        criterion = nn.CrossEntropyLoss()
        return criterion(output, target)

# Fuse Model

In [7]:
class FuseNET(torch.nn.Module):
    """Fusion head that maps two concatenated class-score vectors to logits.

    The input is expected to have ``2 * num_classes`` features (one
    ``num_classes``-sized vector per stream, concatenated). It passes
    through two linear layers: ``2 * num_classes -> 4 * num_classes ->
    num_classes``. With the default ``num_classes=51`` these are the
    102 -> 204 -> 51 sizes.

    NOTE(review): there is no activation between the two linear layers, so
    the head is a single affine map overall. Kept as-is so saved
    ``state_dict`` checkpoints keep their meaning - confirm this is intended.

    Args:
        device: only stored on the instance; the module is not moved here.
        num_classes: number of output classes (and per-stream input width).
    """

    def __init__(self,
                 device='cuda',
                 num_classes=51):

        # Initialize the stream layers
        super().__init__()
        self.device = device
        self.num_classes = num_classes

        # Sizes are derived from num_classes instead of being hard-coded,
        # so a non-default class count builds a consistent network.
        fused_features = 2 * self.num_classes
        hidden_features = 4 * self.num_classes
        self.fc1 = nn.Linear(fused_features, hidden_features)
        self.output = nn.Linear(hidden_features, self.num_classes)

    def forward(self, X):
        """Return class logits for a batch of concatenated stream outputs."""
        return self.output(self.fc1(X))

    def compute_loss(self, output, labels):
        """Return the mean cross-entropy between logits ``output`` and ``labels``."""
        criterion = nn.CrossEntropyLoss()
        return criterion(output, labels)

# Video Frame Stacking (SG3I)

In [8]:
from google.colab.patches import cv2_imshow
import cv2 as cv

def getSG3I(videos):
    """Build one stacked-grayscale image per clip in a batch.

    Every frame of every clip is converted with
    ``cv.cvtColor(..., cv.COLOR_BGR2GRAY)``; the grayscale frames of a clip
    are stacked along a new last axis (frame ``i`` becomes channel ``i``),
    and the clips are then stacked along axis 0.

    Args:
        videos: tensor indexed as ``videos[clip][frame]``. Each frame is
            converted with ``.numpy()``, so the tensor must live on the CPU.
            Presumably each frame is an (H, W, 3) image in a dtype OpenCV
            accepts - TODO confirm against the dataset.

    Returns:
        ``torch.Tensor`` built from the stacked array, laid out as
        (clip, <grayscale frame axes>, frame).
    """
    stacked_clips = []
    for clip_idx in range(videos.size(0)):
        clip = videos[clip_idx]
        gray_frames = [
            cv.cvtColor(clip[frame_idx].numpy(), cv.COLOR_BGR2GRAY)
            for frame_idx in range(clip.size(0))
        ]
        # Frames become the trailing (channel-like) axis of the clip image.
        stacked_clips.append(np.stack(gray_frames, axis=-1))

    return torch.Tensor(np.stack(stacked_clips, axis=0))

# Train and Test Functions

In [9]:
  def train(epoch):
    """Train the fusion head for one epoch over ``train_loader``.

    Relies on notebook globals: the models ``spatial``, ``temporal`` and
    ``fused``; ``optimizer``, ``device``, ``batch_size``, ``train_loader``,
    ``EPOCH_SAVE_PREFIX`` and the logging lists ``train_loss_list``,
    ``train_counter``, ``epoch_loss_list``, ``test_loss_list`` and
    ``accuracy_list``.

    Only ``fused`` is optimised. Both streams are put in eval mode and run
    without gradient tracking, since the optimizer is expected to hold only
    ``fused`` parameters.

    Args:
        epoch: zero-based epoch index, used for logging and checkpoint names.
    """
    spatial.eval()
    temporal.eval()
    fused.train()

    log_interval = 100
    save_interval = 250

    # ``train_loader`` may be a DataLoader (which has ``.dataset``) or a
    # plain list of batch dicts as built by the loading cell; a list has no
    # ``.dataset`` attribute, so count its samples from the labels instead.
    dataset = getattr(train_loader, 'dataset', None)
    if dataset is not None:
        num_train_samples = len(dataset)
    else:
        num_train_samples = sum(len(batch['labels']) for batch in train_loader)

    train_loss = 0
    epoch_loss = []
    log_int_loss = 0
    log_int_batches = 0
    for batch_idx, data in enumerate(train_loader, 0):

        videos = data["videos"]
        labels = torch.tensor(data["labels"])

        # Temporal input: stacked grayscale frames, frame axis moved to the
        # channel position.
        SG3I = getSG3I(videos)
        SG3I = SG3I.permute(0,3,1,2)
        SG3I = SG3I.to(device)

        videos = videos.type(torch.FloatTensor)
        videos = videos.to(device)

        optimizer.zero_grad()

        # The streams only provide inputs for the fusion head, so no autograd
        # graph is needed for them.
        with torch.no_grad():
            # spatial: first frame of every clip, last axis moved to the
            # channel position. This permutation also swaps the two middle
            # axes. NOTE(review): presumably matches how the spatial stream
            # was trained - confirm.
            spatial_input = videos[:,0,:,:].permute(0,3,2,1)
            spatial_output = spatial(spatial_input)

            # temporal
            temporal_output = temporal(SG3I)

        # fused
        fused_input = torch.hstack((spatial_output, temporal_output))
        fused_output = fused(fused_input)

        fused_output = fused_output.to(device)
        labels = labels.to(device)

        # calculate losses
        loss = fused.compute_loss(fused_output, labels)

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss
        epoch_loss.append(batch_loss)

        # Logging Interval: log the first batch and then every
        # ``log_interval`` batches, averaging over the batches actually
        # accumulated since the previous log line.
        log_int_loss += batch_loss
        log_int_batches += 1
        if batch_idx == 0 or batch_idx % log_interval == log_interval - 1:
            print('Epoch: ', epoch, ', Batch: ', batch_idx, ', loss avg over log interval: ', log_int_loss / log_int_batches)
            # NOTE(review): this is (mean batch loss so far) * batch_size,
            # kept exactly as written - confirm the intended scaling.
            train_loss_list.append(train_loss / (batch_idx + 1) * batch_size)
            train_counter.append((batch_idx + 1) * batch_size + epoch * num_train_samples)
            log_int_loss = 0
            log_int_batches = 0

        # Periodic mid-epoch checkpoint.
        if batch_idx % save_interval == save_interval - 1:
            print('saving model')
            save_path = os.path.join(EPOCH_SAVE_PREFIX, 'fused_epoch' + str(epoch) + '_iter_' + str(batch_idx))
            torch.save({
                'epoch': epoch,
                'train_total_loss_list': train_loss_list,
                'epoch_total_loss_list': epoch_loss_list,
                'test_loss_list': test_loss_list,
                'train_counter': train_counter,
                'accuracy_list': accuracy_list,
                'model_state_dict': fused.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, save_path)


    avg_loss = sum(epoch_loss) / len(epoch_loss)
    epoch_loss_list.append(avg_loss)
    print('Epoch: ', epoch, ', avg total loss: ', avg_loss)

def test():
    """Evaluate the fused model on ``test_loader`` and log the results.

    Relies on notebook globals: the models ``spatial``, ``temporal`` and
    ``fused``; ``device``, ``test_loader`` and the logging lists
    ``test_loss_list`` and ``accuracy_list`` (one value appended to each).

    All three models are switched to eval mode here so the function does not
    depend on ``train()`` having been called first.
    """
    spatial.eval()
    temporal.eval()
    fused.eval()
    test_loss = 0
    correct = 0
    # Samples seen so far. Counted from the labels because ``test_loader``
    # may be a plain list of batch dicts, which has no ``.dataset``.
    num_samples = 0

    # Data Loop
    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader, 0):
            videos = data["videos"]
            labels = torch.tensor(data["labels"])

            # Temporal input: stacked grayscale frames, frame axis moved to
            # the channel position.
            SG3I = getSG3I(videos)
            SG3I = SG3I.permute(0,3,1,2)
            SG3I = SG3I.to(device)

            videos = videos.type(torch.FloatTensor)
            videos = videos.to(device)

            # spatial: first frame of every clip, same permutation as in
            # train().
            spatial_input = videos[:,0,:,:].permute(0,3,2,1)
            spatial_output = spatial(spatial_input)

            # temporal
            temporal_output = temporal(SG3I)

            # fused
            fused_input = torch.hstack((spatial_output, temporal_output))
            fused_output = fused(fused_input)

            fused_output = fused_output.to(device)
            labels = labels.to(device)

            # calculate losses
            loss = fused.compute_loss(fused_output, labels)

            test_loss += loss.item()

            # calculate number of correct predictions in batch
            correct += (torch.argmax(fused_output, 1) == labels).sum().item()
            num_samples += labels.numel()
            if batch_idx % 100 == 0:
                print ("iter  ", batch_idx)
                # Uses the true running sample count, so it stays correct
                # when batches differ in size.
                print("accuracy so far = ", correct / num_samples)

    # Log
    # NOTE(review): ``test_loss`` sums per-batch mean losses, so dividing by
    # the sample count (as the original normalisation did) is not a
    # per-sample mean - confirm the intended scaling.
    test_loss_list.append(test_loss / num_samples)
    accuracy = correct / num_samples
    accuracy_list.append(accuracy)
    print('Avg Validation Loss: ', test_loss / num_samples)
    print('Accuracy: ', accuracy)

# Main Training Loop

In [None]:
# Directory on the mounted shared drive where fusion checkpoints are written.
EPOCH_SAVE_PREFIX = '/content/drive/Shared drives/CIS680 Final Project/models/fused_sampled/'
# torch.save does not create missing directories, so make sure it exists.
os.makedirs(EPOCH_SAVE_PREFIX, exist_ok=True)

# Device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
torch.random.manual_seed(1)

# load trained spatial
# map_location lets checkpoints saved from a GPU run load on whichever
# device this runtime actually has (including CPU-only).
spatial = SpatialStream()
spatial.to(device)
spatial_network_path = '/content/drive/Shared drives/CIS680 Final Project/models/spatial/spatial_epoch10'
checkpoint_spatial = torch.load(spatial_network_path, map_location=device)
spatial.load_state_dict(checkpoint_spatial['model_state_dict'])

# load trained temporal
temporal = TemporalStream()
temporal.to(device)
temporal_network_path = '/content/drive/Shared drives/CIS680 Final Project/models/temporal_SG3I/temporal_epoch8'
checkpoint_temporal = torch.load(temporal_network_path, map_location=device)
temporal.load_state_dict(checkpoint_temporal['model_state_dict'])


# fused model: the only network handed to the optimizer
learning_rate = 0.001
fused = FuseNET()
fused.to(device)
optimizer = optim.SGD(fused.parameters(), lr=learning_rate, momentum=0.9)

# Epochs
num_epochs = 50
batch_size = 128

# Logging setup: train (appended to by train())
train_loss_list = []
epoch_loss_list = []
train_counter = []

# Logging setup: test (appended to by test())
test_loss_list = []
accuracy_list = []
epoch_list = np.arange(num_epochs)

# epoch loop
for epoch in range(num_epochs):

    # Train & Validate
    train(epoch)
    test()

    # Save Model Version: one checkpoint per finished epoch, carrying the
    # full logging history so training can be resumed or plotted later.
    save_path = os.path.join(EPOCH_SAVE_PREFIX, 'fused_epoch' + str(epoch))
    torch.save({
        'epoch': epoch,
        'train_total_loss_list': train_loss_list,
        'epoch_total_loss_list': epoch_loss_list,
        'test_loss_list': test_loss_list,
        'train_counter': train_counter,
        'accuracy_list': accuracy_list,
        'model_state_dict': fused.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, save_path)

    print("Epoch %d/%d Completed" % (epoch, num_epochs - 1))

# Resume Training

In [None]:
# Directory on the mounted shared drive where fusion checkpoints are written.
EPOCH_SAVE_PREFIX = '/content/drive/Shared drives/CIS680 Final Project/models/fused_sampled/'
# torch.save does not create missing directories, so make sure it exists.
os.makedirs(EPOCH_SAVE_PREFIX, exist_ok=True)

# Device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
torch.random.manual_seed(1)

# load trained spatial
# map_location lets checkpoints saved from a GPU run load on whichever
# device this runtime actually has (including CPU-only).
spatial = SpatialStream()
spatial.to(device)
spatial_network_path = '/content/drive/Shared drives/CIS680 Final Project/models/spatial/spatial_epoch10'
checkpoint_spatial = torch.load(spatial_network_path, map_location=device)
spatial.load_state_dict(checkpoint_spatial['model_state_dict'])

# load trained temporal
temporal = TemporalStream()
temporal.to(device)
temporal_network_path = '/content/drive/Shared drives/CIS680 Final Project/models/temporal_SG3I/temporal_epoch8'
checkpoint_temporal = torch.load(temporal_network_path, map_location=device)
temporal.load_state_dict(checkpoint_temporal['model_state_dict'])


# fused model: the only network handed to the optimizer
learning_rate = 0.001
fused = FuseNET()
fused.to(device)
optimizer = optim.SGD(fused.parameters(), lr=learning_rate, momentum=0.9)

# Epochs
num_epochs = 250
batch_size = 128
epoch_list = np.arange(num_epochs)


# LOAD NETWORK: restore the fusion head and optimizer state from a saved
# per-epoch checkpoint.
fused_network_path = '/content/drive/Shared drives/CIS680 Final Project/models/fused_sampled/fused_epoch1'
checkpoint_fused = torch.load(fused_network_path, map_location=device)
fused.load_state_dict(checkpoint_fused['model_state_dict'])
optimizer.load_state_dict(checkpoint_fused['optimizer_state_dict'])
last_epoch = checkpoint_fused['epoch']

# Logging setup: continue the histories stored in the checkpoint so that
# train() and test() keep appending to them.
train_loss_list = checkpoint_fused['train_total_loss_list']
epoch_loss_list = checkpoint_fused['epoch_total_loss_list']

test_loss_list = checkpoint_fused['test_loss_list']
accuracy_list = checkpoint_fused['accuracy_list']
train_counter = checkpoint_fused['train_counter']


# epoch loop: start with the epoch after the one stored in the checkpoint
for epoch in range(last_epoch + 1, num_epochs):

    # Train & Validate
    train(epoch)
    test()

    # Save Model Version
    save_path = os.path.join(EPOCH_SAVE_PREFIX, 'fused_epoch' + str(epoch))
    torch.save({
        'epoch': epoch,
        'train_total_loss_list': train_loss_list,
        'epoch_total_loss_list': epoch_loss_list,
        'test_loss_list': test_loss_list,
        'train_counter': train_counter,
        'accuracy_list': accuracy_list,
        'model_state_dict': fused.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, save_path)

    print("Epoch %d/%d Completed" % (epoch, num_epochs - 1))

# Plotting

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Directory on the mounted shared drive that holds the fusion checkpoints.
EPOCH_SAVE_PREFIX = '/content/drive/Shared drives/CIS680 Final Project/models/fused_sampled/'

# Device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
torch.random.manual_seed(1)

# Load params: epoch whose checkpoint supplies the logged curves
last_epoch = 100

# load data
# map_location lets a checkpoint saved from a GPU run load on a CPU-only
# runtime; only the logged lists are used here.
network_path = EPOCH_SAVE_PREFIX + 'fused_epoch' + str(last_epoch)
checkpoint = torch.load(network_path, map_location=device)
train_loss_list = checkpoint['train_total_loss_list']
epoch_loss_list = checkpoint['epoch_total_loss_list']
train_counter = checkpoint['train_counter']
test_loss_list = checkpoint['test_loss_list']
# Key must match the one written by the training cells ('accuracy_list').
accuracy_list = checkpoint['accuracy_list']
epoch_list = np.arange(last_epoch+1)


# plots: one figure per logged series, indexed by epoch
fig = plt.figure()
plt.plot(epoch_loss_list, color='blue')
plt.legend(['FuseNet Train Loss'], loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Total Loss')

fig = plt.figure()
plt.plot(test_loss_list, color='green')
plt.legend(['FuseNet Validation Loss'], loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Total Loss')

fig = plt.figure()
plt.plot(accuracy_list, color='red')
plt.legend(['FuseNet Validation Accuracy'], loc='lower right')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')


plt.show()