In [2]:
%load_ext autoreload
%autoreload 2

In [25]:
import sys
import os
import collections
import pickle
import math

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,random_split

from tqdm.auto import tqdm
from IPython.display import Image 

from src.model import BoundaryDetectorAttention,BoundaryDetectorSimple
from src.dataset import MovieDataset
from utils import *
from evaluate_sceneseg import calc_ap, calc_miou, calc_precision_recall
#from pytorch_lightning.callbacks import ModelCheckpoint
#import matplotlib.pyplot as plt
#from mpl_toolkits.axes_grid1 import ImageGrid

In [26]:
if torch.cuda.is_available():

    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    
    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: GeForce RTX 3060 Ti


In [27]:
#window_size = 10
window_size = 12
num_epochs = 20
model_save_path = '/home/jolteon/eluvio_challenge/models/'
model_name = 'Attention_FC'

In [28]:
train_path = '/home/jolteon/eluvio_challenge/data/train/'
val_path = '/home/jolteon/eluvio_challenge/data/val/'

In [30]:
train_dataset = MovieDataset(train_path,window_size=window_size)
train_loader = DataLoader(train_dataset, batch_size = 128, pin_memory=True,num_workers=12)


In [31]:
#print(dataset_length)
#print(train_length)
#print(val_length)

In [32]:
#next(iter(train_loader))

In [33]:
model = BoundaryDetectorAttention(window_size=window_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model = model.to(device)
model.train()

criterion = nn.BCELoss()
criterion = criterion.to(device)


In [34]:
num_param = sum([param.nelement() for param in model.parameters()])
print("num parameters",num_param)

num parameters 10001367


In [37]:
train_losses = []
train_accuracies = []

scores_list=[]
best_val_score=0
for t in tqdm(range(num_epochs)):
    model.train()
    train_loss_per_epoch = 0
    train_correct_per_epoch = 0
    train_total = 0
    for ii,batch in enumerate(train_loader):
        #Put things into Cuda
        place, cast, action, audio, target = batch
        place = place.to(device)
        cast = cast.to(device)
        action = action.to(device)
        audio = audio.to(device)
        target = target.to(device)
        embedding = place, cast, action, audio

        #Forward Pass
        out = model(embedding)
        #Book keeping for prediction metrics
        train_total += target.size(0)*target.size(1)
        preds = out >.5

        #Compute loss and accuracy
        loss =criterion(out, target)
        #Backward Pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #Keep Track of Accuracy and Loss
        train_loss_per_epoch += loss.item()
        train_correct_per_epoch += torch.sum(preds == target).item()
    #End Train Loop
    train_losses.append(train_loss_per_epoch/train_total) 
    train_accuracies.append(train_correct_per_epoch/train_total)
    
    #Start Val
    model.eval()
    with torch.no_grad():
        # Do some engineering to get it into a format where we can use evaluate_sceneseg functions
        gt_dict = dict()
        pr_dict = dict()
        shot_to_end_frame_dict = dict()
        scores =dict()
        for file in os.listdir(val_path):
            if file.endswith('.pkl'):
                with open(val_path+file, 'rb') as f:
                    data = pickle.load(f)
                predictions = generate_predictions(model,data,window_size,device)
                
                gt_dict[data['imdb_id']] = data["scene_transition_boundary_ground_truth"].numpy().astype(float)
                pr_dict[data['imdb_id']] = predictions
                shot_to_end_frame_dict[data['imdb_id']] = data['shot_end_frame']


        scores["AP"], scores["mAP"], _ = calc_ap(gt_dict, pr_dict)
        scores["Miou"], _ = calc_miou(gt_dict, pr_dict, shot_to_end_frame_dict)
        scores["Precision"], scores["Recall"], scores["F1"], *_ = calc_precision_recall(gt_dict, pr_dict)
        
    
    # End Val Loop
    scores_list.append(scores)
    print("[EPOCH]: %i, [TRAIN LOSS]: %.6f, [TRAIN ACCURACY]: %.3f" % (t, train_losses[-1], train_accuracies[-1]))
    print("[EPOCH]: %i, [VAL SCORES]: %s \n" % (t,scores_list[-1]))
    if scores_list[-1]['mAP'] > best_val_score:
        best_val_score = scores_list[-1]['mAP']

        # TODO: Save best model, optimizer, epoch_number
        best_model_file = model_save_path+model_name+'_'+str(t)+'.ckpt'
        torch.save(model.state_dict(), best_model_file)


  0%|          | 0/20 [00:00<?, ?it/s]

  precision = tps / tp_fp.sum()
  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 0, [TRAIN LOSS]: 0.000295, [TRAIN ACCURACY]: 0.910
[EPOCH]: 0, [VAL SCORES]: {'AP': 0.060098466989969256, 'mAP': 0.062450275748646825, 'Miou': 0.033519879802208975, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  precision = tps / tp_fp.sum()
  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 1, [TRAIN LOSS]: 0.000248, [TRAIN ACCURACY]: 0.927
[EPOCH]: 1, [VAL SCORES]: {'AP': 0.1519858472377807, 'mAP': 0.16770740377320764, 'Miou': 0.033519879802208975, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  precision = tps / tp_fp.sum()
  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 2, [TRAIN LOSS]: 0.000241, [TRAIN ACCURACY]: 0.927
[EPOCH]: 2, [VAL SCORES]: {'AP': 0.15891263981944292, 'mAP': 0.17279513981031858, 'Miou': 0.033519879802208975, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 3, [TRAIN LOSS]: 0.000238, [TRAIN ACCURACY]: 0.927
[EPOCH]: 3, [VAL SCORES]: {'AP': 0.14555164263320206, 'mAP': 0.15930544303597832, 'Miou': 0.06886613214481825, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  precision = tps / tp_fp.sum()
  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 4, [TRAIN LOSS]: 0.000236, [TRAIN ACCURACY]: 0.927
[EPOCH]: 4, [VAL SCORES]: {'AP': 0.1501095057876441, 'mAP': 0.16149062365907224, 'Miou': 0.033519879802208975, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  precision = tps / tp_fp.sum()
  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 5, [TRAIN LOSS]: 0.000234, [TRAIN ACCURACY]: 0.927
[EPOCH]: 5, [VAL SCORES]: {'AP': 0.14142780740175787, 'mAP': 0.1537870750690852, 'Miou': 0.033519879802208975, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 6, [TRAIN LOSS]: 0.000232, [TRAIN ACCURACY]: 0.927
[EPOCH]: 6, [VAL SCORES]: {'AP': 0.13153654517826585, 'mAP': 0.1415883968129868, 'Miou': 0.10472336148556395, 'Precision': 0.0, 'Recall': 0.0, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 7, [TRAIN LOSS]: 0.000230, [TRAIN ACCURACY]: 0.928
[EPOCH]: 7, [VAL SCORES]: {'AP': 0.13189719085611346, 'mAP': 0.144607246609279, 'Miou': 0.08981495773531868, 'Precision': 0.020833333333333332, 'Recall': 0.0007267441860465116, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 8, [TRAIN LOSS]: 0.000228, [TRAIN ACCURACY]: 0.928
[EPOCH]: 8, [VAL SCORES]: {'AP': 0.1254861422503589, 'mAP': 0.1350476659497955, 'Miou': 0.17100818866602224, 'Precision': 0.20967261904761902, 'Recall': 0.01268778953941444, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 9, [TRAIN LOSS]: 0.000226, [TRAIN ACCURACY]: 0.929
[EPOCH]: 9, [VAL SCORES]: {'AP': 0.1216672723827803, 'mAP': 0.13070574372778956, 'Miou': 0.18324330827121418, 'Precision': 0.12701330532212884, 'Recall': 0.008751959547343216, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 10, [TRAIN LOSS]: 0.000224, [TRAIN ACCURACY]: 0.929
[EPOCH]: 10, [VAL SCORES]: {'AP': 0.12167581602144009, 'mAP': 0.13171752689719066, 'Miou': 0.15673000744556842, 'Precision': 0.18344155844155843, 'Recall': 0.009121838504003131, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 11, [TRAIN LOSS]: 0.000223, [TRAIN ACCURACY]: 0.929
[EPOCH]: 11, [VAL SCORES]: {'AP': 0.11361153517307561, 'mAP': 0.12237815322090699, 'Miou': 0.13634774358776788, 'Precision': 0.1162202380952381, 'Recall': 0.005820715670561556, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)


[EPOCH]: 12, [TRAIN LOSS]: 0.000221, [TRAIN ACCURACY]: 0.930
[EPOCH]: 12, [VAL SCORES]: {'AP': 0.11343680617222153, 'mAP': 0.12379925618457985, 'Miou': 0.206508970068725, 'Precision': 0.1537336029983089, 'Recall': 0.02550193054797127, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 13, [TRAIN LOSS]: 0.000219, [TRAIN ACCURACY]: 0.931
[EPOCH]: 13, [VAL SCORES]: {'AP': 0.12139302607719016, 'mAP': 0.13094134527197454, 'Miou': 0.16130785509024628, 'Precision': 0.14479166666666668, 'Recall': 0.01088064661968539, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 14, [TRAIN LOSS]: 0.000216, [TRAIN ACCURACY]: 0.931
[EPOCH]: 14, [VAL SCORES]: {'AP': 0.11463750880340393, 'mAP': 0.12189908293381929, 'Miou': 0.11694514279218869, 'Precision': 0.03869047619047619, 'Recall': 0.002379414298018949, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 15, [TRAIN LOSS]: 0.000214, [TRAIN ACCURACY]: 0.932
[EPOCH]: 15, [VAL SCORES]: {'AP': 0.10791564053425057, 'mAP': 0.1164443504467277, 'Miou': 0.14548739134468786, 'Precision': 0.1388888888888889, 'Recall': 0.007158721963210884, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 16, [TRAIN LOSS]: 0.000213, [TRAIN ACCURACY]: 0.932
[EPOCH]: 16, [VAL SCORES]: {'AP': 0.11287234979838418, 'mAP': 0.12047558785008868, 'Miou': 0.15731212989537127, 'Precision': 0.16818910256410258, 'Recall': 0.007650296933564707, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 17, [TRAIN LOSS]: 0.000211, [TRAIN ACCURACY]: 0.933
[EPOCH]: 17, [VAL SCORES]: {'AP': 0.10972964588467975, 'mAP': 0.11763142013392791, 'Miou': 0.12322174137561377, 'Precision': 0.09375, 'Recall': 0.003904468764249886, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)
  precision = tps / tp_fp.sum()


[EPOCH]: 18, [TRAIN LOSS]: 0.000208, [TRAIN ACCURACY]: 0.934
[EPOCH]: 18, [VAL SCORES]: {'AP': 0.1104479038315547, 'mAP': 0.1183369927441567, 'Miou': 0.17031203151451138, 'Precision': 0.11645299145299146, 'Recall': 0.007650296933564707, 'F1': nan} 

[EPOCH]: 19, [TRAIN LOSS]: 0.000207, [TRAIN ACCURACY]: 0.934
[EPOCH]: 19, [VAL SCORES]: {'AP': 0.1125798448104446, 'mAP': 0.12052338936114486, 'Miou': 0.21145055288342338, 'Precision': 0.09680876987462665, 'Recall': 0.015351680892113292, 'F1': nan} 



  fscore_dict[imdb_id] = 2 * p * r / (p + r)


In [42]:
with open('Attention_scores', 'wb') as handle:
    pickle.dump(scores_list, handle,)

In [None]:
index_to_predictions_list

In [43]:
ckpt_path = '/home/jolteon/eluvio_challenge/models/Attention_FC_Final.ckpt'
test_dir = '/home/jolteon/eluvio_challenge/data/test/'
output_dir = '/home/jolteon/eluvio_challenge/Attention_output/'

model = BoundaryDetectorAttention(window_size=12)
model.load_state_dict(torch.load(ckpt_path,))
model.eval()
generate_predictions_dir_NN(model,12,test_dir,output_dir,device)


In [24]:
!python.exe evaluate_sceneseg.py FC_simple_output/

# of IMDB IDs: 8
Scores: {
    "AP": 0.3563740077987584,
    "mAP": 0.37175532395490957,
    "Miou": 0.3098844562627911,
    "Precision": 0.7226293151293152,
    "Recall": 0.09877458091669455,
    "F1": 0.16799157518164576
}


In [44]:
!python.exe evaluate_sceneseg.py Attention_output/

# of IMDB IDs: 8
Scores: {
    "AP": 0.12271408528698208,
    "mAP": 0.13798523838151633,
    "Miou": 0.1693378531162345,
    "Precision": 0.2514590672485409,
    "Recall": 0.009354341066358218,
    "F1": NaN
}


  fscore_dict[imdb_id] = 2 * p * r / (p + r)
