In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install focal-loss

Collecting focal-loss
  Downloading https://files.pythonhosted.org/packages/4a/96/babb0f40b2046a45aa2263773d915a34f02d4fb6bae91a505ce2db8ab0b2/focal_loss-0.0.6-py3-none-any.whl
Installing collected packages: focal-loss
Successfully installed focal-loss-0.0.6


In [3]:
import pickle
import torch
import numpy as np
import sklearn
import os
from sklearn import preprocessing
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TF info
import tensorflow as tf
import matplotlib.pyplot as plt
from focal_loss import BinaryFocalLoss
import time
from numpy import save
from keras.models import load_model

**This portion of the code is to extract the 4 Sub-datasets**

In [2]:
directory = '/content/drive/MyDrive/data'
files = os.listdir(directory) 
total_files = len(files) #Calculate total number of files 
windows_in_dataset = np.zeros((total_files), dtype = np.uint16) #since maximum number of shots in dataset is 3096

In [3]:
l = 0
window = 7 # Window needs to be an int greater than 1 and odd!
first = int((window - 1)/2)

In [4]:
for i in range(18,35):
    
    filename = directory + '/' + files[i]
    f = open(filename, 'rb')
    data = pickle.load(f)
    f.close()
        
    feat1 = data['place']
    feat1 = feat1.data.numpy() #convert tensors into numpy arrays for sklearn
    feat1_size = feat1.shape[1]
    
    feat2 = data['cast']
    feat2 = feat2.data.numpy()
    feat2_size = feat2.shape[1]
    
    feat3 = data['action']
    feat3 = feat3.data.numpy()
    feat3_size = feat3.shape[1]
    
    feat4 = data['audio']
    feat4 = feat4.data.numpy()
    feat4_size = feat4.shape[1]
    
    x = np.hstack((feat1, feat2, feat3, feat4))
    y = data['scene_transition_boundary_ground_truth']
            
    scaler = preprocessing.MinMaxScaler().fit(x)
    x_scaled = scaler.transform(x)

    # Pad the start and end with zeros 
    padding = np.zeros((first, x_scaled.shape[1]))
    x_scaled = np.concatenate((padding, x_scaled, padding), axis=0)
    N = x_scaled.shape[0] 
    j = 0
    GT = []

    for p in range(first, (N - first) - 1):
        temp1 = x_scaled[p - first: p + first + 1, :]
        temp1 = np.reshape(temp1, (1, window, temp1.shape[1]))
        
        temp2 = y[p - first].data.numpy()
        temp2 = str(temp2)
        if(j == 0):
            X = temp1
        else:
            X = np.concatenate((X, temp1), axis=0)

        GT.append(temp2)
        j = j + 1    

    print('Iter ID:',i,' ','Array size for X:', X.shape,' ','Grount Truth Size:',len(GT))

    if (l == 0):
        X_data = X
        Y_data = GT
    else:
        X_data = np.concatenate((X_data, X), axis = 0)
        Y_data.extend(GT)
           
    l = l + 1

print('X_data')
print('Final array size:', ' ', X_data.shape, ' ', len(Y_data))

Iter ID: 18   Array size for X: (1783, 7, 3584)   Grount Truth Size: 1783
Iter ID: 19   Array size for X: (1301, 7, 3584)   Grount Truth Size: 1301
Iter ID: 20   Array size for X: (3095, 7, 3584)   Grount Truth Size: 3095
Iter ID: 21   Array size for X: (1999, 7, 3584)   Grount Truth Size: 1999
Iter ID: 22   Array size for X: (1069, 7, 3584)   Grount Truth Size: 1069
Iter ID: 23   Array size for X: (1211, 7, 3584)   Grount Truth Size: 1211
Iter ID: 24   Array size for X: (1536, 7, 3584)   Grount Truth Size: 1536
Iter ID: 25   Array size for X: (1779, 7, 3584)   Grount Truth Size: 1779
Iter ID: 26   Array size for X: (2033, 7, 3584)   Grount Truth Size: 2033
Iter ID: 27   Array size for X: (759, 7, 3584)   Grount Truth Size: 759
Iter ID: 28   Array size for X: (1150, 7, 3584)   Grount Truth Size: 1150
Iter ID: 29   Array size for X: (1396, 7, 3584)   Grount Truth Size: 1396
Iter ID: 30   Array size for X: (1176, 7, 3584)   Grount Truth Size: 1176
Iter ID: 31   Array size for X: (1062, 7

In [5]:
M = len(Y_data)
#print('Y_gt')
Y_gt = np.zeros((M), dtype = np.uint8)
for i in range(M):
    if(Y_data[i] == 'True'):
        Y_gt[i] = 1
    elif(Y_data[i] == 'False'):
        Y_gt[i] = 0

# save array to file for easy load later
print('Saving array to file \n')
out_filename = '/content/drive/MyDrive/eluvio/datasets/dataset18-34.npz'
print(out_filename)
np.savez_compressed(out_filename, a = X_data, b = Y_gt) 

Saving array to file 

/content/drive/MyDrive/eluvio/datasets/dataset18-34.npz


**This cell is to enable the GPU**

In [7]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not Found')
else:
  print(gpu_info)

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime!')
else:
  print('You are using a high-RAM runtime!')

Not Found
Your runtime has 13.7 gigabytes of available RAM

Not using a high-RAM runtime!


In [3]:
window = 7 # Window needs to be an int greater than 1 and odd!
first = int((window - 1)/2)
array_dims = np.zeros((4), dtype = np.int32)
array_dims[0] = 2048
array_dims[1] = 512 #feat1_size + feat2_size
array_dims[2] = 512 #feat1_size + feat2_size + feat3_size
array_dims[3] = 512 #feat1_size + feat2_size + feat3_size + feat4_size

**The data is loaded here from the sub-dataset** 

In [6]:
filename1 = '/content/drive/MyDrive/eluvio/datasets/dataset0-17.npz'
loaded1 = np.load(filename1)
X1 = loaded1['a']
Y1 = loaded1['b']
print(filename1, X1.shape, Y1.shape)

/content/drive/MyDrive/eluvio/datasets/dataset0-17.npz (27452, 7, 3584) (27452,)


**The model is created here**

In [7]:
inputs = tf.keras.layers.Input(shape = (window, 3584)) #X_data.shape[0], X_data.shape[1]
x1, x2, x3, x4 = tf.split(inputs, array_dims, axis = 2) # split inputs into given features for two consecutive shots
print(x1.shape, x2.shape, x3.shape, x4.shape)

(None, 7, 2048) (None, 7, 512) (None, 7, 512) (None, 7, 512)


In [8]:
conv1 = tf.keras.Sequential()
conv1.add(tf.keras.layers.Conv1D(filters = 24, kernel_size = 5, strides = 1, padding='same', activation='linear', 
kernel_initializer='lecun_normal', bias_initializer='lecun_normal', kernel_regularizer='l1', bias_regularizer='l1')) #1
conv1.add(tf.keras.layers.BatchNormalization(axis=2))
conv1.add(tf.keras.layers.ReLU())
conv1.add(tf.keras.layers.GlobalAveragePooling1D()) #24

conv2 = tf.keras.Sequential()
conv2.add(tf.keras.layers.Conv1D(filters = 8, kernel_size = 5, strides = 1, padding='same', activation='linear', 
kernel_initializer='lecun_normal', bias_initializer='lecun_normal', kernel_regularizer='l1', bias_regularizer='l1')) #1
conv2.add(tf.keras.layers.BatchNormalization(axis=2))
conv2.add(tf.keras.layers.ReLU())
conv2.add(tf.keras.layers.GlobalAveragePooling1D()) #8

conv3 = tf.keras.Sequential()
conv3.add(tf.keras.layers.Conv1D(filters = 8, kernel_size = 5, strides = 1, padding='same', activation='linear', 
kernel_initializer='lecun_normal', bias_initializer='lecun_normal', kernel_regularizer='l1', bias_regularizer='l1')) #1
conv3.add(tf.keras.layers.BatchNormalization(axis=2))
conv3.add(tf.keras.layers.ReLU())
conv3.add(tf.keras.layers.GlobalAveragePooling1D()) #8

conv4 = tf.keras.Sequential()
conv4.add(tf.keras.layers.Conv1D(filters = 8, kernel_size = 5, strides = 1, padding='same', activation='linear', 
kernel_initializer='lecun_normal', bias_initializer='lecun_normal', kernel_regularizer='l1', bias_regularizer='l1')) #1
conv4.add(tf.keras.layers.BatchNormalization(axis=2))
conv4.add(tf.keras.layers.ReLU())
conv4.add(tf.keras.layers.GlobalAveragePooling1D()) #8

In [9]:
encode1 = conv1(x1)
encode2 = conv2(x2)
encode3 = conv3(x3)
encode4 = conv4(x4)

In [10]:
shot1 = tf.keras.layers.Concatenate(axis = 1)([encode1, encode2, encode3, encode4])
print(shot1.shape)

(None, 48)


In [11]:
shot_d1 = tf.keras.layers.Dense(50, activation='linear', kernel_regularizer='l1', bias_regularizer='l1')(shot1)
shot_r1 = tf.keras.layers.Dropout(0.4)(shot_d1)
shot_b1 = tf.keras.layers.BatchNormalization(axis = 1)(shot_d1)

In [12]:
output = tf.keras.layers.Dense(1, activation = 'sigmoid')(shot_b1)
model = tf.keras.Model(inputs = inputs, outputs = output)
opt = tf.keras.optimizers.SGD(learning_rate = 0.001)
model.compile(optimizer = opt, loss = BinaryFocalLoss(pos_weight = 9, gamma = 2.5), metrics=[tf.keras.losses.BinaryCrossentropy()]) #BinaryFocalLoss(pos_weight=7, gamma=4)

In [13]:
model.summary()
#tf.keras.utils.plot_model(model, to_file=image_out, dpi=100)

training_log = '/content/drive/MyDrive/eluvio/' + '/' + 'separable_lasso2' + '.txt'
print(training_log)
csv_logger = tf.keras.callbacks.CSVLogger(training_log, append = True, separator=' ')
metrics = model.fit(X1, Y1, epochs = 75, validation_split= 0.2, verbose=2, batch_size = 32, callbacks=[csv_logger])

model_ID = '/content/drive/MyDrive/eluvio/test_model1.h5'
print(model_ID)
tf.keras.models.save_model(model, model_ID)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 7, 3584)]    0                                            
__________________________________________________________________________________________________
tf.split (TFOpLambda)           [(None, 7, 2048), (N 0           input_1[0][0]                    
__________________________________________________________________________________________________
sequential (Sequential)         (None, 24)           245880      tf.split[0][0]                   
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 8)            20520       tf.split[0][1]                   
______________________________________________________________________________________________

In [4]:
filename2 = '/content/drive/MyDrive/eluvio/datasets/dataset18-34.npz'
loaded2 = np.load(filename2)
X2 = loaded2['a']
Y2 = loaded2['b']
print(filename2, X2.shape, Y2.shape)

/content/drive/MyDrive/eluvio/datasets/dataset18-34.npz (27018, 7, 3584) (27018,)


In [None]:
window = 7 # Window needs to be an int greater than 1 and odd!
first = int((window - 1)/2)
array_dims = np.zeros((4), dtype = np.int32)
array_dims[0] = 2048
array_dims[1] = 512 #feat1_size + feat2_size
array_dims[2] = 512 #feat1_size + feat2_size + feat3_size
array_dims[3] = 512 #feat1_size + feat2_size + feat3_size + feat4_size

In [5]:
model=tf.keras.models.load_model('/content/drive/MyDrive/eluvio/test_model1.h5',compile=False)
model.summary()

opt = tf.keras.optimizers.SGD(learning_rate = 0.001)
model.compile(optimizer = opt, loss = BinaryFocalLoss(pos_weight = 9, gamma = 2.5), metrics=[tf.keras.losses.BinaryCrossentropy()]) #BinaryFocalLoss(pos_weight=7, gamma=4)

training_log = '/content/drive/MyDrive/eluvio/' + '/' + 'separable_lasso2' + '.txt'
print(training_log)
csv_logger = tf.keras.callbacks.CSVLogger(training_log, append = True, separator=' ')

metrics = model.fit(X2, Y2, epochs = 75, validation_split= 0.2, verbose=2, batch_size = 32, callbacks=[csv_logger])
model_ID = '/content/drive/MyDrive/eluvio/test_model12.h5'
print(model_ID)
tf.keras.models.save_model(model,model_ID)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 7, 3584)]    0                                            
__________________________________________________________________________________________________
tf.split (TFOpLambda)           [(None, 7, 2048), (N 0           input_1[0][0]                    
__________________________________________________________________________________________________
sequential (Sequential)         (None, 24)           245880      tf.split[0][0]                   
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 8)            20520       tf.split[0][1]                   
______________________________________________________________________________________________

In [None]:
window = 7 # Window needs to be an int greater than 1 and odd!
first = int((window - 1)/2)
array_dims = np.zeros((4), dtype = np.int32)
array_dims[0] = 2048
array_dims[1] = 512 #feat1_size + feat2_size
array_dims[2] = 512 #feat1_size + feat2_size + feat3_size
array_dims[3] = 512 #feat1_size + feat2_size + feat3_size + feat4_size

In [4]:
filename3 = '/content/drive/MyDrive/eluvio/datasets/dataset35-50.npz'
loaded3 = np.load(filename3)
X3 = loaded3['a']
Y3 = loaded3['b']

In [5]:
model=tf.keras.models.load_model('/content/drive/MyDrive/eluvio/test_model12.h5',compile=False)
model.summary()

opt = tf.keras.optimizers.SGD(learning_rate = 0.001)
model.compile(optimizer = opt, loss = BinaryFocalLoss(pos_weight = 9, gamma = 2.5), metrics=[tf.keras.losses.BinaryCrossentropy()]) #BinaryFocalLoss(pos_weight=7, gamma=4)

training_log = '/content/drive/MyDrive/eluvio/' + '/' + 'separable_lasso2' + '.txt'
print(training_log)
csv_logger = tf.keras.callbacks.CSVLogger(training_log, append = True, separator=' ')

metrics = model.fit(X3, Y3, epochs = 75, validation_split= 0.2, verbose=2, batch_size = 25, callbacks=[csv_logger])
model_ID = '/content/drive/MyDrive/eluvio/test_model3.h5'
print(model_ID)
tf.keras.models.save_model(model,model_ID)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 7, 3584)]    0                                            
__________________________________________________________________________________________________
tf.split (TFOpLambda)           [(None, 7, 2048), (N 0           input_1[0][0]                    
__________________________________________________________________________________________________
sequential (Sequential)         (None, 24)           245880      tf.split[0][0]                   
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 8)            20520       tf.split[0][1]                   
______________________________________________________________________________________________

**The metrics are calcuated here**

In [3]:
directory = '/content/drive/MyDrive/data'
files = os.listdir(directory) 
total_files = len(files) #Calculate total number of files 

In [4]:
l = 0
window = 7 # Window needs to be an int greater than 1 and odd!
first = int((window - 1)/2)

In [5]:
model = tf.keras.models.load_model('/content/drive/MyDrive/eluvio/test_model3.h5', compile = False)

In [6]:
pr_dict = dict()
for i in range(51,64):
    
    filename = directory + '/' + files[i]
    f = open(filename, 'rb')
    data = pickle.load(f)
    f.close()
        
    feat1 = data['place']
    feat1 = feat1.data.numpy() #convert tensors into numpy arrays for sklearn
    feat1_size = feat1.shape[1]
    
    feat2 = data['cast']
    feat2 = feat2.data.numpy()
    feat2_size = feat2.shape[1]
    
    feat3 = data['action']
    feat3 = feat3.data.numpy()
    feat3_size = feat3.shape[1]
    
    feat4 = data['audio']
    feat4 = feat4.data.numpy()
    feat4_size = feat4.shape[1]
    
    x = np.hstack((feat1, feat2, feat3, feat4))
    #y = data['scene_transition_boundary_ground_truth']
    #y_new = y.data.numpy()
            
    scaler = preprocessing.MinMaxScaler().fit(x)
    x_scaled = scaler.transform(x)
 
    # Pad the start and end with zeros 
    padding = np.zeros((first, x_scaled.shape[1]))
    x_scaled = np.concatenate((padding, x_scaled, padding), axis=0)
                
    #Fold the data set to obtain features from adjoining shots
    N = x_scaled.shape[0] #changed from x_scaled
    j = 0
    #GT = []

    for p in range(first, (N - first) - 1):
        #window_range = np.arange(start = p - first, stop = p + first + 1)
        temp1 = x_scaled[p - first: p + first + 1, :]
        #print(p - first, p + first + 1, p - first, temp1.shape[0], temp1.shape[1])
        temp1 = np.reshape(temp1, (1, window, temp1.shape[1]))
        
        #temp2 = y[p - first].data.numpy()
        #print(p -first)
        #temp2 = str(temp2)
        if(j == 0):
            X = temp1
        else:
            X = np.concatenate((X, temp1), axis=0)

        #GT.append(temp2)
        j = j + 1    

    predictions=model.predict(X)
    pr_dict[data["imdb_id"]] = predictions




In [7]:
gt_dict = dict()
#pr_dict = dict()
shot_to_end_frame_dict = dict()
for i in range(51,64):
    
    filename = directory + '/' + files[i]
    x = pickle.load(open(filename, "rb"))

    gt_dict[x["imdb_id"]] = x["scene_transition_boundary_ground_truth"]
    #pr_dict[x["imdb_id"]] = x["scene_transition_boundary_prediction"]
    shot_to_end_frame_dict[x["imdb_id"]] = x["shot_end_frame"]



In [9]:
import numpy as np
from sklearn.metrics import average_precision_score


def calc_ap(gt_dict, pr_dict):
    """Average Precision (AP) for scene transitions.
    Args:
        gt_dict: Scene transition ground-truths.
        pr_dict: Scene transition predictions.
    Returns:
        AP, mean AP, and a dict of AP for each movie.
    """
    assert gt_dict.keys() == pr_dict.keys()

    AP_dict = dict()
    gt = list()
    pr = list()
    for imdb_id in gt_dict.keys():
        AP_dict[imdb_id] = average_precision_score(gt_dict[imdb_id], pr_dict[imdb_id])
        gt.append(gt_dict[imdb_id])
        pr.append(pr_dict[imdb_id])

    mAP = sum(AP_dict.values()) / len(AP_dict)

    gt = np.concatenate(gt)
    pr = np.concatenate(pr)
    AP = average_precision_score(gt, pr)

    return AP, mAP, AP_dict


def calc_miou(gt_dict, pr_dict, shot_to_end_frame_dict, threshold=0.5):
    """Maximum IoU (Miou) for scene segmentation.
    Miou measures how well the predicted scenes and ground-truth scenes overlap. The descriptions can be found in
    https://arxiv.org/pdf/1510.08893.pdf. Note the length of intersection or union is measured by the number of frames.
    Args:
        gt_dict: Scene transition ground-truths.
        pr_dict: Scene transition predictions.
        shot_to_end_frame_dict: End frame index for each shot.
        threshold: A threshold to filter the predictions.
    Returns:
        Mean MIoU, and a dict of MIoU for each movie.
    """

    def iou(x, y):
        s0, e0 = x
        s1, e1 = y
        smin, smax = (s0, s1) if s1 > s0 else (s1, s0)
        emin, emax = (e0, e1) if e1 > e0 else (e1, e0)
        return (emin - smax + 1) / (emax - smin + 1)

    def scene_frame_ranges(scene_transitions, shot_to_end_frame):
        end_shots = np.where(scene_transitions)[0]
        scenes = np.zeros((len(end_shots) + 1, 2), dtype=end_shots.dtype)
        scenes[:-1, 1] = shot_to_end_frame[end_shots]
        scenes[-1, 1] = shot_to_end_frame[len(scene_transitions)]
        scenes[1:, 0] = scenes[:-1, 1] + 1
        return scenes

    def miou(gt_array, pr_array, shot_to_end_frame):
        gt_scenes = scene_frame_ranges(gt_array, shot_to_end_frame)
        pr_scenes = scene_frame_ranges(pr_array >= threshold, shot_to_end_frame)
        assert gt_scenes[-1, -1] == pr_scenes[-1, -1]

        m = gt_scenes.shape[0]
        n = pr_scenes.shape[0]

        # IoU for (gt_scene, pr_scene) pairs
        iou_table = np.zeros((m, n))

        j = 0
        for i in range(m):
            # j start prior to i end
            while pr_scenes[j, 0] <= gt_scenes[i, 1]:
                iou_table[i, j] = iou(gt_scenes[i], pr_scenes[j])
                if j < n - 1:
                    j += 1
                else:
                    break
            # j end prior to (i + 1) start
            if pr_scenes[j, 1] < gt_scenes[i, 1] + 1:
                break
            # j start later than (i + 1) start
            if pr_scenes[j, 0] > gt_scenes[i, 1] + 1:
                j -= 1
        assert np.isnan(iou_table).sum() == 0
        assert iou_table.min() >= 0

        # Miou
        return (iou_table.max(axis=0).mean() + iou_table.max(axis=1).mean()) / 2

    assert gt_dict.keys() == pr_dict.keys()

    miou_dict = dict()

    for imdb_id in gt_dict.keys():
        miou_dict[imdb_id] = miou(gt_dict[imdb_id], pr_dict[imdb_id], shot_to_end_frame_dict[imdb_id])
    mean_miou = sum(miou_dict.values()) / len(miou_dict)

    return mean_miou, miou_dict


def calc_precision_recall(gt_dict, pr_dict, threshold=0.5):
    """Precision, Recall and F1 for scene transitions at a given threshold.
    Args:
        gt_dict: Scene transition ground-truths.
        pr_dict: Scene transition predictions.
        threshold: A threshold to filter the predictions.
    Returns:
        Mean Precision, Recall, and F1, per IMDB ID Precisions, Recalls, and F1 scores.
    """

    def precision_recall(gt_array, pr_array):
        tp_fn = gt_array == 1
        tp_fp = pr_array >= threshold

        tps = (tp_fn & tp_fp).sum()

        precision = tps / tp_fp.sum()
        recall = tps / tp_fn.sum()

        return np.nan_to_num(precision), np.nan_to_num(recall)

    assert gt_dict.keys() == pr_dict.keys()

    precision_dict = dict()
    recall_dict = dict()
    fscore_dict = dict()

    for imdb_id in gt_dict.keys():
        p, r = precision_recall(gt_dict[imdb_id], pr_dict[imdb_id])
        precision_dict[imdb_id] = p
        recall_dict[imdb_id] = r
        fscore_dict[imdb_id] = 2 * p * r / (p + r)

    n = len(gt_dict)
    mean_precision = sum(precision_dict.values()) / n
    mean_recall = sum(recall_dict.values()) / n
    mean_fscore = sum(fscore_dict.values()) / n

    return mean_precision, mean_recall, mean_fscore, precision_dict, recall_dict, fscore_dict

In [10]:
import json
scores = dict()

scores["AP"], scores["mAP"], _ = calc_ap(gt_dict, pr_dict)
scores["Miou"], _ = calc_miou(gt_dict, pr_dict, shot_to_end_frame_dict)
scores["Precision"], scores["Recall"], scores["F1"], *_ = calc_precision_recall(gt_dict, pr_dict)

print("Scores:", json.dumps(scores, indent=4))

Scores: {
    "AP": 0.1435839777948291,
    "mAP": 0.1519404850531079,
    "Miou": 0.3321781278961218,
    "Precision": 152.92307692307693,
    "Recall": 694.4615384615385,
    "F1": 246.37457483717378
}
