# Model

Each function is one of the classifiers (trained using sklearn)

In [101]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns', None)

## General

In [78]:
def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

In [115]:
def load_model(data, effectiveness_index):

    data = clean_dataset(data)
    y = data[effectiveness_index]
    y = [int(x) for x in y]
    data.drop(effectiveness_index, inplace = True, axis = 1)
    X = data
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
    
    return X_train, y_train, X_test, y_test

In [99]:
def analyze_model(X_test, y_test, y_pred):
    
    #importing confusion matrix
    from sklearn.metrics import confusion_matrix
    confusion = confusion_matrix(y_test, y_pred)
    print('Confusion Matrix\n')
    print(confusion)

    #importing accuracy_score, precision_score, recall_score, f1_score
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    print('\nAccuracy: {:.2f}\n'.format(accuracy_score(y_test, y_pred)))

    print('Micro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='micro')))
    print('Micro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='micro')))
    print('Micro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='micro')))

    print('Macro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='macro')))
    print('Macro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='macro')))
    print('Macro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='macro')))

    print('Weighted Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='weighted')))
    print('Weighted Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='weighted')))
    print('Weighted F1-score: {:.2f}'.format(f1_score(y_test, y_pred, average='weighted')))

    from sklearn.metrics import classification_report
    print('\nClassification Report\n')
    print(classification_report(y_test, y_pred, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4', 'Class 5']))

## Logistic Regression

In [121]:
from sklearn.linear_model import LogisticRegression

def logistic_regression(path_name, effectiveness_index):

    X_train, y_train, X_test, y_test = load_model(path_name, effectiveness_index)
    
    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)
    y_pred = logisticRegr.predict(X_test)
#     print("Accuracy Score: " + logisticRegr.score(X_test, y_test))
    
    analyze_model(X_test, y_test, y_pred)
    
    return y_pred

## Naive Bayes

In [122]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes(path_name, effectiveness_index):
    
    X_train, y_train, X_test, y_test = load_model(path_name, effectiveness_index)
    
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred = gnb.predict(X_test)
    
    analyze_model(X_test, y_test, y_pred)
    
    return y_pred

## SVM

In [123]:
from sklearn import svm

def support_vector(path_name, effectiveness_index):
    
    X_train, y_train, X_test, y_test = load_model(path_name, effectiveness_index)
    
    clf = svm.SVC(decision_function_shape='ovr', C=15)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    analyze_model(X_test, y_test, y_pred)
    
    return y_pred

## Decision Tree

In [124]:
from sklearn.tree import DecisionTreeClassifier

def decision_tree(path_name, effectiveness_index):
    
    X_train, y_train, X_test, y_test = load_model(path_name, effectiveness_index)

    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    analyze_model(X_test, y_test, y_pred)
    
    return y_pred

## Hybridization

In [89]:
def hybridize(*models):
    
    for model in models:
        
        sum += model
    
    y_pred = []
    
    for pred in sum:
        
        y_pred.append(int(sum > 2))
        
    analyze_model(X_test, y_test, y_pred)
    

# Implementation

In [112]:
#Uploading data
data = pd.read_csv("/Users/harrisonkane/Downloads/annotations_videos/final data version 3.csv")
data.drop(labels = [i for i in range(1835, len(data))], inplace = True, axis = 0)
data.drop(labels = ["Average_Hue.1", "Median_Hue.1"], inplace = True, axis = 1)
data.drop("Videos", inplace = False, axis = 1)
new_data = data.drop(labels = ["Median_Hue", "Average_Hue"], inplace = False, axis = 1)

new_data.head()

Unnamed: 0,Videos,Shot_Boundary,Duration,Entropy,Average_Intensities,Average_Intensities_30,Average_Intensities_60,Optical_Flow,Funny,Average_Memorability,Topics,Exciting,Sentiments,Entropy.1,Optical_Flow.1,Exciting.1,Funny.1,Average_Memorability.1,Language,Topics.1,Sentiments.1,BPM,A,B,C,D,E,F,G,Major/Minor,Sharp/Flat,Effectiveness
0,__C7sd_UDU0,19.0,48.0,0.968461,0.358354,0.417023,0.474382,804401.9,0.4,0.725416,36.0,0.4,11.0,0.968461,804401.9,0.4,0.4,0.725416,1.0,36.0,11.0,93.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0
1,_3sLwG1ZSBA,35.0,38.0,0.398459,0.267378,0.357319,0.327957,1787684.0,1.0,0.724435,27.0,1.0,6.0,0.398459,1787684.0,1.0,1.0,0.724435,1.0,27.0,6.0,136.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0
2,_6MAkLJ79LE,13.0,31.0,0.981152,0.298359,0.271681,0.305467,1278895.0,0.4,0.724163,21.0,0.0,30.0,0.981152,1278895.0,0.0,0.4,0.724163,1.0,21.0,30.0,99.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0
3,_6rj5jisB7g,47.0,60.0,0.754034,0.436912,0.503742,0.498872,1591243.0,0.6,0.725703,9.0,0.8,5.0,0.754034,1591243.0,0.8,0.6,0.725703,1.0,9.0,5.0,137.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0
4,_8enIDEKrzA,11.0,35.0,0.898059,0.412479,0.387862,0.38757,615041.6,0.8,0.725296,28.0,0.4,6.0,0.898059,615041.6,0.4,0.8,0.725296,1.0,28.0,6.0,91.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0


In [125]:
#Logistic Regression

logistic_regression(new_data, "Effectiveness")

Confusion Matrix

[[  0   0   0   0  18]
 [  0   0   0   0  31]
 [  0   0   0   0 102]
 [  0   0   0   0  37]
 [  0   0   0   0 178]]

Accuracy: 0.49

Micro Precision: 0.49
Micro Recall: 0.49
Micro F1-score: 0.49

Macro Precision: 0.10
Macro Recall: 0.20
Macro F1-score: 0.13

Weighted Precision: 0.24
Weighted Recall: 0.49
Weighted F1-score: 0.32

Classification Report

              precision    recall  f1-score   support

     Class 1       0.00      0.00      0.00        18
     Class 2       0.00      0.00      0.00        31
     Class 3       0.00      0.00      0.00       102
     Class 4       0.00      0.00      0.00        37
     Class 5       0.49      1.00      0.65       178

    accuracy                           0.49       366
   macro avg       0.10      0.20      0.13       366
weighted avg       0.24      0.49      0.32       366



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

In [126]:
# Naive-Bayes

naive_bayes(new_data, "Effectiveness")

Confusion Matrix

[[  0   0   6   0  13]
 [  0   0   5   0  18]
 [  0   0  24   0  78]
 [  0   0   9   0  37]
 [  0   0  29   0 147]]

Accuracy: 0.47

Micro Precision: 0.47
Micro Recall: 0.47
Micro F1-score: 0.47

Macro Precision: 0.17
Macro Recall: 0.21
Macro F1-score: 0.18

Weighted Precision: 0.33
Weighted Recall: 0.47
Weighted F1-score: 0.38

Classification Report

              precision    recall  f1-score   support

     Class 1       0.00      0.00      0.00        19
     Class 2       0.00      0.00      0.00        23
     Class 3       0.33      0.24      0.27       102
     Class 4       0.00      0.00      0.00        46
     Class 5       0.50      0.84      0.63       176

    accuracy                           0.47       366
   macro avg       0.17      0.21      0.18       366
weighted avg       0.33      0.47      0.38       366



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


array([5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 3, 5, 5, 3, 5, 3, 5, 3,
       5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 3, 5, 3, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 3, 5, 5, 3, 3, 3, 5, 5, 3, 3, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5,
       5, 5, 3, 5, 3, 5, 3, 5, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 3, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5,
       5, 3, 5, 5, 5, 5, 3, 5, 3, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 3, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5,
       5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 3, 5, 3, 5, 5, 3, 3, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 3, 5, 3, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 3, 5,
       5, 3, 5, 3, 3, 5, 5, 3, 3, 3, 5, 5, 3, 5, 5,

In [127]:
# SVM

support_vector(new_data, "Effectiveness")

Confusion Matrix

[[  0   0   0   0  18]
 [  0   0   0   0  29]
 [  0   0   0   0  99]
 [  0   0   0   0  39]
 [  0   0   0   0 181]]

Accuracy: 0.49

Micro Precision: 0.49
Micro Recall: 0.49
Micro F1-score: 0.49

Macro Precision: 0.10
Macro Recall: 0.20
Macro F1-score: 0.13

Weighted Precision: 0.24
Weighted Recall: 0.49
Weighted F1-score: 0.33

Classification Report

              precision    recall  f1-score   support

     Class 1       0.00      0.00      0.00        18
     Class 2       0.00      0.00      0.00        29
     Class 3       0.00      0.00      0.00        99
     Class 4       0.00      0.00      0.00        39
     Class 5       0.49      1.00      0.66       181

    accuracy                           0.49       366
   macro avg       0.10      0.20      0.13       366
weighted avg       0.24      0.49      0.33       366



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

In [128]:
# Decision Tree

decision_tree(new_data, "Effectiveness")

Confusion Matrix

[[ 1  4 13  1  5]
 [ 1  1 12  0  8]
 [ 1  9 34 11 41]
 [ 3  4  9 12 15]
 [18 13 40 14 96]]

Accuracy: 0.39

Micro Precision: 0.39
Micro Recall: 0.39
Micro F1-score: 0.39

Macro Precision: 0.26
Macro Recall: 0.25
Macro F1-score: 0.25

Weighted Precision: 0.41
Weighted Recall: 0.39
Weighted F1-score: 0.40

Classification Report

              precision    recall  f1-score   support

     Class 1       0.04      0.04      0.04        24
     Class 2       0.03      0.05      0.04        22
     Class 3       0.31      0.35      0.33        96
     Class 4       0.32      0.28      0.30        43
     Class 5       0.58      0.53      0.55       181

    accuracy                           0.39       366
   macro avg       0.26      0.25      0.25       366
weighted avg       0.41      0.39      0.40       366



array([3, 5, 3, 5, 3, 3, 5, 3, 4, 5, 5, 4, 5, 3, 3, 4, 4, 3, 2, 5, 2, 4,
       4, 5, 3, 2, 5, 5, 5, 3, 4, 5, 2, 3, 5, 1, 1, 5, 4, 4, 3, 3, 5, 3,
       4, 3, 5, 3, 3, 1, 5, 2, 4, 3, 3, 2, 5, 2, 5, 3, 5, 5, 3, 3, 3, 5,
       3, 5, 3, 3, 4, 5, 5, 3, 3, 3, 3, 5, 5, 5, 5, 3, 5, 5, 5, 4, 4, 5,
       4, 3, 2, 4, 1, 5, 3, 5, 3, 5, 3, 3, 5, 5, 5, 4, 5, 4, 5, 3, 3, 5,
       2, 3, 5, 5, 5, 1, 5, 3, 5, 1, 5, 5, 3, 5, 5, 4, 5, 2, 5, 4, 1, 5,
       5, 5, 1, 5, 5, 3, 3, 3, 2, 1, 5, 5, 5, 3, 3, 2, 3, 3, 3, 4, 5, 1,
       5, 5, 5, 2, 3, 4, 5, 5, 5, 3, 3, 1, 5, 1, 5, 4, 3, 3, 5, 5, 3, 5,
       2, 3, 3, 3, 5, 5, 5, 3, 3, 5, 5, 5, 3, 1, 5, 3, 5, 5, 4, 5, 5, 5,
       3, 3, 3, 5, 3, 5, 3, 5, 4, 2, 5, 5, 5, 3, 5, 1, 4, 2, 5, 1, 5, 5,
       5, 3, 1, 4, 2, 1, 3, 5, 4, 5, 3, 1, 5, 1, 3, 3, 5, 3, 5, 5, 5, 5,
       4, 3, 5, 5, 5, 3, 5, 4, 3, 5, 5, 2, 2, 5, 5, 3, 5, 3, 3, 5, 3, 5,
       4, 5, 5, 5, 2, 2, 5, 3, 5, 4, 4, 1, 5, 2, 1, 5, 3, 3, 5, 5, 5, 2,
       5, 5, 5, 4, 5, 3, 5, 3, 5, 5, 5, 5, 3, 3, 1,