In [1]:
#import libraries
import pandas as pd
from ast import literal_eval
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report, recall_score, precision_score
import numpy as np
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier

In [2]:
def evaluate_model(model, predictors, response, cv=False, params=None):
    """
    Fit a one-vs-rest classifier on an 80/20 split and collect its predictions and scores.

    -splits the predictors & response variables into train and test sets
     (test_size=0.2, random_state=9001), in a single call so rows stay aligned
    -optionally wraps the model in a grid search (micro-averaged F1 scoring)
     to pick the best parameters
    -wraps the (possibly grid-searched) model in a OneVsRestClassifier and fits it
    -creates a dictionary of model outcomes that are of interest

    NOTE: the classification reports read the module-level `target_names`
    list, which must be defined before this function is called.

    inputs:
        -model: a model object to be fitted
        -predictors: an array, series, or dataframe of predictor variable(s)
        -response: an array or series of the response variable
        -cv: whether or not to cross-validate the model's parameters (default=False)
        -params: if cv=True, params are required to indicate what parameters to optimize in the given model (default=None)

    outputs:
        -a results dictionary containing the following:
            -'fitted_model': the fitted OneVsRestClassifier
            -'train_yhat' / 'test_yhat': predictions on the train / test split
            -'train_recall_score' / 'test_recall_score': weighted-average recall
            -'train_precision_score' / 'test_precision_score': weighted-average precision
            -'train_classification_report' / 'test_classification_report':
             text reports labelled with `target_names`

    raises:
        -ValueError: if cv=True but no params grid was supplied

    """
    # Fail fast with a clear message instead of erroring inside GridSearchCV.
    if cv and params is None:
        raise ValueError("params must be provided when cv=True")

    # One call splits both arrays with the same shuffle; sklearn also rejects
    # inputs of different lengths here, which two separate calls would not.
    train_x, test_x, train_y, test_y = train_test_split(
        predictors, response, test_size=0.2, random_state=9001)

    if cv:
        model = GridSearchCV(model, params, scoring=make_scorer(f1_score, average='micro'))

    classif = OneVsRestClassifier(model)
    classif.fit(train_x, train_y)

    train_yhat = classif.predict(train_x)
    test_yhat = classif.predict(test_x)

    # Key insertion order is kept as in the original implementation.
    results = {}
    results['fitted_model'] = classif

    results['train_yhat'] = train_yhat
    results['test_yhat'] = test_yhat

    results['train_recall_score'] = recall_score(train_y, train_yhat, average='weighted')
    results['test_recall_score'] = recall_score(test_y, test_yhat, average='weighted')

    results['train_precision_score'] = precision_score(train_y, train_yhat, average='weighted')
    results['test_precision_score'] = precision_score(test_y, test_yhat, average='weighted')

    # `target_names` is a module-level global (see docstring).
    results['train_classification_report'] = classification_report(train_y, train_yhat, target_names=target_names)
    results['test_classification_report'] = classification_report(test_y, test_yhat, target_names=target_names)

    return results

In [3]:
import json

# Load the genre-id -> genre-name mapping. The context manager closes the
# file handle, which the bare json.load(open(...)) form left open.
with open('dataset/id_to_genre.json') as genre_file:
    id_to_genre = json.load(genre_file)

id_to_genre = {int(key):value for key, value in id_to_genre.items()} #convert string keys to int keys

In [4]:
# Load the label names stored under the 'tmdb' key; evaluate_model() reads
# this global when building classification reports. The context manager
# closes the file handle, which the bare json.load(open(...)) form left open.
with open('dataset/target_names.json') as names_file:
    target_names = json.load(names_file)['tmdb']

target_names

['Adventure',
 'Fantasy',
 'Animation',
 'Drama',
 'Horror',
 'Action',
 'Comedy',
 'History',
 'Western',
 'Thriller',
 'Crime',
 'Science Fiction',
 'Mystery',
 'Music',
 'Romance',
 'Family',
 'War',
 'TV Movie']

In [5]:
# Load the saved bag-of-words arrays for the tmdb, imdb and combined sources.
tmdb_bow, imdb_bow, combined_bow = [
    np.load(bow_path)
    for bow_path in ('dataset/tmdb_bow.npy',
                     'dataset/imdb_bow.npy',
                     'dataset/combined_bow.npy')
]

In [7]:
# Load the saved mean word2vec arrays and pass each through
# apply_along_axis(list, axis 0), exactly as before.
# NOTE(review): presumably this normalises the stored arrays into plain
# 2-D numeric matrices — confirm against how the .npy files were written.
tmdb_w2v_mean, imdb_w2v_mean, combined_w2v_mean = [
    np.apply_along_axis(list, 0, np.load(w2v_path))
    for w2v_path in ('dataset/tmdb_w2v_mean.npy',
                     'dataset/imdb_w2v_mean.npy',
                     'dataset/combined_w2v_mean.npy')
]


In [8]:
from sklearn.preprocessing import MinMaxScaler

# One scaler object is re-fitted for each matrix in turn, so after this cell
# `scale` holds the statistics of `combined_w2v_mean` only.
# NOTE(review): presumably the rescaling is needed because MultinomialNB
# cannot handle negative feature values — confirm.
scale = MinMaxScaler()

#word2vec scaling
tmdb_w2v_mean = scale.fit_transform(tmdb_w2v_mean)
imdb_w2v_mean = scale.fit_transform(imdb_w2v_mean)
combined_w2v_mean = scale.fit_transform(combined_w2v_mean)


In [10]:
# Response array passed as `response` to evaluate_model() for every model/feature-set run.
binary_tmdb = np.load(file='dataset/binary_tmdb.npy')

In [11]:
# Candidate estimators, each paired with the parameter grid to search.
# Keys become the first half of the result labels ("<model>-<predictor>").
modelDict = {
    'Naive-Bayes': dict(
        model=MultinomialNB(),
        params={'alpha': [0.01, 0.1, 1.0]},
    ),
    'SGD': dict(
        model=SGDClassifier(loss='hinge', penalty='l2', max_iter=5, random_state=9001),
        params={'alpha': [0.01, 0.1, 1.0]},
    ),
    'SVC': dict(
        model=SVC(class_weight='balanced', kernel='linear'),
        params={'C': [0.01, 0.1, 1.0]},
    ),
}

# Feature matrices to evaluate; keys become the second half of the labels.
predictorDict = dict(
    tmdb_bow=tmdb_bow,
    imdb_bow=imdb_bow,
    combined_bow=combined_bow,
    tmdb_w2v_mean=tmdb_w2v_mean,
    imdb_w2v_mean=imdb_w2v_mean,
    combined_w2v_mean=combined_w2v_mean,
)

In [13]:
import warnings

# Fit every model / feature-set combination and store the outcome under the
# label "<model>-<predictor>". This cell used to be wrapped in a string
# literal, so `resultsDict` was never defined on a fresh kernel and every
# later cell failed with NameError.
# NOTE: this runs a grid search for each of the combinations and can take a
# long time.
resultsDict = {}
with warnings.catch_warnings(): #temporarily ignore warnings raised while fitting/scoring
    warnings.simplefilter("ignore")
    for model_name, model_spec in modelDict.items():
        for predictor_name, predictor_matrix in predictorDict.items():
            resultsDict['{0}-{1}'.format(model_name, predictor_name)] = evaluate_model(
                model=model_spec['model'],
                predictors=predictor_matrix,
                response=binary_tmdb,
                cv=True,
                params=model_spec['params'])



In [14]:
# Result keys that hold scalar scores (as opposed to models, predictions or reports).
scores = [
    'train_recall_score',
    'test_recall_score',
    'train_precision_score',
    'test_precision_score',
]

# One column per "<model>-<predictor>" label, one row per result key.
results_df = pd.DataFrame(data=resultsDict)

In [15]:
# Keep only the rows holding the scalar scores listed in `scores`.
results_df = results_df[results_df.index.isin(scores)]

In [16]:
# Flip to one row per "<model>-<predictor>" label and one column per score.
results_df = results_df.T
results_df

Unnamed: 0,train_recall_score,test_recall_score,train_precision_score,test_precision_score
Naive-Bayes-tmdb_bow,0.687407,0.291339,0.896292,0.583739
Naive-Bayes-imdb_bow,0.815309,0.348425,0.941768,0.629775
Naive-Bayes-combined_bow,0.926914,0.387795,0.981968,0.698599
Naive-Bayes-tmdb_w2v_mean,0.265185,0.261811,0.494266,0.316826
Naive-Bayes-imdb_w2v_mean,0.262716,0.253937,0.605336,0.178442
Naive-Bayes-combined_w2v_mean,0.274074,0.267717,0.583491,0.456782
SGD-tmdb_bow,0.254321,0.255906,0.163924,0.166339
SGD-imdb_bow,0.254321,0.255906,0.163924,0.166339
SGD-combined_bow,0.254321,0.255906,0.163924,0.166339
SGD-tmdb_w2v_mean,0.484444,0.437008,0.598578,0.492662


In [17]:
# Collect the runs whose four scores are identical to at least one other run.
duplicate_groups = [group for _, group in results_df.groupby(scores) if len(group) > 1]

# pd.concat raises ValueError on an empty sequence, so fall back to an empty
# frame with the same columns when no scores are duplicated.
duplicate_scores = pd.concat(duplicate_groups) if duplicate_groups else results_df.iloc[0:0]
duplicate_scores

Unnamed: 0,train_recall_score,test_recall_score,train_precision_score,test_precision_score
SGD-tmdb_bow,0.254321,0.255906,0.163924,0.166339
SGD-imdb_bow,0.254321,0.255906,0.163924,0.166339
SGD-combined_bow,0.254321,0.255906,0.163924,0.166339


In [18]:
# Labels ("<model>-<predictor>") of the runs with duplicated scores.
duplicate_list = duplicate_scores.index.tolist()

In [19]:
# Compare every duplicated run's classification reports with those of the
# first duplicated run; nothing is printed when they all match.
for position, duplicate_label in enumerate(duplicate_list):
    baseline_results = resultsDict[duplicate_list[0]]
    candidate_results = resultsDict[duplicate_label]

    train_matches = baseline_results['train_classification_report'] == candidate_results['train_classification_report']
    if not train_matches:
        print('Train classification reports do not match between model 0 and model {}'.format(position))

    test_matches = baseline_results['test_classification_report'] == candidate_results['test_classification_report']
    if not test_matches:
        print('Test classification reports do not match between model 0 and model {}'.format(position))

In [20]:
# Show the train and test reports of the first duplicated run.
first_duplicate_results = resultsDict[duplicate_list[0]]
for report_key in ('train_classification_report', 'test_classification_report'):
    print(first_duplicate_results[report_key])

                 precision    recall  f1-score   support

      Adventure       0.00      0.00      0.00       137
        Fantasy       0.00      0.00      0.00        76
      Animation       0.00      0.00      0.00        82
          Drama       0.64      1.00      0.78       515
         Horror       0.00      0.00      0.00        41
         Action       0.00      0.00      0.00       124
         Comedy       0.00      0.00      0.00       189
        History       0.00      0.00      0.00        54
        Western       0.00      0.00      0.00        25
       Thriller       0.00      0.00      0.00       184
          Crime       0.00      0.00      0.00       143
Science Fiction       0.00      0.00      0.00        81
        Mystery       0.00      0.00      0.00        77
          Music       0.00      0.00      0.00        34
        Romance       0.00      0.00      0.00       124
         Family       0.00      0.00      0.00        92
            War       0.00    

In [21]:
# Labels of the runs with the highest test recall and the highest test precision.
# The columns are cast to float before idxmax, as in the original cell.
best_recall, best_precision = (
    results_df[score_column].astype(float).idxmax()
    for score_column in ('test_recall_score', 'test_precision_score')
)

In [22]:
# Show the score rows of the best-recall and best-precision runs (in table order).
results_df[results_df.index.isin([best_recall, best_precision])]

Unnamed: 0,train_recall_score,test_recall_score,train_precision_score,test_precision_score
Naive-Bayes-combined_bow,0.926914,0.387795,0.981968,0.698599
SGD-combined_w2v_mean,0.523951,0.492126,0.638136,0.595978


In [23]:
# Train and test classification reports of the run with the best test precision.
print('Results: ', best_precision)
best_precision_results = resultsDict[best_precision]
for report_key in ('train_classification_report', 'test_classification_report'):
    print(best_precision_results[report_key])

Results:  Naive-Bayes-combined_bow
                 precision    recall  f1-score   support

      Adventure       0.97      0.88      0.92       137
        Fantasy       1.00      0.97      0.99        76
      Animation       1.00      0.93      0.96        82
          Drama       0.96      0.99      0.98       515
         Horror       1.00      0.83      0.91        41
         Action       0.98      0.88      0.93       124
         Comedy       0.99      0.89      0.94       189
        History       1.00      0.91      0.95        54
        Western       1.00      1.00      1.00        25
       Thriller       0.98      0.85      0.91       184
          Crime       1.00      0.90      0.95       143
Science Fiction       0.99      0.93      0.96        81
        Mystery       0.99      0.99      0.99        77
          Music       1.00      0.82      0.90        34
        Romance       1.00      0.95      0.98       124
         Family       1.00      0.90      0.95      

In [24]:
# Train and test classification reports of the run with the best test recall.
print('Results: ', best_recall)
best_recall_results = resultsDict[best_recall]
for report_key in ('train_classification_report', 'test_classification_report'):
    print(best_recall_results[report_key])

Results:  SGD-combined_w2v_mean
                 precision    recall  f1-score   support

      Adventure       0.94      0.12      0.21       137
        Fantasy       0.87      0.17      0.29        76
      Animation       0.91      0.52      0.67        82
          Drama       0.68      0.99      0.81       515
         Horror       0.00      0.00      0.00        41
         Action       0.86      0.20      0.33       124
         Comedy       0.62      0.70      0.66       189
        History       1.00      0.11      0.20        54
        Western       0.00      0.00      0.00        25
       Thriller       0.35      0.97      0.51       184
          Crime       0.83      0.66      0.74       143
Science Fiction       0.93      0.35      0.50        81
        Mystery       0.00      0.00      0.00        77
          Music       0.00      0.00      0.00        34
        Romance       0.00      0.00      0.00       124
         Family       1.00      0.09      0.16        9

In [25]:
# Predicted label vector of the best-precision model for the first row of combined_bow.
best_precision_model = resultsDict[best_precision]['fitted_model']
best_precision_model.predict(combined_bow)[0]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [26]:
# Per-label probabilities of the best-precision model for the first row of combined_bow.
best_precision_model = resultsDict[best_precision]['fitted_model']
best_precision_model.predict_proba(combined_bow)[0]

array([3.04719005e-02, 7.80087867e-04, 2.41432347e-02, 9.36481209e-01,
       1.25753510e-02, 3.35582768e-02, 1.04326995e-01, 1.48167927e-02,
       1.26084883e-04, 1.50322996e-01, 7.33628097e-01, 3.91758746e-03,
       1.74222489e-03, 1.38138201e-02, 1.83195876e-04, 2.87533281e-02,
       2.04361359e-02, 6.20652724e-05])

In [27]:
def mean_probability(model, predictor):
    """
    Return the mean of the model's predicted probabilities on the held-out rows.

    The test rows are obtained by re-splitting `predictor` with
    test_size=0.2 and random_state=9001, the same settings evaluate_model()
    uses. The mean is taken over every value returned by predict_proba.

    inputs:
        -model: a fitted model exposing predict_proba
        -predictor: the full predictor array the model was evaluated on

    outputs:
        -a single scalar: the mean predicted probability
    """
    held_out_x = train_test_split(predictor, test_size=0.2, random_state=9001)[1]
    probabilities = model.predict_proba(held_out_x)
    return probabilities.mean()

In [28]:
# Mean predicted probability of the best-precision model on the held-out rows.
mean_probability(model=resultsDict[best_precision]['fitted_model'],
                 predictor=combined_bow)

0.1085713855522845

In [29]:

# Re-fit on the combined word2vec features with a linear SVC that has
# probability=True, so the fitted model can be passed to mean_probability().
# NOTE(review): presumably SVC stands in here because the best-recall run
# (an SGD model) cannot produce probabilities — confirm.
best_recall_rerun = dict(
    model=SVC(class_weight='balanced', kernel='linear', probability=True),
    params={'C': [0.01, 0.1, 1.0]},
)

best_recall_rerun_results = evaluate_model(
    model=best_recall_rerun['model'],
    predictors=predictorDict['combined_w2v_mean'],
    response=binary_tmdb,
    cv=True,
    params=best_recall_rerun['params'],
)

  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
# Mean predicted probability of the re-run SVC model on the held-out rows.
mean_probability(model=best_recall_rerun_results['fitted_model'],
                 predictor=combined_w2v_mean)

0.14880587001315373

In [31]:
# Summarise test precision/recall per data source. Runs are selected by
# substring match of the source name against the row label.
data_source_results = {}

for group in ['imdb', 'tmdb', 'combined']:
    subset = results_df.filter(like=group, axis=0)
    subset_precision = subset['test_precision_score']
    subset_recall = subset['test_recall_score']

    data_source_results[group] = dict(
        min_test_precision=subset_precision.min(),
        max_test_precision=subset_precision.max(),
        mean_test_precision=subset_precision.mean(),
        min_test_recall=subset_recall.min(),
        max_test_recall=subset_recall.max(),
        mean_test_recall=subset_recall.mean(),
    )

In [32]:
# Display order for the summary columns: precision statistics first, then recall.
column_order = [
    'min_test_precision',
    'max_test_precision',
    'mean_test_precision',
    'min_test_recall',
    'max_test_recall',
    'mean_test_recall',
]

# One row per data source.
pd.DataFrame(data_source_results).T[column_order]

Unnamed: 0,min_test_precision,max_test_precision,mean_test_precision,min_test_recall,max_test_recall,mean_test_recall
imdb,0.166339,0.629775,0.394498,0.253937,0.403543,0.315453
tmdb,0.166339,0.583739,0.414782,0.255906,0.482283,0.345669
combined,0.166339,0.698599,0.479424,0.255906,0.492126,0.350886


In [33]:
# Summarise test precision/recall per feature type. Runs are selected by
# substring match of 'bow' / 'w2v' against the row label.
predictor_results = {}

for group in ['bow', 'w2v']:
    subset = results_df.filter(like=group, axis=0)
    subset_precision = subset['test_precision_score']
    subset_recall = subset['test_recall_score']

    predictor_results[group] = dict(
        min_test_precision=subset_precision.min(),
        max_test_precision=subset_precision.max(),
        mean_test_precision=subset_precision.mean(),
        min_test_recall=subset_recall.min(),
        max_test_recall=subset_recall.max(),
        mean_test_recall=subset_recall.mean(),
    )

In [34]:
# One row per feature type, columns in the shared `column_order`.
pd.DataFrame(predictor_results).T[column_order]

Unnamed: 0,min_test_precision,max_test_precision,mean_test_precision,min_test_recall,max_test_recall,mean_test_recall
bow,0.166339,0.698599,0.417925,0.255906,0.482283,0.325366
w2v,0.178442,0.603436,0.440688,0.253937,0.492126,0.35269
