In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score as AUC

## SVD++ Results

In [2]:
# Read the SVD++ result rows from CSV; later cells use its `skipped` and
# `prediction` columns.
svd_results = pd.read_csv(filepath_or_buffer="SVDppResults2.csv")

In [3]:
# ROC AUC of the SVD++ predictions against the `skipped` column.
svd_auc_score = AUC(
    svd_results["skipped"].values,
    svd_results["prediction"].values,
)
print("AUC Score:", svd_auc_score)

AUC Score: 0.807650502126


In [4]:
# (rows, columns) of the SVD++ results frame.
svd_results.shape

(1151142, 7)

## Neural Network Results

In [5]:
# Read the neural-network result rows from CSV; later cells use its `skipped`
# and `prediction` columns.
nn_results = pd.read_csv(filepath_or_buffer="NNResults.csv")

In [6]:
# ROC AUC of the neural-network predictions against the `skipped` column.
nn_auc_score = AUC(
    nn_results["skipped"].values,
    nn_results["prediction"].values,
)
print("AUC Score:", nn_auc_score)

AUC Score: 0.593701054732


In [7]:
# (rows, columns) of the neural-network results frame.
nn_results.shape

(1151204, 7)

In [8]:
# Column labels of the neural-network results frame, as a plain list.
nn_results.columns.tolist()

['artist-name',
 'prediction',
 'skipped',
 'timestamp',
 'track-name',
 'userid',
 'weekend']

## Combined Results

In [9]:
# Line up the two models' predictions row by row: inner-join on every shared
# non-prediction column, and suffix the two `prediction` columns so they become
# `prediction_svd` and `prediction_nn`.
# NOTE(review): an inner join keeps only rows whose key values occur in both
# frames, and repeated key combinations would multiply matches -- confirm the
# six keys identify a row uniquely in each file.
combined_results = svd_results.merge(
    nn_results,
    on=['artist-name', 'skipped', 'timestamp', 'track-name', 'userid', 'weekend'],
    how="inner",
    suffixes=("_svd", "_nn"),
)

In [10]:
# (rows, columns) of the merged frame.
combined_results.shape

(1074514, 8)

In [11]:
# Preview the first five merged rows.
combined_results.head(5)

Unnamed: 0,userid,track-name,artist-name,weekend,timestamp,skipped,prediction_svd,prediction_nn
0,user_000001,Monde,Hudson Mohawke,0,2009-02-16 11:11:10,0,0.034001,0.215752
1,user_000001,Overnight,Hudson Mohawke,0,2009-02-16 12:07:09,0,0.026386,0.493046
2,user_000001,Speed Stick,Hudson Mohawke,0,2009-02-16 12:09:58,0,0.02394,0.542228
3,user_000001,Velvet Peel,Hudson Mohawke,0,2009-02-16 12:12:03,0,0.023543,0.548095
4,user_000001,Yonard,Hudson Mohawke,0,2009-02-16 12:14:22,0,0.035618,0.548095


### Normalizing the probabilities of both models by scaled ranking

In [12]:
# Put both models' scores on a common scale: replace each raw prediction by its
# rank within the merged frame, divided by the number of merged rows.
combined_results["prediction_svd_scaled"] = (
    combined_results["prediction_svd"].rank().div(len(combined_results))
)
combined_results["prediction_nn_scaled"] = (
    combined_results["prediction_nn"].rank().div(len(combined_results))
)

In [13]:
# Preview the first five rows again, now with the two rank-scaled columns.
combined_results.head(5)

Unnamed: 0,userid,track-name,artist-name,weekend,timestamp,skipped,prediction_svd,prediction_nn,prediction_svd_scaled,prediction_nn_scaled
0,user_000001,Monde,Hudson Mohawke,0,2009-02-16 11:11:10,0,0.034001,0.215752,0.881653,0.616757
1,user_000001,Overnight,Hudson Mohawke,0,2009-02-16 12:07:09,0,0.026386,0.493046,0.846324,0.636173
2,user_000001,Speed Stick,Hudson Mohawke,0,2009-02-16 12:09:58,0,0.02394,0.542228,0.830574,0.639128
3,user_000001,Velvet Peel,Hudson Mohawke,0,2009-02-16 12:12:03,0,0.023543,0.548095,0.828102,0.819741
4,user_000001,Yonard,Hudson Mohawke,0,2009-02-16 12:14:22,0,0.035618,0.548095,0.887119,0.819741


## Weighted average of the two models (SVD++: 90%, NN: 10%)

In [20]:
# Blend the rank-scaled scores: 90% SVD++ plus 10% neural network.
combined_results["weighted_prediction"] = (
    0.9 * combined_results["prediction_svd_scaled"]
    + 0.1 * combined_results["prediction_nn_scaled"]
)

In [21]:
# ROC AUC of the blended score against the `skipped` column, on the merged rows.
ensemble_auc_score = AUC(
    combined_results["skipped"].values,
    combined_results["weighted_prediction"].values,
)
print("AUC Score:", ensemble_auc_score)

# The single-model AUCs printed earlier were computed on each model's full
# results file, whereas the ensemble is scored only on the rows that survived
# the inner merge. Re-score each model on those same merged rows so the
# comparison with the ensemble is like-for-like.
svd_auc_merged = AUC(
    combined_results["skipped"].values,
    combined_results["prediction_svd"].values,
)
nn_auc_merged = AUC(
    combined_results["skipped"].values,
    combined_results["prediction_nn"].values,
)
print("SVD++ AUC on merged rows:", svd_auc_merged)
print("NN AUC on merged rows:", nn_auc_merged)

AUC Score: 0.80862097394
