In [164]:
import pandas as pd
import numpy as np
import math
import pickle
import torch
import random
import torch.nn as nn
import torch.optim as optim
import time
from matplotlib import pyplot as plt
from torch.utils.data import Dataset, DataLoader
from models.BertSeqTransformer import StandardTransformer
from datasets.SpotifyDataset import SpotifyDataset, bert_collate_fn, custom_collate_fn
torch.manual_seed(1)
EPOCHS = 1

N=100000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("You are using device: %s" % device)

You are using device: cpu


In [179]:
def accuracy(output, target):
        
    seq_len = target.shape[1]
    correct = output.eq(target)
    correct = correct.sum(axis=1) * 1.0
    acc = correct / seq_len
    return acc

def accuracy_at_k(output, target):
    
    output = output.to(device)
    target = target.to(device)
    
    T = output.shape[1]
    batch_size = target.shape[0]
    acc = torch.zeros(T)

    for i in range(T):        
        acc[i] = torch.mean(accuracy(output[:,i].reshape(batch_size,1), target[:,i].reshape(batch_size,1)))
        
    return acc

In [2]:
print("READING THE DATA")
with open("data/all_session_tracks_train.pkl", 'rb') as f:
    train_tracks = pickle.load(f)
    train_tracks = train_tracks[0:N]

with open("data/all_session_skips_train.pkl", 'rb') as f:
    train_skips = pickle.load(f)
    train_skips = train_skips[0:N]

with open("data/all_session_tracks_test.pkl", 'rb') as f:
    test_tracks = pickle.load(f)
    test_tracks = test_tracks[0:N]

with open("data/all_session_skips_test.pkl", 'rb') as f:
    test_skips = pickle.load(f)
    test_skips = test_skips[0:N]

with open("data/track_vocabs.pkl", 'rb') as f:
    track_vocab = pickle.load(f)

READING THE DATA


In [180]:
bert_aug_seq_preds = np.load('output/transformer_bert_aug_seq_preds_v2.npy').astype(int)
bert_aug_seq_labels = np.load('output/transformer_bert_aug_seq_labels.npy').astype(int)

seq_preds = np.load('output/transformer_seq_preds_v2.npy')
seq_labels = np.load('output/transformer_seq_labels.npy').astype(int)

skip_preds = np.load('output/transformer_skip_preds_v2.npy')
skip_labels = np.load('output/transformer_skip_labels.npy')
skip_preds = np.argmax(skip_preds, axis=2)

In [152]:
seq_preds

array([[ 12168,  81694,  28681, ...,  27147,  65175,  17010],
       [ 14657,   9756,  99171, ...,  64317,  23134,  42242],
       [ 39370,   2339,  64515, ...,  10186,  79031,  52474],
       ...,
       [ 75651,  71519,  36036, ...,  98671,  99672,  22421],
       [ 68105,  76688,  81105, ...,  89613,  60948,  11095],
       [  3744,  80300,  91501, ...,   9235, 101661,   9235]])

In [153]:
bert_aug_seq_preds

array([[12562, 52180, 62182, ..., 65033, 75651, 12168],
       [14657, 38284, 83850, ..., 83850, 38284, 83850],
       [39370, 43012, 28185, ..., 24596, 28185, 43012],
       ...,
       [75651, 71519, 36036, ..., 98671, 99672, 22421],
       [99652, 99652, 99652, ...,   230, 20159, 20159],
       [27147, 24049, 24049, ..., 24049, 24049, 24049]])

In [154]:
skip_preds

array([[1, 1, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0]])

In [156]:
seq_by_location = accuracy_at_k(torch.Tensor(seq_preds), torch.Tensor(seq_labels))
bert_aug_by_location = accuracy_at_k(torch.Tensor(bert_aug_seq_preds), torch.Tensor(bert_aug_seq_labels))
df = pd.DataFrame({'Standard Transformer':seq_by_location, 'Bert Augmented Transformer':bert_aug_by_location})
df.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Standard Transformer,0.27742,0.2451,0.21724,0.20068,0.18256,0.16819,0.15559,0.14308,0.131,0.12216
Bert Augmented Transformer,0.24566,0.21676,0.19468,0.17877,0.16471,0.1522,0.14038,0.13013,0.11829,0.10907


In [181]:
skip_by_location = accuracy_at_k(torch.Tensor(skip_preds), torch.Tensor(skip_labels))
df = pd.DataFrame({'Skip Model Transformer':skip_by_location})
df

Unnamed: 0,Skip Model Transformer
0,0.52895
1,0.52962
2,0.53674
3,0.53672
4,0.54528
5,0.53519
6,0.54489
7,0.53364
8,0.52752
9,0.50754


In [182]:
#number of songs correct in the whole sequence
x = np.sum(bert_aug_seq_preds==seq_labels, axis=1)
unique, counts = np.unique(x, return_counts=True)
bert_trans_counts = counts/100000*100

In [185]:
counts

array([67777,  9520,  3637,  2123,  1586,  1383,  1171,  1092,  1066,
        1005,  9640])

In [160]:
#number of songs correct in the whole sequence
x = np.sum(seq_preds==seq_labels, axis=1)
unique, counts = np.unique(x, return_counts=True)
trans_counts = counts/100000*100

In [167]:
df = pd.DataFrame({'Standard Transformer':trans_counts, 'Bert Augmented Transformer':bert_trans_counts})
df

Unnamed: 0,Standard Transformer,Bert Augmented Transformer
0,66.502,67.777
1,8.448,9.52
2,3.407,3.637
3,2.236,2.123
4,1.823,1.586
5,1.611,1.383
6,1.362,1.171
7,1.275,1.092
8,1.193,1.066
9,1.086,1.005


In [175]:
seq_preds_sort = np.sort(seq_preds,axis=1)
unique_per_session = (seq_preds_sort[:,1:] != seq_preds_sort[:,:-1]).sum(axis=1)+1
correct_per_session = np.sum(seq_preds==seq_labels, axis=1)
df = pd.DataFrame({'unique_tracks_per_session':unique_per_session, 'Standard_Transformer':correct_per_session})
unique_session_correct = df.groupby('unique_tracks_per_session').sum('Standard_Transformer')/df.groupby('unique_tracks_per_session').count()


In [176]:
seq_preds_sort = np.sort(bert_aug_seq_preds,axis=1)
unique_per_session = (seq_preds_sort[:,1:] != seq_preds_sort[:,:-1]).sum(axis=1)+1
correct_per_session = np.sum(seq_preds==seq_labels, axis=1)
df = pd.DataFrame({'unique_per_session':unique_per_session, 'correct_per_session':correct_per_session})
df = df.groupby('unique_per_session').sum('correct_per_session')/df.groupby('unique_per_session').count()
unique_session_correct['Bert Augmented Transformer'] = df['correct_per_session']
unique_session_correct

Unnamed: 0_level_0,Standard_Transformer,Bert Augmented Transformer
unique_tracks_per_session,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.478357,0.382192
2,0.299649,0.468513
3,0.324548,0.441935
4,0.492754,0.562171
5,0.494466,0.600872
6,0.582177,0.730182
7,0.706208,0.921245
8,0.91757,1.437368
9,0.773691,1.787886
10,2.683918,6.176684


In [178]:
#number of skips correct in the whole sequence
x = np.sum(skip_preds==skip_labels, axis=1)
unique, counts = np.unique(x, return_counts=True)
counts = counts/100000*100
df = pd.DataFrame({'Skip Model Transformer':counts})
df

Unnamed: 0,Skip Model Transformer
0,3.201
1,5.576
2,7.796
3,9.618
4,11.939
5,12.827
6,13.124
7,12.477
8,10.906
9,7.995


In [138]:
number_of_skips = np.sum(skip_preds, axis=1)
correct_per_session = np.sum(skip_preds==skip_labels, axis=1)
df = pd.DataFrame({'number_of_session_skips':unique_per_session, 'avg_session_accuracy':correct_per_session})
df = df.groupby('number_of_session_skips').sum('correct_per_session')/df.groupby('number_of_session_skips').count()
df = df/10
df

Unnamed: 0_level_0,avg_session_accuracy
number_of_session_skips,Unnamed: 1_level_1
1,0.565702
2,0.553062
3,0.549985
4,0.547071
5,0.53256
6,0.545497
7,0.534895
8,0.534553
9,0.53157
10,0.525982
