In [None]:
import json
import pickle as pkl
import operator
import time
from collections import Counter
from itertools import product
import random

import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import scipy
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split, StratifiedKFold

import psycopg2 as pg

from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import auc_score
from lightfm.evaluation import reciprocal_rank

%run '../lib/cookbook/recsys.py'
%run '../lib/cookbook/generic_preprocessing.py'
%run '../lib/utility.py'

import matplotlib.pyplot as plt
from IPython.display import HTML ## Setting display options for Ipython Notebook

## Reload Data

In [None]:
## No Free Games
# Each file is opened in a `with` block so its handle is closed as soon as the
# object has been unpickled instead of lingering until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project's own preprocessing step.
with open('../data/preprocessed_data/no_free_games/user_top_games.pkl', 'rb') as pkl_file:
    user_top_games = pkl.load(pkl_file)
with open('../data/preprocessed_data/no_free_games/user_top_games_filtered_hours.pkl', 'rb') as pkl_file:
    user_top_games_filtered_hours = pkl.load(pkl_file)
with open('../data/preprocessed_data/no_free_games/user_top_games_filtered_percentile.pkl', 'rb') as pkl_file:
    user_top_games_filtered_percentile = pkl.load(pkl_file)
with open('../data/preprocessed_data/no_free_games/games.pkl', 'rb') as pkl_file:
    games = pkl.load(pkl_file)

In [None]:
# Load the pickled action-genre game list; the `with` block closes the file
# handle right after unpickling.
with open('../data/preprocessed_data/all_games/action_games.pkl', 'rb') as pkl_file:
    action_games = pkl.load(pkl_file)

In [None]:
# Load the remaining per-genre pickles; each `with` block closes its file
# handle right after unpickling.
with open('../data/preprocessed_data/all_games/sport_games.pkl', 'rb') as pkl_file:
    sport_games = pkl.load(pkl_file)
with open('../data/preprocessed_data/all_games/rpg_games.pkl', 'rb') as pkl_file:
    rpg_games = pkl.load(pkl_file)
with open('../data/preprocessed_data/all_games/simulation_games.pkl', 'rb') as pkl_file:
    simulation_games = pkl.load(pkl_file)
with open('../data/preprocessed_data/all_games/casual_games.pkl', 'rb') as pkl_file:
    casual_games = pkl.load(pkl_file)

In [None]:
# Load the pickled Steam game list of each selected genre.
# `other_games[k]` holds the object loaded for `game_type[k]`.
game_type = ['Strategy', 'Adventure', 'Indie', 'Racing', 'Singleplayer', 'Classic', 'VR', 'Sci-fi']
other_games = []
for genre in game_type:
    genre_path = '../data/preprocessed_data/all_games/' + genre + '_games.pkl'
    # Close the handle as soon as the object is unpickled.
    with open(genre_path, 'rb') as pkl_file:
        other_games.append(pkl.load(pkl_file))

In [None]:
# Load the stored table holding the top 20 recommended games for each user.
top20_lst = pd.read_csv(
    '../data/fixed/top20_user_titleslist.csv',
    index_col=None,
)

In [None]:
# Iterating a DataFrame yields its column labels, so this collects one array of
# values per column of `top20_lst`, in column order.
all_users = [top20_lst[column].values for column in top20_lst]

## Train Test Split and Building Interaction Matrices

In [None]:
# Hold out 20% of `user_top_games` as a test split, using a fixed seed.
# NOTE: the name `test` is reassigned later in this notebook to the PCA projection.
train_val, test = train_test_split(
    user_top_games,
    test_size=0.2,
    random_state=1337,
)

In [None]:
# Interaction matrix built from the train/validation split.
# NOTE(review): `threshold` is passed as the string '1' -- confirm this is the
# type `create_interaction_matrix` expects.
interactions_train_all = create_interaction_matrix(
    df=train_val,
    user_col='user',
    item_col='item',
    rating_col='rating',
    threshold='1',
)

In [None]:
# Interaction matrix built from the held-out test split.
# NOTE(review): `threshold` is passed as the string '1' -- confirm this is the
# type `create_interaction_matrix` expects.
interactions_test = create_interaction_matrix(
    df=test,
    user_col='user',
    item_col='item',
    rating_col='rating',
    threshold='1',
)

In [None]:
# Interaction matrix built from the complete `user_top_games` data.
# NOTE(review): `threshold` is passed as the string '1' -- confirm this is the
# type `create_interaction_matrix` expects.
interactions_full = create_interaction_matrix(
    df=user_top_games,
    user_col='user',
    item_col='item',
    rating_col='rating',
    threshold='1',
)

In [None]:
# Lookup structures produced by the cookbook helpers: one from the training
# interactions, one from the games table (the 'item' column is used as both
# the id and the name column).
user_dict = create_user_dict(
    interactions=interactions_train_all,
)
games_dict = create_item_dict(
    df=games,
    id_col='item',
    name_col='item',
)

In [None]:
## create sparse matrices from interactions
# Use the `csr_matrix` class imported at the top of the notebook; the bare name
# `sparse` is not imported here and would only exist if one of the `%run`
# scripts happened to leak it into the namespace.
sparse_train_all = csr_matrix(interactions_train_all.values)
sparse_test = csr_matrix(interactions_test.values)
sparse_full = csr_matrix(interactions_full.values)

## Train Model

In [None]:
# Both helpers are handed the same connection-string file.
conn_str_file = 'sqlalchemy_conn_str.txt'
sqlalchemy_conn = create_sqlalchemy_connection(conn_str_file)
best_parameters = query_best_parameters(conn_str_file)

# Override `no_components` in the queried parameters before building the model.
best_parameters['no_components'] = 20
model = LightFM(**best_parameters)

# Fit on the full interaction matrix (not just the training split).
model.fit(sparse_full, epochs=15, num_threads=30)

## PCA Visualization

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Pull the learned item and user embedding matrices off the fitted model.
item_embeddings, user_embeddings = model.item_embeddings, model.user_embeddings

In [None]:
# Project the item embeddings onto their first two principal components.
pca_model = PCA(n_components=2)
# NOTE: this rebinds `test`, which earlier in the notebook held the held-out
# split returned by `train_test_split`; from here on `test` is the 2-column
# PCA projection with one row per item embedding.
test = pca_model.fit_transform(item_embeddings)

## Extract the index positions of the selected genres' games in the PCA-embedded item matrix

In [None]:
def intersection(lst1, lst2):
    """Return the elements of `lst1` that are contained in `lst2`.

    The order (and any duplicates) of `lst1` is preserved. Containment is
    decided with the `in` operator, so when `lst2` is a string the check is a
    substring test.
    """
    common = []
    for candidate in lst1:
        if candidate in lst2:
            common.append(candidate)
    return common

In [None]:
# Genres = the other genres listed in `game_type` (loaded into `other_games`).
# The model was fitted on `sparse_full`, so the item embeddings are aligned with
# the columns of `interactions_full`; genre titles are matched against those
# columns rather than against the training-split columns.
item_columns = list(interactions_full.columns)

intersected_othergames = []
for genre_games in other_games:
    # Set membership avoids rescanning the whole genre list for every column.
    # Assumes the genre entries are hashable (e.g. title strings).
    genre_titles = set(genre_games)
    intersected_othergames.append(
        [title for title in item_columns if title in genre_titles]
    )

In [None]:
# The embeddings come from the model fitted on `sparse_full`, so titles are
# matched against the columns of `interactions_full`.
known_titles = set(interactions_full.columns)

# One entry per user; each entry holds, for every recommended title, a list
# containing the title when it is a known item column and an empty list
# otherwise. Membership is tested against the set of column labels: the earlier
# `value in item` test ran against a single cell value, which for a string
# title is a substring check (so shorter, unrelated game names could match) and
# raises TypeError for a missing (NaN) cell.
intersected_user = [
    [[title] if title in known_titles else [] for title in user_titles]
    for user_titles in all_users
]

# Flatten the per-title match lists into one list of matched titles per user.
processed_user_item = [
    [title for matches in per_title for title in matches]
    for per_title in intersected_user
]

In [None]:
#processed_user_item

In [None]:
# Row positions in the item-embedding matrix for every user's matched titles.
# The model was fitted on `sparse_full`, so embedding rows follow the column
# order of `interactions_full`.
item_positions = {title: pos for pos, title in enumerate(interactions_full.columns)}

# Titles that are not item columns are skipped explicitly instead of being
# hidden behind a bare `except`, so unrelated errors are no longer swallowed.
other_g_idx_new = [
    [item_positions[title] for title in user_titles if title in item_positions]
    for user_titles in processed_user_item
]

In [None]:
#Genres = action, sport, rpg, simulation, casual

def _genre_titles_and_positions(columns, genre_games):
    """Return the item columns that belong to a genre, and their positions.

    Parameters
    ----------
    columns : sequence of item column labels, in embedding-row order.
    genre_games : iterable of game identifiers belonging to the genre
        (assumed hashable, e.g. title strings).

    Returns
    -------
    (titles, positions) : two lists of equal length; `titles[k]` is the column
        label found at index `positions[k]` of `columns`.
    """
    wanted = set(genre_games)
    titles, positions = [], []
    for position, title in enumerate(columns):
        if title in wanted:
            titles.append(title)
            positions.append(position)
    return titles, positions


# The model was fitted on `sparse_full`, so embedding rows follow the column
# order of `interactions_full`; positions must be taken from those columns.
genre_item_columns = list(interactions_full.columns)

action, action_idx = _genre_titles_and_positions(genre_item_columns, action_games)
sport, sport_idx = _genre_titles_and_positions(genre_item_columns, sport_games)
rpg, rpg_idx = _genre_titles_and_positions(genre_item_columns, rpg_games)
simulation, simulation_idx = _genre_titles_and_positions(genre_item_columns, simulation_games)
casual, casual_idx = _genre_titles_and_positions(genre_item_columns, casual_games)

In [None]:
# Row positions for the other genres described in `game_type`:
# `other_g_idx[k]` lists the embedding rows of the games in
# `intersected_othergames[k]`.
# The model was fitted on `sparse_full`, so embedding rows follow the column
# order of `interactions_full`.
genre_item_positions = {title: pos for pos, title in enumerate(interactions_full.columns)}

# Unknown titles are skipped explicitly rather than through a bare `except`.
other_g_idx = [
    [genre_item_positions[title] for title in genre_titles if title in genre_item_positions]
    for genre_titles in intersected_othergames
]

## PCA Plotting

In [None]:
import matplotlib.pyplot as plt

size = 300
target = 300


def _save_genre_scatter(points, genre_idx, title, color, out_path, figsize):
    """Scatter all PCA-projected items in gray, highlight one genre, save the figure.

    Parameters
    ----------
    points : 2-D array; columns 0 and 1 are used as the x / y coordinates.
    genre_idx : row positions in `points` of the games to highlight.
    title : figure title.
    color : fill color of the highlighted markers.
    out_path : file path the figure is written to.
    figsize : (width, height) of the figure in inches.

    Returns
    -------
    The created matplotlib figure (left open so the notebook still renders it).
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(points[:, 0], points[:, 1], s=size, color='gray')
    ax.scatter(points[genre_idx, 0], points[genre_idx, 1],
               s=target, color=color, edgecolors='r')
    ax.set_title(title, fontsize=50)
    ax.tick_params(axis='both', which='major', labelsize=50)
    ax.tick_params(axis='both', which='minor', labelsize=50)
    ax.grid()
    fig.savefig(out_path)
    return fig


# (genre label, highlighted rows, marker color, figure size).
# The Action figure is 18x18 inches while the others are 20x18.
genre_plots = [
    ('Action', action_idx, 'aqua', (18, 18)),
    ('Sport', sport_idx, 'darkorchid', (20, 18)),
    ('RPG', rpg_idx, 'g', (20, 18)),
    ('Simulation', simulation_idx, 'm', (20, 18)),
    ('Casual', casual_idx, 'darkorange', (20, 18)),
]

for genre_label, genre_idx, genre_color, genre_figsize in genre_plots:
    _save_genre_scatter(
        test,
        genre_idx,
        title='PCA - Top 1000 Picked Genre=' + genre_label,
        color=genre_color,
        out_path='../data/PCA_Plots/' + genre_label + '.png',
        figsize=genre_figsize,
    )


In [None]:
import matplotlib.pyplot as plt
from itertools import cycle

# Marker colors are handed out round-robin, one per genre.
cycol = cycle('bgrcmk')
size = 300
target = 300

# Pair each genre name with its row positions directly, so the loop follows the
# length of `game_type` instead of a hard-coded 4 x 2 grid with a manual counter.
for genre_name, genre_rows in zip(game_type, other_g_idx):
    current_genre = 'PCA - Top 1000 Picked Genre=' + genre_name
    fig, ax = plt.subplots(figsize=(18, 18))
    # All items in gray, the current genre's items highlighted on top.
    ax.scatter(test[:, 0], test[:, 1], s=size, color='gray')
    ax.scatter(test[genre_rows, 0], test[genre_rows, 1],
               s=target, c=next(cycol), edgecolors='r')
    ax.set_title(current_genre, fontsize=50)
    ax.tick_params(axis='both', which='major', labelsize=50)
    ax.tick_params(axis='both', which='minor', labelsize=50)
    ax.grid()

In [None]:
# Display the first entry of `game_type` as a quick check of the genre list.
game_type[0]

In [None]:
#!pip install pillow

In [None]:
import matplotlib.pyplot as plt

# Bold text for the figures below. This modifies the global rcParams, so it
# also affects any figure created afterwards.
plt.rcParams["font.weight"] = "bold"
size = 400
target = 700

# One figure per column of `top20_lst`, paired with that column's row positions
# in the PCA projection.
for user_label, user_rows in zip(top20_lst.columns, other_g_idx_new):
    current_genre = 'PCA - Top 20 Recommended Games=' + user_label
    fig, ax = plt.subplots(figsize=(18, 18))
    # All items in dark gray, the recommended games highlighted on top.
    ax.scatter(test[:, 0], test[:, 1], s=size, color='darkgray')
    # `linewidth` must be numeric; it was previously passed as the string '5'.
    ax.scatter(test[user_rows, 0], test[user_rows, 1],
               s=target, c='red', edgecolors='c', linewidth=5)
    ax.set_title(current_genre, fontsize=50, fontweight="bold")
    ax.tick_params(axis='both', which='major', labelsize=50)
    ax.tick_params(axis='both', which='minor', labelsize=50)
    ax.grid()

    name = '../data/PCA_Plots/' + user_label + '.jpg'
    fig.savefig(name)
