#                            Recommender System

### The system below recommends Race Class combinations, New Guilds for users already playing in Guilds and Guilds for new Users or Users not playing in Guilds using Collaborative filtering

In [1]:
# Required libraries are imported

import pandas as pd
import numpy as np
import matplotlib as plt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Data and the required columns are loaded

# Full column layout of the log file; only the subset listed in `usecols` is read.
cols = [
    'Unnamed: 0', 'Unnamed: 0.1', 'QueryTime', 'QuerySeq', 'AvatarID',
    'Guild', 'Level', 'Race', 'Class', 'Zone',
]
# NOTE(review): `names=` is given without `header=0`, so if the CSV carries its own
# header line that line is read as a data row -- confirm against the file.
df = pd.read_csv(
    "logs_sample.csv",
    names=cols,
    usecols=['AvatarID', 'Guild', 'Level', 'Race', 'Class'],
)

#### The loaded data needs to be treated separately
Race & Class patterns for each Avatar ID are grouped and a new Column RaceClassCount is created. Likewise, Guild and Avatar ID are grouped to create a new column GuildCount

In [3]:
# Number of log rows for every (AvatarID, Race, Class) triple.
df_rc = (
    df.groupby(['AvatarID', 'Race', 'Class'])
      .size()
      .reset_index()
)
#df_g = df.groupby(['AvatarID','Guild']).size().reset_index()
# Number of log rows for every full (AvatarID, Guild, Level, Race, Class) pattern.
# This rebinds `df`, so the raw frame is no longer available afterwards.
df = (
    df.groupby(['AvatarID', 'Guild', 'Level', 'Race', 'Class'])
      .size()
      .reset_index()
)

In [4]:
# The group sizes come back in a column labelled 0; give it a meaningful name.
df_rc = df_rc.rename(columns={0: 'RaceClassCount'})
#df_g.rename(columns = {0: 'GuildCount'},  inplace=True)
df = df.rename(columns={0: 'PatternCount'})

The same classes exist under every race, so the classes cannot be counted on their own. First a Race Flag with a unique numeric value per race is created, then a Class Flag with a unique numeric value per class, chosen such that their sum is unique for every Race and Class combination.

In [5]:
def addraceflag(x):
    """Return the numeric flag for a race label, or None when it is not recognised.

    Labels are matched exactly, including their leading space.
    """
    race_codes = {
        ' Blood Elf': 15,
        ' Orc': 3,
        ' Tauren': 6,
        ' Troll': 9,
        ' Undead': 12,
    }
    return race_codes.get(x)

In [6]:
# Row-wise: translate each record's Race label into its numeric flag.
df_rc['RaceFlag'] = df_rc.apply(
    lambda record: addraceflag(record['Race']),
    axis=1,
)

In [7]:
def addclassflag(x):
    """Return the numeric flag for a class label, or None when it is not recognised.

    Labels are matched exactly, including their leading space.
    """
    class_codes = {
        ' Warrior': 10,
        ' Shaman': 20,
        ' Hunter': 30,
        ' Warlock': 40,
        ' Rogue': 50,
        ' Druid': 60,
        ' Mage': 70,
        ' Priest': 80,
        ' Paladin': 90,
        ' Death Knight': 100,
    }
    return class_codes.get(x)

In [8]:
# Row-wise: translate each record's Class label into its numeric flag.
df_rc['ClassFlag'] = df_rc.apply(
    lambda record: addclassflag(record['Class']),
    axis=1,
)

In [9]:
# Display names keyed by the numeric flags produced by addraceflag / addclassflag.
_RACE_NAMES = {15: 'Blood Elf', 3: 'Orc', 6: 'Tauren', 9: 'Troll', 12: 'Undead'}
_CLASS_NAMES = {
    10: 'Warrior', 20: 'Shaman', 30: 'Hunter', 40: 'Warlock', 50: 'Rogue',
    60: 'Druid', 70: 'Mage', 80: 'Priest', 90: 'Paladin', 100: 'Knight',
}
# Class flags are multiples of 10 and race flags differ in their last digit, so
# every race + class sum identifies exactly one combination.
_RACECLASS_LABELS = {
    race_flag + class_flag: '{0} & {1}'.format(race_name, class_name)
    for race_flag, race_name in _RACE_NAMES.items()
    for class_flag, class_name in _CLASS_NAMES.items()
}


def addraceclass(row):
    """Return the 'Race & Class' label for a row, or None for an unknown sum.

    Parameters
    ----------
    row : mapping-like
        Must provide numeric 'RaceFlag' and 'ClassFlag' entries.

    Returns
    -------
    str or None
        The label matching ``row['RaceFlag'] + row['ClassFlag']``; None when the
        sum is not one of the known combinations (for example when a flag is NaN).

    Notes
    -----
    The labels are generated from one table, so every Blood Elf combination is
    spelled 'Blood Elf & ...' (the hand-written chain had 'BloodElf & Warrior').
    """
    return _RACECLASS_LABELS.get(row['RaceFlag'] + row['ClassFlag'])

In [10]:
# Row-wise: turn the two numeric flags into a readable 'Race & Class' label.
df_rc['RaceClass'] = df_rc.apply(addraceclass, axis=1)

Once the new column RaceClass is created, create a new dataframe with only required columns. AvatarID, RaceClass and RaceClassCount. 

In [11]:
# Keep only the columns needed to build the RaceClass x AvatarID matrix.
df_rc_final = df_rc.loc[:, ['AvatarID', 'RaceClass', 'RaceClassCount']]

Create another new column, Guild Flag, to see how many users are actually part of a Guild. Assign 0 to users with no Guild and 1 to users with a Guild.

In [12]:
def addguildflag(x):
    """Return 0 when the guild value is a single blank (' '), otherwise 1."""
    return 0 if x == ' ' else 1

In [13]:
# Row-wise: mark whether each record carries a guild (1) or a blank guild (0).
df['GuildFlag'] = df.apply(
    lambda record: addguildflag(record['Guild']),
    axis=1,
)

Remove the users with no Guilds as we cannot recommend Guilds for them following the same process. For Users playing in Guilds already we will recommend new Guilds.

In [14]:
#df_nog = df_g['Guild'] == ' '
#df_g_final = df_g[~df_nog]

# Rows whose Guild string compares >= '0' form the guild frame; all remaining rows
# (a blank ' ' compares below '0') form the no-guild frame.
df_no_g = df[~(df['Guild'] >= '0')]
df_g = df[df['Guild'] >= '0']

In [15]:
# Both frames are narrowed to the same four columns, in the same order.
df_g = df_g.loc[:, ['AvatarID', 'Level', 'Guild', 'PatternCount']]
df_no_g = df_no_g.loc[:, ['AvatarID', 'Level', 'Guild', 'PatternCount']]

In [16]:
# Keep one row per (AvatarID, Guild) pair in the no-guild frame and report the change.
if df_no_g.duplicated(subset=['AvatarID', 'Guild']).any():
    initial_rows = len(df_no_g)
    print('Initial dataframe shape {0}'.format(df_no_g.shape))
    df_no_g = df_no_g.drop_duplicates(subset=['AvatarID', 'Guild'])
    current_rows = len(df_no_g)
    print('New dataframe shape {0}'.format(df_no_g.shape))
    print('Removed {0} rows'.format(initial_rows - current_rows))

Initial dataframe shape (32902, 4)
New dataframe shape (14953, 4)
Removed 17949 rows


In [17]:
# Keep one row per (AvatarID, Guild) pair in the guild frame and report the change.
if df_g.duplicated(subset=['AvatarID', 'Guild']).any():
    initial_rows = len(df_g)
    print('Initial dataframe shape {0}'.format(df_g.shape))
    df_g = df_g.drop_duplicates(subset=['AvatarID', 'Guild'])
    current_rows = len(df_g)
    print('New dataframe shape {0}'.format(df_g.shape))
    print('Removed {0} rows'.format(initial_rows - current_rows))

Initial dataframe shape (71538, 4)
New dataframe shape (19900, 4)
Removed 51638 rows


Change the Dataframe to User Item matrix and then a Similarity matrix.

A distance metric commonly used in recommender systems is cosine similarity, where the ratings are seen as vectors in n-dimensional space and the similarity is calculated based on the angle between these vectors.

In [18]:
# Item x user matrices: one row per item (Guild / RaceClass), one column per
# AvatarID, cell = count; pairs that never occur are filled with 0.
wide_df_g = (
    df_g.pivot(index='Guild', columns='AvatarID', values='PatternCount')
        .fillna(0)
)
wide_df_rc = (
    df_rc_final.pivot(index='RaceClass', columns='AvatarID', values='RaceClassCount')
               .fillna(0)
)

In [19]:
# Compressed sparse row copies of the two wide frames, used to fit the KNN models.
wide_df_g_sparse = csr_matrix(wide_df_g.values)
wide_df_rc_sparse = csr_matrix(wide_df_rc.values)

Create a KNN model with the cosine metric separately for the Race & Class and Guild recommenders, and fit the models to the matrices we created.

In [20]:
# One brute-force cosine KNN model per recommender, each fitted on its own matrix.
model_rc = NearestNeighbors(algorithm='brute', metric='cosine')
model_rc.fit(wide_df_rc_sparse)

model_g = NearestNeighbors(algorithm='brute', metric='cosine')
model_g.fit(wide_df_g_sparse)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=1, n_neighbors=5, p=2, radius=1.0)

Create 2 separate functions for the Race & Class and Guild recommenders.

For a randomly queried index we get the nearest neighbors from the above KNN model and recommend them to the User with the randomly queried index.

In [21]:
def raceclass_recommender(wide_df_rc_sparse, wide_df_rc, model=None, n_recommendations=5):
    """Print Race & Class recommendations for one randomly chosen avatar.

    A random avatar (a column of ``wide_df_rc``) is picked. The combination with
    that avatar's highest count is used as the seed item, and the seed's nearest
    neighbours in the KNN model are printed. Combinations the avatar already has
    a non-zero count for are skipped.

    Parameters
    ----------
    wide_df_rc_sparse : object
        Not used by this function; kept so existing calls continue to work.
    wide_df_rc : pandas.DataFrame
        One row per Race & Class label, one column per AvatarID, cells are counts.
    model : object, optional
        Fitted estimator exposing ``kneighbors(X, n_neighbors=...)``. Defaults to
        the notebook-level ``model_rc``.
    n_recommendations : int, optional
        Maximum number of recommendations to print (default 5).

    Returns
    -------
    None
        The result is printed.
    """
    knn = model_rc if model is None else model
    n_items, n_users = wide_df_rc.shape
    if n_items == 0 or n_users == 0:
        print ('No data available to build recommendations.')
        return

    # Greet the avatar the recommendation is actually computed for: pick a
    # column (avatar), not a row (item) position.
    user_pos = np.random.choice(n_users)
    user_counts = wide_df_rc.iloc[:, user_pos].values
    seed_pos = int(np.argmax(user_counts))
    already_played = set(np.flatnonzero(user_counts).tolist())
    already_played.add(seed_pos)

    print ('Hello {0}!! \n'.format(wide_df_rc.columns[user_pos]))

    # `.values` is required: a pandas Series has no `reshape` in current pandas.
    query_vector = wide_df_rc.iloc[seed_pos, :].values.reshape(1, -1)
    # Ask for enough neighbours to survive the filtering below, but never more
    # than the number of items the model can return.
    n_neighbors = min(n_items, n_recommendations + len(already_played))
    distances, indices = knn.kneighbors(query_vector, n_neighbors = n_neighbors)

    print ('Below are recommendations for Race and Class combinations:\n')
    candidates = [
        (item_pos, distance)
        for distance, item_pos in zip(distances.flatten(), indices.flatten())
        if item_pos not in already_played
    ]
    for rank, (item_pos, distance) in enumerate(candidates[:n_recommendations], start=1):
        print ('{0}: {1}, with distance of {2}:'.format(rank, wide_df_rc.index[item_pos], distance))

In [22]:
def guild_recommender(wide_df_g_sparse, wide_df_g, model=None, n_recommendations=5):
    """Print new Guild recommendations for one randomly chosen avatar.

    A random avatar (a column of ``wide_df_g``) is picked. The guild with that
    avatar's highest count is used as the seed item, and the seed's nearest
    neighbours in the KNN model are printed. Guilds the avatar already has a
    non-zero count for are skipped, so only new guilds are listed.

    Parameters
    ----------
    wide_df_g_sparse : object
        Not used by this function; kept so existing calls continue to work.
    wide_df_g : pandas.DataFrame
        One row per Guild, one column per AvatarID, cells are counts.
    model : object, optional
        Fitted estimator exposing ``kneighbors(X, n_neighbors=...)``. Defaults to
        the notebook-level ``model_g``.
    n_recommendations : int, optional
        Maximum number of recommendations to print (default 5).

    Returns
    -------
    None
        The result is printed.
    """
    knn = model_g if model is None else model
    n_guilds, n_users = wide_df_g.shape
    if n_guilds == 0 or n_users == 0:
        print ('No data available to build recommendations.')
        return

    # Greet the avatar the recommendation is actually computed for: pick a
    # column (avatar), not a row (guild) position.
    user_pos = np.random.choice(n_users)
    user_counts = wide_df_g.iloc[:, user_pos].values
    seed_pos = int(np.argmax(user_counts))
    current_guilds = set(np.flatnonzero(user_counts).tolist())
    current_guilds.add(seed_pos)

    print ('Hello {0}!! \n'.format(wide_df_g.columns[user_pos]))

    # `.values` is required: a pandas Series has no `reshape` in current pandas.
    query_vector = wide_df_g.iloc[seed_pos, :].values.reshape(1, -1)
    # Ask for enough neighbours to survive the filtering below, but never more
    # than the number of guilds the model can return.
    n_neighbors = min(n_guilds, n_recommendations + len(current_guilds))
    distances_g, indices_g = knn.kneighbors(query_vector, n_neighbors = n_neighbors)

    print ('Below are new Guild Recommendations:\n')
    candidates = [
        (guild_pos, distance)
        for distance, guild_pos in zip(distances_g.flatten(), indices_g.flatten())
        if guild_pos not in current_guilds
    ]
    for rank, (guild_pos, distance) in enumerate(candidates[:n_recommendations], start=1):
        print ('{0}: {1} Guild, with distance of {2}:'.format(rank, wide_df_g.index[guild_pos], distance))

Recommend Guilds for new users or users who are not part of any Guilds.

For a randomly selected User with no Guild, take the Level. Take the Guilds of the same Level with top Pattern Counts, as they are the Users with Guilds who played more. Recommend those to our User!!

In [23]:
def guild_recommender_new_users(df_no_g,df_g):
    """Print Guild recommendations for one randomly chosen avatar without a guild.

    A random row of ``df_no_g`` is picked. Guilds in ``df_g`` at that row's Level
    are ranked by PatternCount, and the top five distinct guilds are printed.

    Parameters
    ----------
    df_no_g : pandas.DataFrame
        Rows of avatars without a guild; must have 'AvatarID' and 'Level' columns.
    df_g : pandas.DataFrame
        Rows of avatars with a guild; must have 'Level', 'Guild' and
        'PatternCount' columns.

    Returns
    -------
    None
        The result is printed.
    """
    if df_no_g.shape[0] == 0:
        print ('No users without a Guild are available.')
        return

    query_index_no_g = np.random.choice(df_no_g.shape[0])
    query_user = df_no_g.iloc[query_index_no_g, :]

    # Read the fields by label: integer keys on a label-indexed Series are a
    # deprecated positional fallback and depend on the column order.
    same_level = df_g[df_g['Level'].isin([query_user['Level']])]
    # After sorting, the first row of each guild is its highest count, so dropping
    # later duplicates keeps a guild from appearing more than once in the list.
    top_guilds = (
        same_level.sort_values(['PatternCount'], ascending=False)
                  .drop_duplicates(subset=['Guild'])
                  .head()
    )

    print ('Hello {0}!! \n'.format(query_user['AvatarID']))
    print ('Below are your Guild Recommendations:\n')
    print (top_guilds['Guild'].tolist())

In [24]:
# Print Race & Class recommendations for one randomly drawn query.
raceclass_recommender(wide_df_rc_sparse=wide_df_rc_sparse, wide_df_rc=wide_df_rc)

Hello 7!! 

Below are recommendations for Race and Class combinations:

1: Undead & Rogue, with distance of 0.9999242420478779:
2: Blood Elf & Hunter, with distance of 0.9999430869321557:
3: Undead & Mage, with distance of 0.9999452786632852:
4: Tauren & Warrior, with distance of 0.9999789444668533:
5: Blood Elf & Paladin, with distance of 0.9999911328926738:


In [29]:
# Print Guild recommendations for one randomly drawn query.
guild_recommender(wide_df_g_sparse=wide_df_g_sparse, wide_df_g=wide_df_g)

Hello 109!! 

Below are new Guild Recommendations:

1: 235 Guild, with distance of 0.22721195199458677:
2: 55 Guild, with distance of 0.5781964490304243:
3: 6 Guild, with distance of 0.8795782867505325:
4: 0 Guild, with distance of 0.9283130820051884:
5: 201 Guild, with distance of 0.9362782951157229:


In [26]:
# Print Guild recommendations for one randomly drawn user without a guild.
guild_recommender_new_users(df_no_g=df_no_g, df_g=df_g)

Hello 72885!! 

Below are your Guild Recommendations:

['65', '220', '36', '24', '53']


As and when needed, we can recommend one of the below, or all of them in combination, to personalize the User's game-playing experience:

Race Class combinations
New Guilds for users already playing in Guilds
Guilds for new Users or Users not playing in Guilds

Collaborative filtering produces recommendations based on the knowledge of users’ attitude to items, that is it uses the “wisdom of the crowd” to recommend items.

In general, Collaborative filtering is the workhorse of recommender engines.