In [1]:
import os
import time
import json
import joblib
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.cluster import KMeans
from lightgbm import LGBMClassifier
from surprise import *
from sklearn import preprocessing
import pandas as pd

In [2]:
# Data Reading
df = pd.read_csv(r'Data/new_data_web.csv')
df = df.dropna(subset=['id'])
df = df.drop(["total_time"],axis=1)

df["id"] =df["id"].astype('int64')
df = df.fillna(0)
df

Unnamed: 0,id,level_id,donuts,candy,level,progress,id Quizo,nbr Items,perseverance badge,concentration,...,Q_Avg_Time,Avg_Time_math,Avg_Time__science,Avg_Time_ar,Avg_Time_fr,Player_Type,type_achiever,type_disruptor,type_freeSpirit,type_player
0,1661,4.0,1.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0,0,0,type_player,42.35,2.0,0.0,55.65
1,1677,3.0,20.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,5,0,5,0,0,type_achiever,71.00,22.0,0.0,7.00
2,1681,3.0,10.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,5,0,5,0,0,type_achiever,70.00,20.0,0.0,10.00
3,1683,3.0,2.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,1,1,0,0,0,type_disruptor,7.00,92.0,0.0,1.00
4,1700,3.0,46.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0,0,0,type_disruptor,20.00,77.0,0.0,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16797,64664,7.0,1.0,350.0,1.0,0.000000,0.0,0.0,0.0,1.0,...,1,3,1,0,0,type_disruptor,0.00,88.0,0.0,12.00
16798,64670,8.0,86.0,350.0,1.0,0.000000,0.0,0.0,4.0,7.0,...,69,79,0,0,0,type_player,5.00,9.0,4.0,82.00
16799,64673,6.0,20.0,350.0,1.0,0.392857,42866.0,1.0,1.0,3.0,...,1,1,0,0,0,type_disruptor,21.00,64.0,0.0,15.00
16800,64677,8.0,18.0,350.0,1.0,0.000000,0.0,0.0,3.0,7.0,...,15,15,0,0,0,type_player,4.00,6.0,0.0,90.00


In [3]:
# Rename Arabic columns (model saved with english names that's why)
df = df.rename(columns={"badge المثابرة": "perseverance badge", "النجاح": "Success","التركيز": "concentration", "التميز": "Excellence", "الإرتقاء": "Upgrade"})
df

Unnamed: 0,id,level_id,donuts,candy,level,progress,id Quizo,nbr Items,perseverance badge,concentration,...,Q_Avg_Time,Avg_Time_math,Avg_Time__science,Avg_Time_ar,Avg_Time_fr,Player_Type,type_achiever,type_disruptor,type_freeSpirit,type_player
0,1661,4.0,1.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0,0,0,type_player,42.35,2.0,0.0,55.65
1,1677,3.0,20.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,5,0,5,0,0,type_achiever,71.00,22.0,0.0,7.00
2,1681,3.0,10.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,5,0,5,0,0,type_achiever,70.00,20.0,0.0,10.00
3,1683,3.0,2.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,1,1,0,0,0,type_disruptor,7.00,92.0,0.0,1.00
4,1700,3.0,46.0,350.0,1.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0,0,0,type_disruptor,20.00,77.0,0.0,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16797,64664,7.0,1.0,350.0,1.0,0.000000,0.0,0.0,0.0,1.0,...,1,3,1,0,0,type_disruptor,0.00,88.0,0.0,12.00
16798,64670,8.0,86.0,350.0,1.0,0.000000,0.0,0.0,4.0,7.0,...,69,79,0,0,0,type_player,5.00,9.0,4.0,82.00
16799,64673,6.0,20.0,350.0,1.0,0.392857,42866.0,1.0,1.0,3.0,...,1,1,0,0,0,type_disruptor,21.00,64.0,0.0,15.00
16800,64677,8.0,18.0,350.0,1.0,0.000000,0.0,0.0,3.0,7.0,...,15,15,0,0,0,type_player,4.00,6.0,0.0,90.00


In [4]:
# Pick a test sample 
test_sample = df.iloc[1]
test_sample

id                               1677
level_id                          3.0
donuts                           20.0
candy                           350.0
level                             1.0
progress                          0.0
id Quizo                          0.0
nbr Items                         0.0
perseverance badge                0.0
concentration                     0.0
Success                           0.0
Excellence                        0.0
Upgrade                           0.0
total_Time_Education           1764.0
nb_mistakes                       0.0
correct Question                 10.0
correct_Q_Math                    0.0
correct_Q_Science                10.0
correct_Q_Ar                      0.0
correct_Q_Fr                      0.0
Q_Best_Time                       5.0
best_time_math                    0.0
best_time_science                 5.0
best_time_ar                      0.0
best_time_fr                      0.0
Q_Worst_Time                      0.0
worst_time_m

In [5]:
from collections import * 
Counter(df['Player_Type'])

Counter({'type_player': 4625,
         'type_achiever': 7065,
         'type_disruptor': 4001,
         'type_freeSpirit': 1111})

In [12]:
# Select only features in relation gamification elements (elements selected can be modified and upgraded)
# Rewards : donuts, candy, nbr Items
# Challenges : correct Question, Q_Best_Time
# Badges : perseverance badge, concentration, Success, Excellence, Upgrade
# levels : level
# Learning : total_Time_Education, correct Question
game_elem = {'LeaderBoard' : ['correct Question', 'Q_Best_Time'],
             'Puzzle' : ['id Quizo', 'nbr Items'],
            'Badges' : ['perseverance badge', 'concentration', 'Success', 'Excellence', 'Upgrade'],
            'Trophy' : ['donuts', 'candy', 'nbr Items'],
            'Learning' : ['total_Time_Education', 'correct Question'],
            'levels' : ['level','level_id']}

df_p = df[['id', 'level_id', 'id Quizo','donuts', 'candy', 'nbr Items','correct Question', 'Q_Best_Time', 'total_Time_Education', 'correct Question',
         'level', 'perseverance badge', 'concentration', 'Success', 'Excellence', 'Upgrade', "Player_Type"]]
df_p

Unnamed: 0,id,level_id,id Quizo,donuts,candy,nbr Items,correct Question,Q_Best_Time,total_Time_Education,correct Question.1,level,perseverance badge,concentration,Success,Excellence,Upgrade,Player_Type
0,1661,4.0,0.0,1.0,350.0,0.0,1.0,1.0,21.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,type_player
1,1677,3.0,0.0,20.0,350.0,0.0,10.0,5.0,1764.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,type_achiever
2,1681,3.0,0.0,10.0,350.0,0.0,10.0,5.0,1757.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,type_achiever
3,1683,3.0,0.0,2.0,350.0,0.0,1.0,0.0,119.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,type_disruptor
4,1700,3.0,0.0,46.0,350.0,0.0,1.0,0.0,1708.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,type_disruptor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16797,64664,7.0,0.0,1.0,350.0,0.0,1.0,0.0,637.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,type_disruptor
16798,64670,8.0,0.0,86.0,350.0,0.0,86.0,15.0,23387.0,86.0,1.0,4.0,7.0,7.0,4.0,0.0,type_player
16799,64673,6.0,42866.0,20.0,350.0,1.0,4.0,3.0,1589.0,4.0,1.0,1.0,3.0,0.0,2.0,0.0,type_disruptor
16800,64677,8.0,0.0,18.0,350.0,0.0,18.0,1.0,15183.0,18.0,1.0,3.0,7.0,3.0,2.0,0.0,type_player


In [13]:
def get_elemenets(elements, game_elem):
    base = []
    for i in elements :
        for j in game_elem :
            if i in game_elem[j]:
                base.append(j)
    return(list(set(base)))

In [30]:
def custom_recommender(df_p, user_id, player_type = 'type_player'):
    # select the portion of the dataset that we will work with based on the player type
    df_p = df_p.loc[df_p["Player_Type"] == player_type]
    
    # Build a dataset format to be able to construct the similarity matrix 
    final_df = df_p.drop(["Player_Type"],axis=1).melt(id_vars="id")

    # Rename columns 
    final_df.columns = ["user", "item", "rating"]

    # Normalize the raatings
    normalized_arr = preprocessing.normalize([final_df['rating']])
    original_final = final_df.copy() #Save a original copy
    final_df['rating'] = normalized_arr[0]
    
    # Specifiy the reader scale
    reader = Reader(rating_scale=(0, 1))
    
    # Loads Pandas dataframe
    data = Dataset.load_from_df(final_df[["user", "item", "rating"]], reader)
    trainingSet = data.build_full_trainset()

    # To use item-based cosine similarity
    
    # Build the algo training 
    algo = SVD(n_epochs = 15, lr_all=0.001, reg_all=0.2, random_state=0)
    # Fit the model
    algo.fit(trainingSet)
    
    # List the recommandation for the selected user 
    recommandation = {"Subject" :[], "recommanded" : []}
    for element in set(original_final['item']) :
        prediction = algo.predict(user_id, element)
        recommandation["Subject"].append(element)
        recommandation["recommanded"].append(prediction.est)

    rec = pd.DataFrame(recommandation)  
    rec = rec.fillna(0)
    rec = rec.sort_values(by=['recommanded'])  
    player = list((df_p.loc[df_p['id'] == user_id])["Player_Type"])
    return(rec,player[0])


In [31]:
recommandations, player = custom_recommender(df_p,1700,"type_disruptor")
try :
   print('Recommandation Results:')
   print(" * Top recommanded subject :",str(recommandations.iloc[-1]['Subject']))
   subject_r = str(recommandations.iloc[-1]['Subject'])
   print(" * Least recommanded subject :",str(recommandations.iloc[0]['Subject']))
except :
   pass
print("\nPlayer_type for this user :", player)

#Only the top 5 elements will be considered
print("\nRecommended game elements for this user to focus on:", get_elemenets(recommandations[-5:]["Subject"], game_elem))

recommandations[-5:]

Recommandation Results:
 * Top recommanded subject : id Quizo
 * Least recommanded subject : correct Question

Player_type for this user : type_disruptor

Recommended game elements for this user to focus on: ['Puzzle', 'Trophy', 'levels', 'Learning']


Unnamed: 0,Subject,recommanded
12,level_id,0.0
13,donuts,0.0
2,candy,6.5e-05
7,total_Time_Education,0.003269
11,id Quizo,0.004803


In [32]:
recommandations, player = custom_recommender(df_p,64670,"type_player")
try :
   print('Recommandation Results:')
   print(" * Top recommanded subject :",str(recommandations.iloc[-1]['Subject']))
   subject_r = str(recommandations.iloc[-1]['Subject'])
   print(" * Least recommanded subject :",str(recommandations.iloc[0]['Subject']))
except :
   pass
print("\nPlayer_type for this user :", player)

#Only the top 5 elements will be considered
print("\nRecommended game elements for this user to focus on:", get_elemenets(recommandations[-5:]["Subject"], game_elem))

recommandations[-5:] 

Recommandation Results:
 * Top recommanded subject : total_Time_Education
 * Least recommanded subject : correct Question

Player_type for this user : type_player

Recommended game elements for this user to focus on: ['Puzzle', 'Trophy', 'levels', 'Learning']


Unnamed: 0,Subject,recommanded
12,level_id,0.0
13,donuts,0.0
2,candy,1.9e-05
11,id Quizo,0.002398
7,total_Time_Education,0.004413


In [33]:
recommandations, player = custom_recommender(df_p,1677,"type_achiever")
try :
   print('Recommandation Results:')
   print(" * Top recommanded subject :",str(recommandations.iloc[-1]['Subject']))
   subject_r = str(recommandations.iloc[-1]['Subject'])
   print(" * Least recommanded subject :",str(recommandations.iloc[0]['Subject']))
except :
   pass
print("\nPlayer_type for this user :", player)

#Only the top 5 elements will be considered
print("\nRecommended game elements for this user to focus on:", get_elemenets(recommandations[-5:]["Subject"], game_elem))

recommandations[-5:]

Recommandation Results:
 * Top recommanded subject : id Quizo
 * Least recommanded subject : Upgrade

Player_type for this user : type_achiever

Recommended game elements for this user to focus on: ['Learning', 'Trophy', 'Puzzle', 'LeaderBoard']


Unnamed: 0,Subject,recommanded
0,correct Question,0.000942
13,donuts,0.00095
2,candy,0.001282
7,total_Time_Education,0.005465
11,id Quizo,0.006158
