In [1]:
import pandas as pd
import os
from scipy.sparse import csr_matrix
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k
from lightfm import LightFM
from skopt import forest_minimize

In [2]:
interaction_df = pd.read_csv("data/azure/peoTV_user_ratings.csv")

In [3]:
interaction_df.drop("Unnamed: 0",axis=1, inplace=True)

In [4]:
interaction_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1142 entries, 0 to 1141
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ACCOUNT_NUM.hash  1142 non-null   object 
 1   package           1142 non-null   object 
 2   ratings           1142 non-null   float64
dtypes: float64(1), object(2)
memory usage: 26.9+ KB


#### Interaction Matrix

In [5]:
interaction_matrix = pd.pivot_table(interaction_df, index='ACCOUNT_NUM.hash', columns='package', values='ratings')

# fill missing values with 0
interaction_matrix = interaction_matrix.fillna(0)


In [6]:
interaction_matrix

package,DOUBLEPROMO,LOTUS,NEW_SLT_STAFF_PACKAGE,NEW_YEAR_TREAT,PEO_ENTERTAINMENT,PEO_FAMILY,PEO_GOLD,PEO_PLATINUM,PEO_RELIGIOUS_PACKAGE,PEO_SILVER,PEO_SILVER_FTTH,PEO_SILVER_PLUS,PEO_TITANIUM,PEO_UNNATHAM,PEO_UTHAYAM,PRANAMA,SLTStaff_PEO_SILVER,SLT_STAFF_PEO_UNNATHAM_FTTH,SLT_STAFF_TRIPLE_PLAY
ACCOUNT_NUM.hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0009e7e4d940c2a539b89342af07e7f1,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.21875,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
00126426420cdb4b1fe951ce5bb3c05a,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.73750,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
006ff446f4e8c78e943f8ea7ee9f7388,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.71250,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
008bca99f0cccb5d07d4f03744709cf8,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.23125,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
009ef2d25878cecee8d2c8115d26f4ab,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00,0.0,0.0,0.89375,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ff6a9ebde4f19c42c852aa679c877b41,0.0,0.0,0.5625,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
ff98ca51d1c83549f22f957b7de68815,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00,0.0,0.0,0.38750,0.0,0.0,0.0,0.0
ffaa8955dcd8a0af7d148c10fd447fcc,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
ffaaa3f5d45ce8c8553efbcfa7e6ce00,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.95,0.0,0.0,0.00000,0.0,0.0,0.0,0.0


In [7]:
user_id = list(interaction_matrix.index)
user_dict = {}
counter = 0 
for i in user_id:
    user_dict[i] = counter
    counter += 1

In [8]:
len(user_dict)

1142

In [9]:
item_id = interaction_df.package.to_list()
item_dict = {}
counter = 0 
for i in item_id:
    item_dict[i] = counter
    counter += 1

In [10]:
item_dict

{'PEO_ENTERTAINMENT': 986,
 'PEO_SILVER': 1141,
 'PEO_UTHAYAM': 1114,
 'PEO_SILVER_PLUS': 1125,
 'PRANAMA': 1138,
 'PEO_UNNATHAM': 1088,
 'PEO_RELIGIOUS_PACKAGE': 1042,
 'PEO_PLATINUM': 698,
 'NEW_SLT_STAFF_PACKAGE': 1049,
 'PEO_TITANIUM': 939,
 'SLT_STAFF_TRIPLE_PLAY': 226,
 'NEW_YEAR_TREAT': 1098,
 'SLTStaff_PEO_SILVER': 775,
 'PEO_SILVER_FTTH': 774,
 'PEO_GOLD': 989,
 'DOUBLEPROMO': 1030,
 'LOTUS': 906,
 'SLT_STAFF_PEO_UNNATHAM_FTTH': 729,
 'PEO_FAMILY': 759}

In [13]:

# convert to csr matrix
interaction_csr = csr_matrix(interaction_matrix.values)
interaction_csr

<1142x19 sparse matrix of type '<class 'numpy.float64'>'
	with 948 stored elements in Compressed Sparse Row format>

### LightFM CF Recommender

In [14]:
model = LightFM(loss='warp',
                random_state=2016,
                learning_rate=0.90,
                no_components=150,
                user_alpha=0.000005)

In [15]:
model = model.fit(interaction_csr,
                  epochs=100,
                  num_threads=16, verbose=False)

In [19]:
def get_item(mydict,item):
    for k, v in mydict.items():
        if v == item:
            return k

In [20]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 5, show = True):
    
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items), item_features=interaction_csr))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index).sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: get_item(item_dict, item_dict[x])))
    scores = list(pd.Series(return_score_list).apply(lambda x:  get_item(item_dict, item_dict[x])))
    if show == True:
        print ("User: " + str(user_id))
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + str(i))
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + str(i))
            counter+=1

In [21]:
sample_recommendation_user(model,interaction_matrix,"0009e7e4d940c2a539b89342af07e7f1",user_dict,item_dict)

User: 0009e7e4d940c2a539b89342af07e7f1
Known Likes:
1- PEO_SILVER

 Recommended Items:
1- PEO_GOLD
2- SLT_STAFF_TRIPLE_PLAY
3- PEO_UTHAYAM
4- LOTUS
5- PEO_ENTERTAINMENT
