In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from rankfm.rankfm import RankFM
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import streamlit as st
from PIL import Image
import warnings

## Streamlit config

In [None]:
if 'total' not in st.session_state:
    st.session_state['total'] = 300

## Dataset Sourcing

In [94]:
#Load data
path = "data/up-selling/peotv/"

#Ratings
ratings = pd.read_csv(path+'azure/peoTV_user_ratings.csv')

try:
    ratings.drop(["Unnamed: 0","ratings"],axis=1,inplace=True)
except:
    pass

In [95]:
ITEM_COLUMN = "itemId"
USER_COULMN = "userId"
RATING_COLUMN = "rating"

In [96]:
ratings.columns = [USER_COULMN, ITEM_COLUMN]

ratings.sort_values(by=[USER_COULMN],inplace=True,ascending=False)
# ratings = ratings.drop_duplicates(keep="first").reset_index().drop("index", axis=1)

In [97]:
le = LabelEncoder()
ratings[ITEM_COLUMN+"_des"] = ratings[ITEM_COLUMN]
ratings[USER_COULMN+"_des"] = ratings[USER_COULMN]
ratings[USER_COULMN] = le.fit_transform(ratings[USER_COULMN])
ratings[ITEM_COLUMN] = le.fit_transform(ratings[ITEM_COLUMN])

r_dict = pd.Series(ratings[ITEM_COLUMN+"_des"].values,index=ratings[ITEM_COLUMN]).to_dict()
u_dict = pd.Series(ratings[USER_COULMN+"_des"].values,index=ratings[USER_COULMN]).to_dict()

final_r_dict = {}
final_u_dict = {}

for key,value in r_dict.items():
    if value not in final_r_dict.values():
        final_r_dict[key] = value

for key,value in u_dict.items():
    if value not in final_u_dict.values():
        final_u_dict[key] = value

In [98]:
np.random.seed(100)
interactions_train, interactions_valid = np.split(ratings[[USER_COULMN,ITEM_COLUMN]], [int(.7*len(ratings))])

In [99]:
interactions_train.shape

(111590, 2)

In [100]:
interactions_valid.shape

(47825, 2)

In [101]:
model = RankFM(factors=20, loss='warp', max_samples=20, alpha=0.01, sigma=0.1, learning_rate=0.1, learning_schedule='invscaling')
model.fit(interactions_train, epochs=20, verbose=True)


training epoch: 0
log likelihood: -41808.5703125

training epoch: 1
log likelihood: -41890.87109375

training epoch: 2
log likelihood: -40716.12109375

training epoch: 3
log likelihood: -39921.44921875

training epoch: 4
log likelihood: -39265.48046875

training epoch: 5
log likelihood: -38521.078125

training epoch: 6
log likelihood: -37690.7109375

training epoch: 7
log likelihood: -36763.23828125

training epoch: 8
log likelihood: -36118.96875

training epoch: 9
log likelihood: -35482.69921875

training epoch: 10
log likelihood: -34965.44921875

training epoch: 11
log likelihood: -34382.78125

training epoch: 12
log likelihood: -33807.19140625

training epoch: 13
log likelihood: -33149.41015625

training epoch: 14
log likelihood: -32553.060546875

training epoch: 15
log likelihood: -32005.150390625

training epoch: 16
log likelihood: -31491.1796875

training epoch: 17
log likelihood: -30991.439453125

training epoch: 18
log likelihood: -30549.9296875

training epoch: 19
log likelih

In [102]:
valid_scores = model.predict(interactions_valid, cold_start='nan')
valid_scores

array([ 0.7474053 , -0.64031535,  0.7474053 , ...,         nan,
               nan,         nan], dtype=float32)

In [103]:
valid_recs = model.recommend(ratings[USER_COULMN], n_items=10, filter_previous=True, cold_start='drop')

In [104]:
def remap_labels(col):
    for k,v in final_r_dict.items():
        if int(col) == k:
            return v

def remap_index(col):
    for k,v in final_u_dict.items():
        if int(col) == k:
            return v

In [105]:
valid_recs.drop_duplicates(inplace=True)
for col in list(valid_recs.columns):
    valid_recs[col] = valid_recs[col].apply(lambda x: remap_labels(x))
valid_recs.index = valid_recs.index.map(remap_index)

In [106]:
valid_recs.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ffff7fc0c266ac5b34f91de1ba4f3039,PEO_SILVER_FTTH,PEO_UTHAYAM,PRANAMA,PEO_UNNATHAM,PEO_TITANIUM_FTTH,PEO_SILVER_PLUS_FTTH,PEO_UNNATHAM_FTTH,PEO_SILVER_PLUS,PEO_TITANIUM,PEO_ENTERTAINMENT
fffd0b8541e9f62496092d2ea8609135,PEO_SILVER_FTTH,PEO_UTHAYAM,PRANAMA,PEO_GOLD,PEO_SILVER_PLUS,PEO_UNNATHAM_FTTH,PEO_ENTERTAINMENT,PEO_FAMILY_FTTH,PEO_SILVER_PLUS_FTTH,NEW_SLT_STAFF_PACKAGE_FTTH
fffaeeddaabbd2868c183d8884b2a98d,PEO_SILVER_FTTH,PEO_UTHAYAM,PEO_UNNATHAM,PEO_SILVER_PLUS,PRANAMA,PEO_SILVER_PLUS_FTTH,PEO_TITANIUM_FTTH,PEO_TITANIUM,PEO_ENTERTAINMENT,PEO_ENTERTAINMENT_FTTH
fff9148b195a844454cf5b07c2fccbcb,PEO_SILVER_FTTH,PEO_UTHAYAM,PEO_UNNATHAM,PRANAMA,PEO_SILVER_PLUS,PEO_SILVER_PLUS_FTTH,PEO_TITANIUM_FTTH,PEO_ENTERTAINMENT_FTTH,PEO_UNNATHAM_FTTH,PEO_TITANIUM
fff25ad7f2b1ccf36fa3573e5582f0bf,PEO_SILVER_FTTH,PEO_UTHAYAM,PEO_SILVER_PLUS,PEO_UNNATHAM,PEO_SILVER_PLUS_FTTH,PEO_ENTERTAINMENT_FTTH,PRANAMA,PEO_TITANIUM,PEO_UNNATHAM_FTTH,SLTStaff_PEO_SILVER


In [107]:
valid_recs.to_csv("data/up-selling/peotv/fm/rank_fm_top_10_recs.csv")