# Importing necessary libraries

In [21]:
import pandas as pd
import numpy as np
from lightfm import LightFM

# Creating dummy datasets 

In [2]:
# create dummy dataset
data = {'user': ['u1','u1','u2','u2', 'u3', 'u3', 'u3'], 
        'item': ['i1', 'i3', 'i2', 'i3', 'i1', 'i4', 'i2'], 
        'r': [1,2,1,3,4,5,2]
       }
df = pd.DataFrame(data,  columns = ['user', 'item', 'r'])
df

Unnamed: 0,user,item,r
0,u1,i1,1
1,u1,i3,2
2,u2,i2,1
3,u2,i3,3
4,u3,i1,4
5,u3,i4,5
6,u3,i2,2


In [6]:
#dummy item features
data = {'user': ['u1','u2','u3'], 
        'f1': [1, 0, 1], 
        'f2': [1, 1, 1],
        'f3': [0, 0, 1],
        'loc': ['del', 'mum', 'del']
       }
features = pd.DataFrame(data,  columns = ['user', 'f1', 'f2', 'f3', 'loc'])
features

Unnamed: 0,user,f1,f2,f3,loc
0,u1,1,1,0,del
1,u2,0,1,0,mum
2,u3,1,1,1,del


# Creating user features

In [7]:
uf = []
col = ['f1']*len(features.f1.unique()) + ['f2']*len(features.f2.unique()) + ['f3']*len(features.f3.unique()) + ['loc']*len(features['loc'].unique())
unique_f1 = list(features.f1.unique()) + list(features.f2.unique()) + list(features.f3.unique()) + list(features['loc'].unique())
#print('f1:', unique_f1)
for x,y in zip(col, unique_f1):
    res = str(x)+ ":" +str(y)
    uf.append(res)
    print(res)


f1:1
f1:0
f2:1
f3:0
f3:1
loc:del
loc:mum


# Fitting the dataset

In [8]:
from lightfm.data import Dataset
# we call fit to supply userid, item id and user/item features
dataset1 = Dataset()
dataset1.fit(
        df['user'].unique(), # all the users
        df['item'].unique(), # all the items
        #user_features = ['f1:1', 'f1:0', 'f2:1', 'f2:0', 'f3:1', 'f3:0', 'loc:mum', 'loc:del']
        user_features = uf
)



In [9]:
# plugging in the interactions and their weights
(interactions, weights) = dataset1.build_interactions([(x[0], x[1], x[2]) for x in df.values ])


In [10]:

interactions.todense()



matrix([[1, 1, 0, 0],
        [0, 1, 1, 0],
        [1, 0, 1, 1]], dtype=int32)

In [11]:
weights.todense()

matrix([[1., 2., 0., 0.],
        [0., 3., 1., 0.],
        [4., 0., 2., 5.]], dtype=float32)

# Building user features

In [12]:
def feature_colon_value(my_list):
    """
    Takes as input a list and prepends the columns names to respective values in the list.
    For example: if my_list = [1,1,0,'del'],
    resultant output = ['f1:1', 'f2:1', 'f3:0', 'loc:del']
   
    """
    result = []
    ll = ['f1:','f2:', 'f3:', 'loc:']
    aa = my_list
    for x,y in zip(ll,aa):
        res = str(x) +""+ str(y)
        result.append(res)
    return result


In [13]:

ad_subset = features[["f1", 'f2','f3', 'loc']] 
ad_list = [list(x) for x in ad_subset.values]
feature_list = []
for item in ad_list:
    feature_list.append(feature_colon_value(item))
    print(feature_colon_value(item))
print(f'Final output: {feature_list}')    

['f1:1', 'f2:1', 'f3:0', 'loc:del']
['f1:0', 'f2:1', 'f3:0', 'loc:mum']
['f1:1', 'f2:1', 'f3:1', 'loc:del']
Final output: [['f1:1', 'f2:1', 'f3:0', 'loc:del'], ['f1:0', 'f2:1', 'f3:0', 'loc:mum'], ['f1:1', 'f2:1', 'f3:1', 'loc:del']]


In [15]:
user_tuple = list(zip(features.user, feature_list))
user_tuple


[('u1', ['f1:1', 'f2:1', 'f3:0', 'loc:del']),
 ('u2', ['f1:0', 'f2:1', 'f3:0', 'loc:mum']),
 ('u3', ['f1:1', 'f2:1', 'f3:1', 'loc:del'])]

In [16]:
user_features = dataset1.build_user_features(user_tuple, normalize= False)
user_features.todense()

matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)

In [17]:
user_features.todense() # rows are the users and columns are the user features : total 10 features. WHY 10 see below


matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)

In [18]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()
dataset1.mapping()

({'u1': 0, 'u2': 1, 'u3': 2},
 {'u1': 0,
  'u2': 1,
  'u3': 2,
  'f1:1': 3,
  'f1:0': 4,
  'f2:1': 5,
  'f3:0': 6,
  'f3:1': 7,
  'loc:del': 8,
  'loc:mum': 9},
 {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3},
 {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3})

In [19]:
user_feature_map


{'u1': 0,
 'u2': 1,
 'u3': 2,
 'f1:1': 3,
 'f1:0': 4,
 'f2:1': 5,
 'f3:0': 6,
 'f3:1': 7,
 'loc:del': 8,
 'loc:mum': 9}

# Training the model

In [22]:

model = LightFM(loss='warp')
model.fit(interactions, # spase matrix representing whether user u and item i interacted
      user_features= user_features, # we have built the sparse matrix above
      sample_weight= weights, # spase matrix representing how much value to give to user u and item i inetraction: i.e ratings
      epochs=10)

<lightfm.lightfm.LightFM at 0x1a1f0a1b50>

# Evaluating the model

In [23]:
from lightfm.evaluation import auc_score
train_auc = auc_score(model,
                      interactions,
                      user_features=user_features
                     ).mean()
print('Hybrid training set AUC: %s' % train_auc)


Hybrid training set AUC: 0.8333333


# Prediction for KNOWN user

In [24]:

# predict for existing user
user_x = user_id_map['u3']
n_users, n_items = interactions.shape # no of users * no of items
model.predict(user_x, np.arange(n_items)) # means predict for all


array([-0.14289093, -0.64892906, -0.27624682, -0.23572367])

# Prediction for NEW user

In [25]:
# predict for new user
user_feature_list = ['f1:1', 'f2:1', 'f3:0', 'loc:del']

In [29]:
def format_newuser_input(user_feature_map, user_feature_list):
  #user_feature_map = user_feature_map  
  num_features = len(user_feature_list)
  normalised_val = 1.0 
  target_indices = []
  for feature in user_feature_list:
    try:
        target_indices.append(user_feature_map[feature])
    except KeyError:
        print("new user feature encountered '{}'".format(feature))
        pass
  #print("target indices: {}".format(target_indices))
  new_user_features = np.zeros(len(user_feature_map.keys()))
  for i in target_indices:
    new_user_features[i] = normalised_val
  new_user_features = sparse.csr_matrix(new_user_features)
  return(new_user_features)

In [30]:
new_user_features = format_newuser_input(user_feature_map, user_feature_list)

In [31]:
new_user_features.todense()


matrix([[0., 0., 0., 1., 0., 1., 1., 0., 1., 0.]])

In [28]:

model.predict(0, np.arange(n_items), user_features=new_user_features) # Here 0 means pick the first row of the user_features sparse matrix

array([-1.71516085, -2.16669893, -1.87196445, -1.85628653])

In [None]:
# TO-DO: Add item_features matrix!