In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cellphones-recommendations/cellphones ratings.csv
/kaggle/input/cellphones-recommendations/cellphones data.csv
/kaggle/input/cellphones-recommendations/cellphones users.csv


In [2]:
!pip install lightfm

Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp310-cp310-linux_x86_64.whl size=808329 sha256=eb68f90279d1901433a7e56cb21a97a2ee471a7359aa91fc4c8f32e1c455b7a3
  Stored in directory: /root/.cache/pip/wheels/4f/9b/7e/0b256f2168511d8fa4dae4fae0200fdbd729eb424a912ad636
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.17


# Import Libraries

In [3]:
import os
import random
import numpy as np
import pandas as pd
from scipy import sparse

import lightfm
from lightfm import LightFM, cross_validation
from lightfm.evaluation import precision_at_k, auc_score
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Read Data

In [4]:
import pandas as pd
import random

df_playlist = pd.read_csv('/kaggle/input/cellphones-recommendations/cellphones ratings.csv', 
                          on_bad_lines='skip',  # Replaces error_bad_lines and warn_bad_lines
                          skiprows=lambda i: i > 0 and random.random() > 0.50)  # Corrected random condition

df_playlist

Unnamed: 0,user_id,cellphone_id,rating
0,0,30,1
1,0,5,3
2,0,9,3
3,0,23,2
4,0,22,1
...,...,...,...
484,257,20,8
485,258,22,9
486,258,29,6
487,258,26,7


In [5]:
df_playlist.head()

Unnamed: 0,user_id,cellphone_id,rating
0,0,30,1
1,0,5,3
2,0,9,3
3,0,23,2
4,0,22,1


In [6]:
df_playlist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 489 entries, 0 to 488
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   user_id       489 non-null    int64
 1   cellphone_id  489 non-null    int64
 2   rating        489 non-null    int64
dtypes: int64(3)
memory usage: 11.6 KB


# Data preprocessing

In [7]:
df_playlist =df_playlist.groupby('cellphone_id').filter(lambda x: len(x)>=4)
df_playlist

Unnamed: 0,user_id,cellphone_id,rating
0,0,30,1
1,0,5,3
2,0,9,3
3,0,23,2
4,0,22,1
...,...,...,...
484,257,20,8
485,258,22,9
486,258,29,6
487,258,26,7


In [8]:
df_playlist = df_playlist[df_playlist.groupby('user_id').cellphone_id.transform('nunique')>=4]
df_playlist

Unnamed: 0,user_id,cellphone_id,rating
0,0,30,1
1,0,5,3
2,0,9,3
3,0,23,2
4,0,22,1
...,...,...,...
484,257,20,8
485,258,22,9
486,258,29,6
487,258,26,7


# Define functions

In [9]:
def create_interaction_matrix(df,user_col, item_col, rating_col, norm= False, threshold = None):
    '''
    Function to create an interaction matrix dataframe from transactional type interactions
    Required Input -
        - df = Pandas DataFrame containing user-item interactions
        - user_col = column name containing user's identifier
        - item_col = column name containing item's identifier
        - rating col = column name containing user feedback on interaction with a given item
        - norm (optional) = True if a normalization of ratings is needed
        - threshold (required if norm = True) = value above which the rating is favorable
    Expected output - 
        - Pandas dataframe with user-item interactions ready to be fed in a recommendation algorithm
    '''
    interactions = df.groupby([user_col, item_col])[rating_col] \
            .sum().unstack().reset_index(). \
            fillna(0).set_index(user_col)
    if norm:
        interactions = interactions.applymap(lambda x: 1 if x > threshold else 0)
    return interactions

In [10]:
def create_user_dict(interactions):
    '''
    Function to create a user dictionary based on their index and number in interaction dataset
    Required Input - 
        interactions - dataset create by create_interaction_matrix
    Expected Output -
        user_dict - Dictionary type output containing interaction_index as key and user_id as value
    '''
    user_id = list(interactions.index)
    user_dict = {}
    counter = 0 
    for i in user_id:
        user_dict[i] = counter
        counter += 1
    return user_dict

In [11]:
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value
    '''
    item_dict ={}
    for i in range(df.shape[0]):
        item_dict[(df.loc[i,id_col])] = df.loc[i,name_col]
    return item_dict


In [12]:
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    Function to run matrix-factorization algorithm
    Required Input -
        - interactions = dataset create by create_interaction_matrix
        - n_components = number of embeddings you want to create to define Item and user
        - loss = loss function other options are logistic, brp
        - epoch = number of epochs to run 
        - n_jobs = number of cores used for execution 
    Expected Output  -
        Model - Trained model
    '''
    
    #uncommented for train test split
#     x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components= n_components, loss=loss,k=k)
    model.fit(x,epochs=epoch,num_threads = n_jobs)
    return model

In [13]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model
        - interactions = dataset used for training the model
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing interaction_index as key and user_id as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - threshold = value above which the rating is favorable in new interaction matrix
        - nrec_items = Number of output recommendation needed
    Expected Output - 
        - Prints list of items the given user has already bought
        - Prints list of N recommended items  which user hopefully will be interested in
    '''
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index) \
                       .sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1
    return return_score_list

# Create model inputs

In [14]:
interactions = create_interaction_matrix(df = df_playlist, user_col = "user_id", item_col = 'cellphone_id', rating_col = 'rating', norm= False, threshold = None)
interactions.head()

cellphone_id,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,10.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,8.0,0.0,8.0,0.0,6.0,0.0,0.0,9.0
10,0.0,0.0,0.0,9.0,9.0,3.0,7.0,0.0,0.0,0.0,...,9.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,5.0


In [15]:
user_dict = create_user_dict(interactions=interactions)
print(user_dict)

{0: 0, 6: 1, 8: 2, 10: 3, 12: 4, 16: 5, 24: 6, 26: 7, 27: 8, 28: 9, 29: 10, 30: 11, 32: 12, 33: 13, 35: 14, 36: 15, 37: 16, 38: 17, 52: 18, 53: 19, 60: 20, 79: 21, 84: 22, 85: 23, 91: 24, 98: 25, 99: 26, 100: 27, 104: 28, 106: 29, 110: 30, 111: 31, 112: 32, 113: 33, 116: 34, 119: 35, 120: 36, 123: 37, 124: 38, 126: 39, 129: 40, 137: 41, 140: 42, 143: 43, 145: 44, 148: 45, 152: 46, 154: 47, 156: 48, 160: 49, 162: 50, 164: 51, 169: 52, 178: 53, 183: 54, 194: 55, 200: 56, 203: 57, 204: 58, 208: 59, 211: 60, 226: 61, 230: 62, 231: 63, 234: 64, 235: 65, 236: 66, 237: 67, 238: 68, 240: 69, 242: 70, 243: 71, 244: 72, 245: 73, 246: 74, 251: 75, 252: 76, 255: 77, 256: 78, 257: 79, 258: 80}


In [16]:
df_playlist

Unnamed: 0,user_id,cellphone_id,rating
0,0,30,1
1,0,5,3
2,0,9,3
3,0,23,2
4,0,22,1
...,...,...,...
484,257,20,8
485,258,22,9
486,258,29,6
487,258,26,7


In [17]:
df_item=pd.read_csv('/kaggle/input/cellphones-recommendations/cellphones data.csv')

In [18]:
item_dict = create_item_dict(df=df_item, id_col='cellphone_id', name_col='model')
print(item_dict)

{0: 'iPhone SE (2022)', 1: 'iPhone 13 Mini', 2: 'iPhone 13', 3: 'iPhone 13 Pro', 4: 'iPhone 13 Pro Max', 5: 'iPhone XR', 6: 'Zenfone 8', 7: 'Galaxy A13', 8: 'Galaxy A32', 9: 'Galaxy A53', 10: 'Galaxy S22', 11: 'Galaxy S22 Plus', 12: 'Galaxy S22 Ultra', 13: 'Galaxy Z Flip 3', 14: 'Galaxy Z Fold 3', 15: 'Pixel 6 \xa0', 16: 'Pixel 6a', 17: 'Pixel 6 Pro\xa0', 18: 'Nord N20', 19: 'Nord 2T', 20: '10 Pro', 21: '10T', 22: 'Find X5 Pro', 23: 'X80 Pro', 24: 'Redmi Note 11', 25: '11T Pro', 26: '12 Pro', 27: 'Poco F4', 28: 'Xperia Pro', 29: 'Moto G Stylus (2022)', 30: 'Moto G Play (2021)', 31: 'Moto G Pure', 32: 'Moto G Power (2022)'}


In [19]:
x = sparse.csr_matrix(interactions.values)
train, test = lightfm.cross_validation.random_train_test_split(x, test_percentage=0.2, random_state=None)

## Build a model

In [20]:
%time
model = runMF(interactions = train,
                 n_components = 30,
                 loss = 'warp',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs


In [21]:
train_auc = auc_score(model, train, num_threads=4).mean()
print('Train AUC: %s' % train_auc)

Train AUC: 0.9801597


In [22]:
test_auc = auc_score(model, test, train_interactions=train, num_threads=4).mean()
print('Test AUC: %s' % test_auc)

Test AUC: 1.0


In [23]:
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

In [24]:
print('train Precision %.2f, test Precision %.2f.' % (train_precision, test_precision))

train Precision 0.43, test Precision 0.16.


## Check available user IDs

In [25]:
print(user_dict.keys())  # Check available user IDs

dict_keys([0, 6, 8, 10, 12, 16, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36, 37, 38, 52, 53, 60, 79, 84, 85, 91, 98, 99, 100, 104, 106, 110, 111, 112, 113, 116, 119, 120, 123, 124, 126, 129, 137, 140, 143, 145, 148, 152, 154, 156, 160, 162, 164, 169, 178, 183, 194, 200, 203, 204, 208, 211, 226, 230, 231, 234, 235, 236, 237, 238, 240, 242, 243, 244, 245, 246, 251, 252, 255, 256, 257, 258])


# Examples

In [26]:
rec_list = sample_recommendation_user(model = model, 
                                      interactions = interactions, 
                                      user_id = 6, 
                                      user_dict = user_dict,
                                      item_dict = item_dict, 
                                      threshold = 0,
                                      nrec_items = 10,
                                      show = True)

Known Likes:
1- Moto G Power (2022)
2- Poco F4
3- X80 Pro
4- Nord 2T
5- Galaxy Z Flip 3
6- iPhone 13 Mini

 Recommended Items:
1- Zenfone 8
2- Pixel 6a
3- 10T
4- 11T Pro
5- Galaxy S22 Plus
6- iPhone 13 Pro
7- Galaxy A53
8- Moto G Pure
9- iPhone 13
10- Pixel 6  


In [27]:
rec_list = sample_recommendation_user(model = model, 
                                      interactions = interactions, 
                                      user_id = 8, 
                                      user_dict = user_dict,
                                      item_dict = item_dict, 
                                      threshold = 0,
                                      nrec_items = 10,
                                      show = True)

Known Likes:
1- Moto G Power (2022)
2- Moto G Stylus (2022)
3- Poco F4
4- 11T Pro
5- Find X5 Pro
6- 10T
7- Galaxy S22 Plus

 Recommended Items:
1- X80 Pro
2- Galaxy Z Fold 3
3- Pixel 6 Pro 
4- Nord 2T
5- iPhone 13 Pro Max
6- Pixel 6  
7- Moto G Pure
8- Moto G Play (2021)
9- iPhone 13
10- Galaxy S22 Ultra
