<a href="https://colab.research.google.com/github/dhruthick/cse256project/blob/main/recommendation/lightfm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
!pip install lightfm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp310-cp310-linux_x86_64.whl size=879183 sha256=92cdeec49c919ab5b92ef9da25fe741c169e320c199bbd3e24484e93b140eec1
  Stored in directory: /root/.cache/pip/wheels/4f/9b/7e/0b256f2168511d8fa4dae4fae0200fdbd729eb424a912ad636
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.17


In [30]:
import pandas as pd
import numpy as np
import math

import scipy.sparse as sp
from tqdm import tqdm

from lightfm import LightFM

In [19]:
data_path='/content/drive/MyDrive/cse256/project/data/'
all_interactions=pd.read_csv(data_path+'all_interactions.csv')
interactions_train=pd.read_csv(data_path+'interactions_train.csv')
interactions_val=pd.read_csv(data_path+'interactions_val.csv')

In [20]:
num_playlists=len(np.unique(interactions_train.gen_pid))
num_tracks=len(np.unique(all_interactions.tid))

In [21]:
X_train=sp.coo_matrix(
    (np.ones(len(interactions_train)), (interactions_train.gen_pid,interactions_train.tid)),
    shape=(num_playlists,num_tracks)
)

In [37]:
def get_scores(pid,N):
  all_tracks=np.unique(all_interactions.tid)
  scores=model.predict(user_ids=[pid for i in range(len(all_tracks))],
                item_ids=all_tracks)
  scores=[(scores[i],all_tracks[i]) for i in range(len(all_tracks))]
  scores.sort(reverse=True)
  return scores[:N]


In [40]:
def evaluate_playlist(pid,N):
  relevantTracks=set(interactions_val[interactions_val['gen_pid']==pid].tid.values)
  scores=get_scores(pid,N)
  recommendedTracks=set([t[1] for t in scores])
  rprc=len(recommendedTracks.intersection(relevantTracks))/len(relevantTracks)
  dcg=0
  for i in range(len(scores)):
    if scores[i][1] in relevantTracks:
      dcg+=math.log(2)/math.log(i+2)
  ndcg=dcg/len(relevantTracks)
  rec_click=int(N/10)+1
  for i in range(0,int(N/10)):
    recommendedTracks=set([t[1] for t in scores[i*10:(i*10+10)]])
    if len(recommendedTracks.intersection(relevantTracks))>0:
      rec_click=i+1
      break
  return rprc,ndcg,rec_click

In [47]:
def evaluate(k):
  val_pids=np.unique(interactions_val.gen_pid)
  rprcs,ndcgs,rec_clicks=[],[],[]
  for pid in tqdm(val_pids[:k]):
    rprc,ndcg,rec_click=evaluate_playlist(pid,N=500)
    rprcs.append(rprc)
    ndcgs.append(ndcg)
    rec_clicks.append(rec_click)
  print(f'\nAverage R-Precision: {np.average(rprcs)}')
  print(f'Average NDCG: {np.average(ndcgs)}')
  print(f'Average Recommendation Clicks: {np.average(rec_clicks)}')

In [46]:
model = LightFM(no_components=200, loss='warp', learning_rate=0.02, max_sampled=400, random_state=1, user_alpha=1e-05)
for i in range(5):
    model.fit_partial(X_train, epochs=5, verbose=True)
    evaluate(50)

Epoch: 100%|██████████| 5/5 [01:58<00:00, 23.70s/it]
100%|██████████| 50/50 [00:24<00:00,  2.06it/s]



Average R-Precision: 0.3181226316128693
Average NDCG: 0.060596290753668375
Average Recommendation Clicks: 22.12


Epoch: 100%|██████████| 5/5 [03:36<00:00, 43.38s/it]
100%|██████████| 50/50 [00:25<00:00,  1.97it/s]



Average R-Precision: 0.3834137716834809
Average NDCG: 0.07383883411282867
Average Recommendation Clicks: 16.6


Epoch: 100%|██████████| 5/5 [05:02<00:00, 60.47s/it]
100%|██████████| 50/50 [00:24<00:00,  2.03it/s]



Average R-Precision: 0.4074891241276788
Average NDCG: 0.07937240305828591
Average Recommendation Clicks: 16.48


Epoch: 100%|██████████| 5/5 [06:18<00:00, 75.72s/it]
100%|██████████| 50/50 [00:25<00:00,  1.97it/s]



Average R-Precision: 0.416872961616355
Average NDCG: 0.08001754731293055
Average Recommendation Clicks: 15.36


Epoch: 100%|██████████| 5/5 [07:21<00:00, 88.40s/it]
100%|██████████| 50/50 [00:24<00:00,  2.04it/s]


Average R-Precision: 0.4397930970364904
Average NDCG: 0.08318648939048401
Average Recommendation Clicks: 15.24





In [56]:
evaluate(7476)

100%|██████████| 7476/7476 [1:03:25<00:00,  1.96it/s]


Average R-Precision: 0.4342794348785031
Average NDCG: 0.08242533121734824
Average Recommendation Clicks: 14.194756554307116



