# LAB 4 : Collaborative Filtering on Last.fm Dataset

In this lab, we use the Last.fm (https://www.last.fm/) 360K Users dataset (http://ocelma.net/MusicRecommendationDataset/lastfm-360K.html). \
The dataset contains `<user, artist, plays>` tuples collected from about 360,000 users. \
Each record in the dataset has the format: <em> user-mboxsha1 \t musicbrainz-artist-id \t artist-name \t plays. </em>

We use the `implicit.datasets` module to download the Last.fm dataset locally.






In [None]:
import pandas as pd
import numpy as np
from implicit.datasets.lastfm import get_lastfm

# get_lastfm() hands back three objects:
#   artists           - string array labelling each row of the matrix
#   users             - string array labelling each column of the matrix
#   artist_user_plays - scipy sparse matrix of play counts
#                       (one row per artist, one column per user)
(artists, users, artist_user_plays) = get_lastfm()

# Show the sparse play-count matrix that was just loaded.
print(artist_user_plays)

Weight the play-count matrix before training the model. The weighting helps by:
- Reducing the impact of users who have played the same artist thousands of times.
- Reducing the weight given to popular items.


In [None]:
from implicit.nearest_neighbours import bm25_weight

# Re-weight the raw play counts with BM25 before fitting the model;
# K1 and B are passed straight through to bm25_weight.
artist_user = bm25_weight(
    artist_user_plays,
    K1=100,
    B=0.8,
)
print(artist_user)

Train an ALS model using implicit

In [None]:
from implicit.als import AlternatingLeastSquares

# Seed the model so its randomly initialised factors -- and therefore the
# recommendations produced further down -- are repeatable across a
# Restart Kernel -> Run All.
model = AlternatingLeastSquares(
    factors=64,
    regularization=0.05,
    alpha=2.0,
    random_state=42,
)

# implicit expects a user-item (here: user-artist) matrix, so transpose the
# artist-user matrix and convert the result to CSR before fitting.
user_artist = artist_user.T.tocsr()

model.fit(user_artist)

Results: interactive demos for user recommendations and similar artists

In [None]:
# userid = 12345

# ids, scores = model.recommend(userid, user_artist[userid], N=10, filter_already_liked_items=False)
# # print(ids)
# df = pd.DataFrame({"artist": artists[ids], "score": scores, "already_liked": np.in1d(ids, user_artist[userid].indices),})

In [None]:
import gradio as gr

def music_recommend(userid):
    """Recommend ten artists for one user of the fitted model.

    Parameters
    ----------
    userid : str or int
        Row index of the user in ``user_artist``. The Gradio text box
        delivers it as a string, so it is converted with ``int()`` first.

    Returns
    -------
    pandas.DataFrame
        One row per recommended artist, with columns ``"artist"`` and
        ``"score"``. Artists the user has already played are filtered out.

    Raises
    ------
    gr.Error
        If ``userid`` is not an integer or lies outside the rows of
        ``user_artist``. Gradio shows the message in the UI.
    """
    try:
        user_index = int(userid)
    except (TypeError, ValueError) as exc:
        raise gr.Error(f"User id must be an integer, got {userid!r}.") from exc

    n_users = user_artist.shape[0]
    # Reject negatives explicitly: Python/NumPy-style negative indexing would
    # otherwise silently pick a user counted from the end of the matrix.
    if not 0 <= user_index < n_users:
        raise gr.Error(
            f"User id must be between 0 and {n_users - 1}, got {user_index}."
        )

    ids, scores = model.recommend(
        user_index,
        user_artist[user_index],
        N=10,
        filter_already_liked_items=True,
    )
    return pd.DataFrame({"artist": artists[ids], "score": scores})


# Expose music_recommend through a minimal Gradio form:
# free text in (the user id), a dataframe of artists out.
demo = gr.Interface(fn=music_recommend, inputs="text", outputs="dataframe")
demo.launch()


In [None]:
# itemid = list(artists).index("maroon 5")
# print(f"Artist ID {itemid} : {artists[itemid]}")
# ids, scores = model.similar_items(itemid)

# # display the results using pandas for nicer formatting
# pd.DataFrame({"artist": artists[ids], "score": scores})

In [None]:
import gradio as gr

def music_similarity(artist_name):
    """Look up an artist by name and return the artists most similar to it.

    Parameters
    ----------
    artist_name : str
        Name to search for in ``artists``. An exact match is tried first; if
        that fails, the lookup is retried with surrounding whitespace removed
        and the name lower-cased.

    Returns
    -------
    pandas.DataFrame
        One row per similar artist, with columns ``"artist"`` and
        ``"score"``, taken from ``model.similar_items``.

    Raises
    ------
    gr.Error
        If no entry in ``artists`` matches the name. Gradio shows the
        message in the UI.
    """
    # Compare against the array directly instead of building a Python list of
    # every artist name on each request.
    names = np.asarray(artists)
    matches = np.flatnonzero(names == artist_name)

    if matches.size == 0 and isinstance(artist_name, str):
        # NOTE(review): this retry presumes artist names are stored
        # lower-case and without padding -- confirm against the dataset.
        matches = np.flatnonzero(names == artist_name.strip().lower())

    if matches.size == 0:
        raise gr.Error(f"Artist {artist_name!r} was not found in the dataset.")

    # First match, which is what list.index() would have returned.
    itemid = int(matches[0])
    print(f"Artist ID {itemid} : {artists[itemid]}")
    ids, scores = model.similar_items(itemid)
    return pd.DataFrame({"artist": artists[ids], "score": scores})


# Expose music_similarity through a minimal Gradio form:
# free text in (the artist name), a dataframe of similar artists out.
demo = gr.Interface(fn=music_similarity, inputs="text", outputs="dataframe")
demo.launch()