In [1]:
import pandas as pd

In [2]:
# Ref: https://www.kaggle.com/datasets/harshal19t/lastfm-dataset?resource=download
df = pd.read_csv('Last.fm_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Username,Artist,Track,Album,Date,Time
0,0,Babs_05,Isobel Campbell,The Circus Is Leaving Town,Ballad of the Broken Seas,31 Jan 2021,23:36
1,1,Babs_05,Isobel Campbell,Dusty Wreath,Ballad of the Broken Seas,31 Jan 2021,23:32
2,2,Babs_05,Isobel Campbell,Honey Child What Can I Do?,Ballad of the Broken Seas,31 Jan 2021,23:28
3,3,Babs_05,Isobel Campbell,It's Hard To Kill A Bad Thing,Ballad of the Broken Seas,31 Jan 2021,23:25
4,4,Babs_05,Isobel Campbell,Saturday's Gone,Ballad of the Broken Seas,31 Jan 2021,23:21


In [3]:
dfp = df[['Username', 'Artist', 'Track']].groupby(['Username', 'Artist']).agg('count').reset_index()

In [4]:
dfp.head()

Unnamed: 0,Username,Artist,Track
0,Babs_05,"""Demons""",1
1,Babs_05,"""Weird Al"" Yankovic",7
2,Babs_05,#1 Garth Brooks Tribute Band,2
3,Babs_05,$uicideboy$,48
4,Babs_05,'Come From Away' Band,1


In [5]:
dfp.Track.describe()

count    51790.000000
mean         3.208206
std          7.052209
min          1.000000
25%          1.000000
50%          1.000000
75%          2.000000
max        367.000000
Name: Track, dtype: float64

# Using implicit feedback to recommend

In [6]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k

In [7]:
df

Unnamed: 0.1,Unnamed: 0,Username,Artist,Track,Album,Date,Time
0,0,Babs_05,Isobel Campbell,The Circus Is Leaving Town,Ballad of the Broken Seas,31 Jan 2021,23:36
1,1,Babs_05,Isobel Campbell,Dusty Wreath,Ballad of the Broken Seas,31 Jan 2021,23:32
2,2,Babs_05,Isobel Campbell,Honey Child What Can I Do?,Ballad of the Broken Seas,31 Jan 2021,23:28
3,3,Babs_05,Isobel Campbell,It's Hard To Kill A Bad Thing,Ballad of the Broken Seas,31 Jan 2021,23:25
4,4,Babs_05,Isobel Campbell,Saturday's Gone,Ballad of the Broken Seas,31 Jan 2021,23:21
...,...,...,...,...,...,...,...
166148,265624,Orlenay,Kelly Lee Owens,Arpeggi,Inner Song,30 Jan 2021,18:30
166149,265625,Orlenay,Caterina Barbieri,Bow of Perception,Ecstatic Computation,30 Jan 2021,18:19
166150,265626,Orlenay,Caterina Barbieri,Pinnacles of You,Ecstatic Computation,30 Jan 2021,18:13
166151,265627,Orlenay,Caterina Barbieri,Arrows of Time,Ecstatic Computation,30 Jan 2021,18:08


In [8]:
# Load Last.fm data
# Optional: filter very rare interactions (recommended)
dfpf = dfp[dfp['Track'] >= 10]

dfpf['confidence'] = np.log1p(dfpf['Track'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfpf['confidence'] = np.log1p(dfpf['Track'])


In [9]:
dfpf.head()

Unnamed: 0,Username,Artist,Track,confidence
3,Babs_05,$uicideboy$,48,3.89182
5,Babs_05,'Come From Away' Company,10,2.397895
14,Babs_05,100 gecs,12,2.564949
15,Babs_05,13 & God,25,3.258097
57,Babs_05,747,32,3.496508


In [10]:
dfpf.shape

(3639, 4)

In [11]:
dataset = Dataset()

dataset.fit(
    users=df['Username'].unique(),
    items=df['Artist'].unique()
)

In [12]:
interactions, weights = dataset.build_interactions(
    [(u, i, w) for u, i, w in zip(
        dfpf['Username'],
        dfpf['Artist'],
        dfpf['confidence']
    )]
)


In [13]:
from lightfm.cross_validation import random_train_test_split

train_interactions, test_interactions = random_train_test_split(
    interactions,
    test_percentage=0.2,
    random_state=42
)

train_weights, test_weights = random_train_test_split(
    weights, 
    test_percentage=0.2, 
    random_state=42)

In [14]:
model = LightFM(
    loss='warp',           # best for implicit ranking
    no_components=50,
    learning_rate=0.05,
    random_state=42
)

In [15]:
model.fit(
    train_interactions,
    sample_weight=train_weights,
    epochs=20,
    num_threads=1
)

<lightfm.lightfm.LightFM at 0x7ee38d6c7980>

In [16]:
precision = precision_at_k(
    model,
    test_interactions,
    train_interactions=train_interactions,
    k=10
).mean()

print(f"Precision@10: {precision:.4f}")


Precision@10: 0.2364


In [17]:
precision = precision_at_k(
    model,
    test_interactions,
    train_interactions=train_interactions,
    k=5
).mean()

print(f"Precision@10: {precision:.4f}")


Precision@10: 0.2545


In [18]:
user_map, _, item_map, _ = dataset.mapping()
inv_item_map = {v: k for k, v in item_map.items()}

def recommend_artists(model, user_id, n=10):
    user_internal_id = user_map[user_id]
    n_items = len(item_map)

    scores = model.predict(
        user_internal_id,
        np.arange(n_items)
    )

    top_items = np.argsort(-scores)[:n]

    return [inv_item_map[i] for i in top_items]

# Example
recommended_artists = recommend_artists(model, user_id='Babs_05', n=10)
print("Recommended artist IDs:", recommended_artists)

Recommended artist IDs: ['Casino Versus Japan', 'Weyes Blood', 'Ramones', 'The Prodigy', 'Phil Collins', 'Schiller', 'Oceansize', 'Labyrinth', 'Death', 'Tiamat']


In [19]:
dfp[dfp['Username'] == 'Babs_05'].sort_values('Track', ascending=False).to_csv('Test.csv')