In [1]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k

In [2]:
train_df = pd.read_csv("Lastfm-train.csv")
test_df  = pd.read_csv("Lastfm-test.csv")

In [3]:
train_df.head()

Unnamed: 0,Username,Artist,Track
0,Babs_05,New Order,89
1,Orlenay,Junior Boys,14
2,eartle,The Jimi Hendrix Experience,15
3,Knapster01,Pettersson und Findus,14
4,massdosage,A Wilhelm Scream,20


In [4]:
test_df.head()

Unnamed: 0,Username,Artist,Track
0,Babs_05,$uicideboy$,48
1,Babs_05,A Wilhelm Scream,20
2,Babs_05,Amalee,10
3,Babs_05,Amaranthe,27
4,Babs_05,Ashnikko,18


In [5]:
train_df['confidence'] = np.log1p(train_df['Track'])
test_df['confidence']  = np.log1p(test_df['Track'])

In [6]:
dataset = Dataset()

dataset.fit(
    users=pd.concat([train_df['Username'], test_df['Username']]).unique(),
    items=pd.concat([train_df['Artist'], test_df['Artist']]).unique()
)


In [7]:
train_interactions, train_weights = dataset.build_interactions(
    [(u, i, c) for u, i, c in zip(
        train_df['Username'],
        train_df['Artist'],
        train_df['confidence']
    )]
)

In [8]:
test_interactions, test_weights = dataset.build_interactions(
    [(u, i, c) for u, i, c in zip(
        test_df['Username'],
        test_df['Artist'],
        test_df['confidence']
    )]
)

# Train a model with one set of parameters

In [9]:
model = LightFM(
    loss='warp',          
    no_components=25,
    learning_rate=0.05,
    user_alpha=1e-6,
    item_alpha=1e-6,
    random_state=42
)

model.fit(
    train_interactions,
    sample_weight=train_weights,
    epochs=50,
    num_threads=2
)

<lightfm.lightfm.LightFM at 0x7dfafab58e60>

In [10]:
precision = precision_at_k(
    model,
    test_interactions,
    train_interactions=train_interactions,
    k=5
).mean()

print(f"Precision@10: {precision:.4f}")

Precision@10: 0.3273


In [11]:
user_map, _, item_map, _ = dataset.mapping()
inv_item_map = {v: k for k, v in item_map.items()}

def recommend_items(model, user_id, n=10):
    user_internal_id = user_map[user_id]
    scores = model.predict(
        user_internal_id,
        np.arange(len(item_map))
    )
    top_items = np.argsort(-scores)[:n]
    return [inv_item_map[i] for i in top_items]

# Example
print(recommend_items(model, user_id='Orlenay', n=5))

['Róisín Murphy', 'Dorian Electra', 'Miley Cyrus', 'Luke Sital-Singh', 'Purple Mountains']


In [12]:
def eval_overlap(model, username):
    user_artists = set(test_df[test_df['Username'] == username]['Artist'])
    reco_artists = set(recommend_items(model, user_id=username, n=10))
    print ('~~~~~~~~~~~~~~~~~~~~~~~~')
    #print ('user artists in test', user_artists)
    #print ('recommended artists by model', reco_artists)
    print ('common among both', user_artists.intersection(reco_artists))

In [13]:
for user in set(test_df['Username']):
    print (user, eval_overlap(model, user))

~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
lobsterclaw None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
mremond None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
massdosage None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
Babs_05 None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
franhale None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
Knapster01 None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
isaac None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
eartle None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
jajo None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
jonocole None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
Orlenay None


In [14]:
model2 = LightFM(
    loss='logistic',          
    no_components=25,
    learning_rate=0.05,
    user_alpha=1e-6,
    item_alpha=1e-6,
    random_state=42
)

model2.fit(
    train_interactions,
    sample_weight=train_weights,
    epochs=50,
    num_threads=2
)

<lightfm.lightfm.LightFM at 0x7dfafab5a960>

In [15]:
precision = precision_at_k(
    model2,
    test_interactions,
    train_interactions=train_interactions,
    k=5
).mean()

print(f"Precision@10: {precision:.4f}")

Precision@10: 0.6909


In [16]:
for user in set(test_df['Username']):
    print (user, eval_overlap(model2, user))

~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
lobsterclaw None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
mremond None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'MF DOOM', 'Jellyfish'}
massdosage None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
Babs_05 None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'Purple Mountains'}
franhale None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'Purple Mountains', 'Róisín Murphy'}
Knapster01 None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
isaac None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'Purple Mountains', 'James Blake', 'MF DOOM'}
eartle None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both set()
jajo None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'Luke Sital-Singh'}
jonocole None
~~~~~~~~~~~~~~~~~~~~~~~~
common among both {'James Blake'}
Orlenay None
