In [2]:
from implicit.als import AlternatingLeastSquares
import scipy.sparse as sparse
import pandas as pd



# Load the raw listening history from disk.
df = pd.read_csv('song_dataset.csv')

# Rename columns to the names used throughout the rest of this notebook.
# Columns not listed here keep their original names.
df = df.rename(columns={
    'user': 'user_id',
    'song': 'song_id',
    'release': 'album',
    'artist_name': 'artist',
})

# Convert play counts to integers. With errors='coerce', unparseable values
# become NaN, and NaN cannot be cast to int, so those rows are dropped first.
# Rows with a missing user or song are dropped as well: factorize would give
# them the code -1, which is not a valid matrix index.
df['play_count'] = pd.to_numeric(df['play_count'], errors='coerce')
df = df.dropna(subset=['user_id', 'song_id', 'play_count']).reset_index(drop=True)
df['play_count'] = df['play_count'].astype(int)

# Replace the raw user / song identifiers with dense 0-based integer codes so
# they can be used directly as row / column indices of the interaction matrix.
df['user_id'] = pd.factorize(df['user_id'])[0].astype(int)
df['song_id'] = pd.factorize(df['song_id'])[0].astype(int)

# Build the (users x songs) play-count matrix. Repeated (user, song) pairs are
# summed during the COO -> CSR conversion. The recommend call later in this
# notebook slices rows of this matrix, which is why it is stored as CSR.
user_item_matrix = sparse.coo_matrix(
    (df['play_count'], (df['user_id'], df['song_id'])),
    shape=(df['user_id'].nunique(), df['song_id'].nunique()),
).tocsr()

# Initialize and train the ALS model. A fixed random_state makes the fitted
# factors (and therefore the recommendations) reproducible across kernel
# restarts.
model = AlternatingLeastSquares(
    factors=50,
    regularization=0.01,
    iterations=20,
    random_state=42,
)
model.fit(user_item_matrix)




100%|██████████| 20/20 [00:04<00:00,  4.73it/s]


In [3]:

def recommend_songs(user_id, model, user_item_matrix, df, n_songs=1):
    """Return up to ``n_songs`` recommended song titles for one user.

    Args:
        user_id: Row index of the user in ``user_item_matrix``.
        model: Object exposing ``recommend(user_id, user_items, N=...)`` that
            returns a pair ``(item_ids, scores)`` of equal-length sequences.
        user_item_matrix: Matrix indexable by ``user_id``; the selected row is
            passed to ``model.recommend``.
        df: DataFrame with at least ``song_id`` and ``title`` columns.
        n_songs: Number of recommendations requested from the model.

    Returns:
        A list of ``(title, score)`` tuples in the order produced by the
        model. Recommended ids with no matching ``song_id`` in ``df`` are
        skipped, so the list may be shorter than ``n_songs``.
    """
    # Get recommendations (item IDs and scores)
    item_ids, scores = model.recommend(user_id, user_item_matrix[user_id], N=n_songs)

    # Resolve titles with a single pass over df instead of re-scanning the
    # whole frame for every recommended item. drop_duplicates keeps the first
    # row per song_id, i.e. the same row the per-item `.iloc[0]` lookup picks.
    matches = df.loc[df['song_id'].isin(item_ids), ['song_id', 'title']]
    matches = matches.drop_duplicates('song_id')
    title_by_song = dict(zip(matches['song_id'].tolist(), matches['title'].tolist()))

    # Collect song titles and their corresponding scores, preserving the
    # model's ranking order.
    songs = []
    for item_id, score in zip(item_ids, scores):
        key = int(item_id)
        if key in title_by_song:
            songs.append((title_by_song[key], score))
    return songs



# Demo: fetch the default number of recommendations for the user whose
# internal (factorized) id is 0 and print each title with its model score.
recommended_songs = recommend_songs(
    user_id=0,
    model=model,
    user_item_matrix=user_item_matrix,
    df=df,
)
for title, score in recommended_songs:
    print(f"{title}: {score}")

A Beggar On A Beach Of Gold: 0.20915170013904572
