In [25]:
import pandas as pd
import numpy as np
import time 

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

In [26]:
#Makes using jupyter notebook on laptops much easier
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [27]:
# Create sample user data
users = pd.DataFrame({
    'user_id': [1, 2, 3],
    'country': ['USA', 'UK', 'USA']
})

In [28]:
# Create sample song data
songs = pd.DataFrame({
    'song_id': [1, 2, 3, 4, 5],
    'title': ['Song A', 'Song B', 'Song C', 'Song D', 'Song E'],
    'genre': ['Pop', 'Rock', 'Pop', 'Rock', 'Electronic'],
    'artist': ['Artist A', 'Artist B', 'Artist C', 'Artist D', 'Artist E'],
    'duration': [3.5, 4.2, 3.8, 4.1, 5.2],
    'tempo': [120, 130, 125, 135, 140],
    'key': ['C', 'G', 'C', 'D', 'F']
})

In [29]:
# Create sample interaction data
interactions = pd.DataFrame({
    'user_id': [1, 1, 1, 2, 2, 3, 3],
    'song_id': [1, 2, 3, 1, 4, 2, 5],
    'listen_count': [5, 2, 3, 1, 4, 5, 2]
})

In [30]:
# Loop through different random seeds
random_state = int(time.time())

# Split data into training and test sets with a different random seed each time
train, test = train_test_split(interactions, test_size=0.2, random_state=random_state)
    
# Calculate song similarity matrix based on features
song_features = pd.concat([
        songs[['duration', 'tempo']],
        pd.get_dummies(songs['genre'], prefix='genre'),
        pd.get_dummies(songs['key'], prefix='key')
    ], axis=1)
song_similarity_matrix = cosine_similarity(song_features)

In [31]:
# Define function to recommend songs to a user
def recommend_songs(user_id, n=5):
    user_interactions = test[test['user_id'] == user_id]
    known_song_ids = user_interactions['song_id'].values
    known_song_idx = np.searchsorted(songs['song_id'].values, known_song_ids)
    scores = np.sum(song_similarity_matrix[known_song_idx], axis=0)
    top_song_idxs = np.argsort(-scores)[:n]
    recommended_songs = songs.loc[top_song_idxs]['title'].values
    return recommended_songs


In [32]:
# Loop through the user IDs and print the recommended songs for each user
for user_id in users['user_id']:
    print(f"Recommended songs for user {user_id}")
    print(recommend_songs(user_id))

Recommended songs for user 1
['Song A' 'Song C' 'Song D' 'Song B' 'Song E']
Recommended songs for user 2
['Song A' 'Song B' 'Song C' 'Song D' 'Song E']
Recommended songs for user 3
['Song E' 'Song B' 'Song D' 'Song C' 'Song A']
