# Sample Dataset: User-song interactions

In [24]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Toy interaction log: five users, three rated songs each.
# The four lists are position-aligned, so index i across all of them
# describes a single (user, song, genre, rating) event.
data = {
    "user_id": [
        1, 1, 1,
        2, 2, 2,
        3, 3, 3,
        4, 4, 4,
        5, 5, 5,
    ],
    "song_id": [
        101, 102, 103,
        101, 104, 105,
        102, 106, 107,
        103, 104, 108,
        105, 107, 109,
    ],
    "genre": [
        "Pop", "Rock", "HipHop",
        "Pop", "Jazz", "Classical",
        "Rock", "Blues", "Pop",
        "HipHop", "Jazz", "R&B",
        "Classical", "Pop", "Rock",
    ],
    "rating": [
        5, 4, 3,
        5, 3, 4,
        4, 3, 5,
        2, 5, 4,
        3, 4, 5,
    ],
}

# One row per rating event, one column per key of `data`.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,user_id,song_id,genre,rating
0,1,101,Pop,5
1,1,102,Rock,4
2,1,103,HipHop,3
3,2,101,Pop,5
4,2,104,Jazz,3
5,2,105,Classical,4
6,3,102,Rock,4
7,3,106,Blues,3
8,3,107,Pop,5
9,4,103,HipHop,2


# Step 1: Create User-Item Matrix

In [25]:
# Rows are users, columns are songs, cells hold the rating.
# A (user, song) pair with no rating is filled with 0.
user_item_matrix = pd.pivot_table(
    df,
    values="rating",
    index="user_id",
    columns="song_id",
    fill_value=0,
)
user_item_matrix

song_id,101,102,103,104,105,106,107,108,109
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,5.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,0.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0
3,0.0,4.0,0.0,0.0,0.0,3.0,5.0,0.0,0.0
4,0.0,0.0,2.0,5.0,0.0,0.0,0.0,4.0,0.0
5,0.0,0.0,0.0,0.0,3.0,0.0,4.0,0.0,5.0


# Step 2: Compute User Similarity using Cosine Similarity

In [26]:
# Pairwise cosine similarity between the rows (users) of the user-item matrix.
# The result is a plain array whose row/column order follows user_item_matrix.index.
user_similarity = cosine_similarity(X=user_item_matrix)
user_similarity

array([[1.        , 0.5       , 0.32      , 0.12649111, 0.        ],
       [0.5       , 1.        , 0.        , 0.31622777, 0.24      ],
       [0.32      , 0.        , 1.        , 0.        , 0.4       ],
       [0.12649111, 0.31622777, 0.        , 1.        , 0.        ],
       [0.        , 0.24      , 0.4       , 0.        , 1.        ]])

In [27]:
# Wrap the raw similarity array in a DataFrame labelled by user_id on both
# axes, so similarities can be looked up by user id instead of by position.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)
user_similarity_df

user_id,1,2,3,4,5
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,0.5,0.32,0.126491,0.0
2,0.5,1.0,0.0,0.316228,0.24
3,0.32,0.0,1.0,0.0,0.4
4,0.126491,0.316228,0.0,1.0,0.0
5,0.0,0.24,0.4,0.0,1.0


# Function to get song recommendations based on the most similar user


In [45]:
def _cosine_similarity_to_user(user_item_matrix, user_id):
    """Return the cosine similarity between `user_id` and every user, as a Series indexed by user."""
    ratings = user_item_matrix.to_numpy(dtype=float)
    target = user_item_matrix.loc[user_id].to_numpy(dtype=float)
    dot_products = ratings @ target
    norm_products = np.linalg.norm(ratings, axis=1) * np.linalg.norm(target)
    # A user with an all-zero rating vector gets similarity 0 instead of NaN.
    similarities = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )
    return pd.Series(similarities, index=user_item_matrix.index)


def recommend_songs(user_id, user_item_matrix, num_recommendations=5,
                    user_similarity=None, min_rating=3):
    """Recommend songs that the most similar user liked and the target user has not rated.

    Parameters
    ----------
    user_id
        Label of the target user; must be present in ``user_item_matrix.index``.
    user_item_matrix : pandas.DataFrame
        Users on the rows, songs on the columns, ratings as values.
        A value of 0 is treated as "not rated".
    num_recommendations : int, default 5
        Maximum number of song ids to return.
    user_similarity : pandas.DataFrame, optional
        Precomputed user-by-user similarity, labelled with the same user ids
        as ``user_item_matrix`` on both axes. When omitted, cosine similarity
        is computed from ``user_item_matrix`` itself, so the function never
        depends on notebook-global state.
    min_rating : number, default 3
        A song counts as "liked" by the similar user only when its rating is
        strictly greater than this value.

    Returns
    -------
    list
        Song ids ordered by the similar user's rating, highest first.
        Empty when there is no other user or no song qualifies.

    Raises
    ------
    ValueError
        If ``user_id`` is not in ``user_item_matrix.index``.
    """
    if user_id not in user_item_matrix.index:
        raise ValueError(f"User {user_id} not found in dataset")

    if user_similarity is None:
        similarity_to_target = _cosine_similarity_to_user(user_item_matrix, user_id)
    else:
        similarity_to_target = user_similarity[user_id]

    # Rank the other users by similarity (excluding the target user).
    # A stable sort keeps tie-breaking deterministic across runs.
    similar_users = (
        similarity_to_target
        .drop(user_id)
        .sort_values(ascending=False, kind="mergesort")
    )
    if similar_users.empty:
        # No neighbour to borrow ratings from.
        return []
    most_similar_user = similar_users.index[0]

    similar_user_ratings = user_item_matrix.loc[most_similar_user]
    target_user_ratings = user_item_matrix.loc[user_id]

    # Songs the similar user liked (rating > min_rating) that the target user hasn't rated.
    is_candidate = (target_user_ratings == 0) & (similar_user_ratings > min_rating)

    # Recommend the highest-rated candidates first.
    recommended_songs = (
        similar_user_ratings[is_candidate]
        .sort_values(ascending=False, kind="mergesort")
        .head(num_recommendations)
    )

    return recommended_songs.index.tolist()


# Example usage


In [46]:
# Pick a target user and ask for recommendations from the user-item matrix.
user_to_recommend = 5
recommended_songs = recommend_songs(
    user_id=user_to_recommend,
    user_item_matrix=user_item_matrix,
)

print(f"Recommended songs for User {user_to_recommend}: {recommended_songs}")


Recommended songs for User 5: [102]
