In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

# --- Step 1: Load Datasets ---
# Both paths are resolved relative to the current working directory.
interaction_file = "piki_filtered.csv"
metadata_file = "spotify_songs.csv"

# Load interactions (the code below reads the user_id, song_id and liked columns)
df = pd.read_csv(interaction_file)
# Load song metadata (the code below reads song_id plus the track_* columns)
songs_meta = pd.read_csv(metadata_file)

print("Dataset Loaded Successfully")
print(df.head())

# --- Step 2: Build User-Item Sparse Matrix ---
# factorize maps every raw id to a dense 0-based code; the *_idx objects hold
# the unique raw ids, so `user_idx[code]` / `song_idx[code]` recover the id.
user_codes, user_idx = pd.factorize(df["user_id"])
song_codes, song_idx = pd.factorize(df["song_id"])

# Rows are users, columns are songs, values come from the "liked" column.
# NOTE(review): scipy sums entries that share the same (row, col) pair, so a
# user/song pair that appears several times gets the sum of its "liked"
# values - confirm that is the intended weighting.
user_item_matrix = csr_matrix(
    (df["liked"], (user_codes, song_codes)),
    shape=(len(user_idx), len(song_idx)),
)

print("\nUser–Item Sparse Matrix Created")
print("Shape:", user_item_matrix.shape)

# --- Step 3: Compute Item-Item Similarity (Sparse) ---
# Transpose so that each row is a song; dense_output=False keeps the
# (songs x songs) result sparse.
item_similarity = cosine_similarity(user_item_matrix.T, dense_output=False)
print("\nItem–Item Similarity Matrix Computed")

# Helper: Map song_id to track_name, artist, album, release date
# Result shape: {song_id: {"track_name": ..., "track_artist": ..., ...}}.
# NOTE(review): to_dict("index") requires unique song_id values in the
# metadata file - verify, or de-duplicate before indexing.
song_map = songs_meta.set_index("song_id")[["track_name", "track_artist", "track_album_name", "track_album_release_date"]].to_dict("index")

# --- User-level Recommendation ---
def recommend_for_user(user_id, n=10):
    """Recommend up to ``n`` songs that the user has not liked yet.

    Every song is scored by its mean item-item similarity to the songs the
    user liked (the positive entries of the user's row in
    ``user_item_matrix``). Songs the user already liked are never returned,
    so fewer than ``n`` rows come back when there are not enough candidates.

    Relies on the module-level ``user_idx``, ``song_idx``,
    ``user_item_matrix``, ``item_similarity`` and ``song_map``.

    Args:
        user_id: Raw user identifier to look up in ``user_idx``.
        n: Maximum number of recommendations. Values below 1 yield an empty
            table; ``None`` means "no limit".

    Returns:
        A ``pandas.DataFrame`` with the columns ``Song_ID``, ``Track_Name``,
        ``Artist``, ``Album_Name`` and ``Album_Release_Date`` (metadata that
        is missing from ``song_map`` is reported as ``"Unknown"``), or a
        ``str`` message when the user is unknown, has no liked songs, or an
        unexpected error occurs.
    """
    # The broad handler is kept on purpose: callers print the return value
    # and expect an "Error: ..." string rather than an exception.
    try:
        # Single pass over the unique ids both tests membership and yields
        # the matrix row position.
        matches = np.flatnonzero(user_idx == user_id)
        if matches.size == 0:
            return f"User ID {user_id} not found in dataset."

        user_vector = user_item_matrix[matches[0]].toarray().ravel()
        is_liked = user_vector > 0
        liked_songs = np.flatnonzero(is_liked)

        if liked_songs.size == 0:
            return f"User {user_id} has not liked any songs yet."

        # Mean similarity of every song to the liked songs. np.asarray copes
        # with both np.matrix and plain ndarray results of the sparse sum.
        sim_scores = np.asarray(
            item_similarity[liked_songs].sum(axis=0), dtype=float
        ).ravel() / liked_songs.size
        # Push already-liked songs to the very end of the ranking ...
        sim_scores[is_liked] = -np.inf

        limit = n if n is None else max(n, 0)
        top_indices = np.argsort(-sim_scores)[:limit]
        # ... and drop any that still made the cut because there were fewer
        # than `n` unliked candidates.
        top_indices = top_indices[~is_liked[top_indices]]
        top_songs = song_idx[top_indices]

        # Look each song's metadata up once instead of once per column.
        metadata = [song_map.get(sid, {}) for sid in top_songs]

        recommendations = pd.DataFrame({
            "Song_ID": top_songs,
            "Track_Name": [m.get("track_name", "Unknown") for m in metadata],
            "Artist": [m.get("track_artist", "Unknown") for m in metadata],
            "Album_Name": [m.get("track_album_name", "Unknown") for m in metadata],
            "Album_Release_Date": [
                m.get("track_album_release_date", "Unknown") for m in metadata
            ],
        })

        return recommendations.reset_index(drop=True)
    except Exception as e:
        return f"Error: {str(e)}"

# --- Example Usage ---
# Print the heading and the result for one sample user, each on its own line.
print(
    "\nTop 10 personalized recommendations for user_id = 3721095:",
    recommend_for_user(3721095, n=10),
    sep="\n",
)


Dataset Loaded Successfully
        timestamp  user_id  song_id  liked  personalized  spotify_popularity  \
0  19/6/2019 9:22  3721089    68077      1             1                  50   
1  19/6/2019 9:23  3721089    34803      1             1                  61   
2  19/6/2019 9:28  3721095    68077      1             1                  50   
3  19/6/2019 9:28  3721095     7778      1             1                  52   
4  19/6/2019 9:28  3721095  1750572      0             1                  78   

   treatment_group  
0               -1  
1               -1  
2               -1  
3               -1  
4               -1  

User–Item Sparse Matrix Created
Shape: (8548, 32833)

Item–Item Similarity Matrix Computed

Top 10 personalized recommendations for user_id = 3721095:
   Song_ID                                  Track_Name           Artist  \
0  4137222                    Until the Light Takes Us       Ghostemane   
1  1807582                             Salida 3 - Live      Mig

: 