# Imports

In [None]:
# Imports
import os
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


# Functions

## Load and preprocess the data


In [2]:
def load_data(file_path):
    """
    Read the music CSV and keep only the columns the recommender uses.

    No cleaning or scaling is applied; the result is a column projection of
    the file, ordered as the three identifying columns followed by the
    audio feature columns.

    Args:
    file_path (str): Path to the CSV file containing music data.

    Returns:
    pd.DataFrame: The user/artist/track columns followed by the audio
    feature columns, one row per CSV row.

    Raises:
    KeyError: If any of the expected columns is absent from the file.
    """
    # Columns identifying who listened to what
    id_columns = ['user_id', 'artistname', 'trackname']

    # Audio descriptors used as the feature vector of each song
    audio_columns = [
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    ]

    raw = pd.read_csv(file_path)
    return raw[id_columns + audio_columns]

## Calculate similarity between songs


In [3]:
def calculate_song_similarity(df):
    """
    Build the pairwise cosine-similarity matrix of the rows of ``df``.

    Every column from the fourth onward is treated as a feature; the first
    three columns are skipped. Feature values are passed to
    ``cosine_similarity`` exactly as stored (no scaling is applied here).

    Args:
    df (pd.DataFrame): DataFrame whose columns from position 3 onward hold
    the song features.

    Returns:
    np.ndarray: Similarity matrix between songs.
    """
    # Positional slice: drop the three leading non-feature columns
    feature_frame = df.iloc[:, 3:]
    feature_matrix = feature_frame.values

    return cosine_similarity(feature_matrix)


## Get top N similar songs

In [4]:
def get_similar_songs(df, song_index, similarity_matrix, n=5):
    """
    Get top N similar songs for a given song.

    The target song is excluded by its position rather than by assuming it
    sorts first: when another row ties with the target (for example a
    duplicate row with similarity 1.0), a "skip the first result" approach
    can drop the duplicate and return the target itself.

    Args:
    df (pd.DataFrame): DataFrame containing song information; must have
    'artistname' and 'trackname' columns, with rows in the same order as the
    rows of similarity_matrix.
    song_index (int): Positional index of the target song (negative values
    count from the end).
    similarity_matrix (np.ndarray): Similarity matrix between songs.
    n (int): Number of similar songs to return; values <= 0 yield an empty list.

    Returns:
    list: List of tuples containing similar song information
    (artist, track, similarity score), most similar first. Ties keep their
    original row order.
    """
    row = similarity_matrix[song_index]

    # Normalise a negative index so the exclusion test below compares positions
    target = song_index if song_index >= 0 else song_index + len(row)

    # Drop the target explicitly, then rank the rest (sorted() is stable,
    # so equal scores stay in row order)
    candidates = [(i, score) for i, score in enumerate(row) if i != target]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    top = candidates[:max(n, 0)]

    # Look the columns up once instead of materialising a row per result
    artists = df['artistname']
    tracks = df['trackname']
    return [(artists.iloc[i], tracks.iloc[i], score) for i, score in top]


## Collaborative filtering recommendation

In [5]:
def collaborative_filtering_recommendation(df, target_user, similarity_matrix, n=5):
    """
    Generate song recommendations for a target user.

    Each candidate song is scored by its average similarity (taken from
    similarity_matrix) to the rows belonging to the target user; the
    highest-scoring songs the user does not already have are returned.

    Args:
    df (pd.DataFrame): DataFrame containing user-song interactions; must have
    'user_id', 'artistname' and 'trackname' columns, with rows in the same
    order as the rows of similarity_matrix.
    target_user (str): User ID for whom to generate recommendations.
    similarity_matrix (np.ndarray): Similarity matrix between songs.
    n (int): Number of recommendations to generate; values <= 0 yield an
    empty list.

    Returns:
    list: List of tuples containing recommended song information
    (artist, track), best first, without repeats. Empty if the user has no
    rows in df.
    """
    # Work with row positions, not index labels, so the function is correct
    # for any DataFrame index (the matrix is addressed positionally)
    owned_positions = np.flatnonzero((df['user_id'] == target_user).to_numpy())

    # Unknown user: averaging over zero rows would produce NaN scores
    if owned_positions.size == 0 or n <= 0:
        return []

    # Average similarity of every song to the user's songs
    avg_similarity = np.mean(similarity_matrix[owned_positions], axis=0)

    # Descending order; the stable sort keeps ties in row order
    ranked = np.argsort(-avg_similarity, kind="stable")

    artists = df['artistname'].tolist()
    tracks = df['trackname'].tolist()

    # Songs to skip: the user's own tracks, matched by (artist, track) so the
    # same track appearing in another user's row is not recommended back
    seen = {(artists[i], tracks[i]) for i in owned_positions}

    recommendations = []
    for i in ranked:
        song = (artists[i], tracks[i])
        if song in seen:
            continue
        seen.add(song)  # also prevents recommending the same track twice
        recommendations.append(song)
        if len(recommendations) == n:
            break

    return recommendations

In [6]:
def main():
    """
    Run the demo: load the dataset, list the songs most similar to the first
    row, then print recommendations for one hard-coded user.
    """
    # Example inputs for the two demonstrations below
    target_song_index = 0  # Index of the target song in the DataFrame
    target_user = 'c1a6910ecac9fd5e5348326675fb6ca6'

    # Load the data and build the song-to-song similarity matrix
    df = load_data('spotify_data.csv')
    similarity_matrix = calculate_song_similarity(df)

    # Part 1: songs closest to the target song
    similar_songs = get_similar_songs(df, target_song_index, similarity_matrix)
    target_row = df.iloc[target_song_index]
    print("Similar songs to", target_row['trackname'], "by", target_row['artistname'])
    for artist, track, score in similar_songs:
        print(f"- {track} by {artist} (Similarity: {score:.2f})")

    # Part 2: recommendations for the example user
    print("\nCollaborative Filtering Recommendations:")
    recommendations = collaborative_filtering_recommendation(df, target_user, similarity_matrix)
    for artist, track in recommendations:
        print(f"- {track} by {artist}")

In [7]:
# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

Similar songs to Abandon Window by Jon Hopkins
- Abandon Window by Jon Hopkins (Similarity: 1.00)
- Abandon Window by Jon Hopkins (Similarity: 1.00)
- (I Believe In) Travellin' Light by Belle & Sebastian (Similarity: 1.00)
- Blue Eyes of a Millionaire by Belle & Sebastian (Similarity: 1.00)
- Desperation Made a Fool of Me by Belle & Sebastian (Similarity: 1.00)

Collaborative Filtering Recommendations:
- Stay Alive by José González
- Fun City by Soft Cell
- Bullet In The Gun - Nat Monday Remix by Planet Perfecto
- Bullet In The Gun [Mix Cut] - Nat Monday Remix by Planet Perfecto
- Just Tell Me by Toots & The Maytals
