In [82]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import cdist
import sqlite3
import pandas as pd
import numpy as np

In [83]:
# SQLite database file opened by get_data(). A relative path such as this one
# is resolved against the kernel's current working directory.
DB_FILE = "music_data.db"

### barebones content recommender

In [84]:
def get_data():
    """Load tracks that have a genre and artist coordinates from SQLite.

    Opens the database at ``DB_FILE`` and selects ``track_id``, ``title``,
    ``genre``, ``artist_name``, ``artist_latitude`` and ``artist_longitude``
    from the ``music_data`` table, keeping only rows where genre, latitude
    and longitude are all non-NULL.

    Returns:
        pandas.DataFrame: one row per matching table row, with the six
        columns listed above.
    """
    # Load relevant data
    query = """
        SELECT track_id, title, genre, artist_name, artist_latitude, artist_longitude 
        FROM music_data 
        WHERE artist_latitude IS NOT NULL 
        AND artist_longitude IS NOT NULL
        AND genre IS NOT NULL;
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Close on every path (including a failed query) so the connection
        # is never leaked.
        conn.close()

In [85]:
def process_genres(df):
    """Turn the ``genre`` column of ``df`` into a TF-IDF matrix.

    Missing genre values are replaced by the empty string before fitting.

    Returns:
        The result of ``TfidfVectorizer().fit_transform`` on the genre text,
        in the same row order as ``df``.
    """
    genre_text = df['genre'].fillna('')
    vectorizer = TfidfVectorizer()
    return vectorizer.fit_transform(genre_text)

In [86]:
def compute_similarity(df, genre_matrix, genre_weight=0.7, location_weight=0.3):
    """Blend genre and artist-location similarity into one pairwise matrix.

    Args:
        df: DataFrame with ``artist_latitude`` and ``artist_longitude``
            columns, in the same row order as ``genre_matrix``.
        genre_matrix: feature matrix passed to ``cosine_similarity``
            (one row per row of ``df``).
        genre_weight: multiplier applied to the genre similarity.
        location_weight: multiplier applied to the location similarity.

    Returns:
        numpy.ndarray: ``genre_weight * genre_similarity +
        location_weight * location_similarity``, one row/column per row of
        ``df``.
    """
    coords = df[['artist_latitude', 'artist_longitude']].to_numpy(dtype=float)

    genre_similarity = cosine_similarity(genre_matrix)

    # NOTE: this is plain Euclidean distance in degree space, not a
    # great-circle distance.
    distances = cdist(coords, coords, metric='euclidean')

    # Scale distances into [0, 1]. Using raw degrees made `1 - distance`
    # unbounded below (e.g. -49 for a 50-degree gap), so the location term
    # overwhelmed the genre term regardless of the weights.
    finite_distances = distances[np.isfinite(distances)]
    max_distance = finite_distances.max() if finite_distances.size else 0.0
    if max_distance > 0:
        distances = distances / max_distance

    # Pairs whose distance is NaN/inf (missing coordinates) get similarity 0.
    location_similarity = np.where(np.isfinite(distances), 1.0 - distances, 0.0)

    # Combine similarities (weighted sum)
    return genre_weight * genre_similarity + location_weight * location_similarity

In [87]:
def get_recommendations(df, combined_similarity, track_id, top_n=5):
    """Return the ``top_n`` tracks most similar to ``track_id``.

    Args:
        df: DataFrame with ``track_id``, ``title``, ``genre`` and
            ``artist_name`` columns; row ``i`` of ``df`` corresponds to
            row/column ``i`` of ``combined_similarity``.
        combined_similarity: square pairwise similarity matrix.
        track_id: identifier of the query track. If it occurs more than once
            in ``df``, the first occurrence is used.
        top_n: number of recommendations to return.

    Returns:
        pandas.DataFrame: the ``title``, ``genre`` and ``artist_name`` columns
        of the recommended rows, most similar first, with the query row
        itself excluded.

    Raises:
        ValueError: if ``track_id`` is not present in ``df``.
    """
    # Work with row *positions*: the similarity matrix and `.iloc` are
    # positional, whereas `df.index` labels only coincide with positions for
    # a default RangeIndex.
    matches = np.flatnonzero((df['track_id'] == track_id).to_numpy())
    if matches.size == 0:
        raise ValueError(f"track_id {track_id!r} not found in df")
    track_pos = matches[0]

    # Rank every row by descending similarity, drop the query row, keep top_n.
    ranked = np.argsort(np.asarray(combined_similarity)[track_pos])[::-1]
    similar_positions = ranked[ranked != track_pos][:max(top_n, 0)]

    return df.iloc[similar_positions][['title', 'genre', 'artist_name']]

In [88]:
# Build the notebook-level state (df, genre_matrix, combined_similarity) that
# the recommendation cell reads by name. A `recommend_similar_tracks(track_id,
# top_n=5)` wrapper was sketched here but never implemented.

# Load data
df = get_data()

# Process genre data
genre_matrix = process_genres(df)

# Compute similarity
combined_similarity = compute_similarity(df, genre_matrix)

In [89]:
# Example track ID used to demonstrate the recommender
example_track = "TRARRQO128F427B5F5"
print(
    "Recommendations for {}: \n{}".format(
        df.loc[df['track_id'] == example_track, ['title', 'genre', 'artist_name']],
        get_recommendations(df, combined_similarity, example_track),
    )
)

Recommendations for                                 title     genre artist_name
0  You Eclipsed By Me (Album Version)  Pop_Rock      Atreyu: 
                                                title     genre  \
420                                  Our Darkest Days  Pop_Rock   
93                                       Kettle Black  Pop_Rock   
74                     Nevada's Grace (Album Version)  Pop_Rock   
43                            Gallows (Album Version)  Pop_Rock   
506  Blood Children (an Introduction) (Album Version)  Pop_Rock   

          artist_name  
420  Eighteen Visions  
93      BLEED THE SKY  
74             Atreyu  
43             Atreyu  
506            Atreyu  
