In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import random

In [2]:
# Load the CSV file into a DataFrame
file_path = "data/Holiday_Songs_Spotify.csv"  
df = pd.read_csv(file_path)
df.head()

Unnamed: 0.1,Unnamed: 0,track_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,...,tempo,duration_ms,time_signature,key_mode,playlist_name,playlist_img,track_name,artist_name,album_name,album_img
0,1,00IqwkT0PZhJ86PJajRCqk,0.195,0.348,A#,-10.106,major,0.0332,0.82,0.0,...,166.824,213107,3,A# major,new_holiday_songs,https://mosaic.scdn.co/640/5f79f928a45b878579e...,Silver Bells,Johnny Mathis,Merry Christmas,https://i.scdn.co/image/b878b9e27201163be07e74...
1,2,01h424WG38dgY34vkI3Yd0,0.225,0.248,A,-15.871,major,0.0337,0.912,0.000143,...,96.013,183613,4,A major,new_holiday_songs,https://mosaic.scdn.co/640/5f79f928a45b878579e...,White Christmas,Bing Crosby,White Christmas,https://i.scdn.co/image/3bb0daf5f87a737ce67ace...
2,3,08BhfyKUXxZrnyHrDavNHP,0.444,0.288,F#,-11.793,major,0.0326,0.911,7e-06,...,108.043,199093,3,F# major,new_holiday_songs,https://mosaic.scdn.co/640/5f79f928a45b878579e...,The Christmas Waltz,Tony Bennett,A Swingin' Christmas Featuring The Count Basie...,https://i.scdn.co/image/96aa4fb09e7fe9d38599c8...
3,4,095XSaT8I2uI6Uldj2QrSl,0.687,0.496,A,-4.708,major,0.0339,0.434,0.00144,...,97.575,178680,4,A major,new_holiday_songs,https://mosaic.scdn.co/640/5f79f928a45b878579e...,Stop the Cavalry,Jona Lewie,On The Other Hand There's A Fist (Remastered),https://i.scdn.co/image/50c0ea35cacbf7c5d495c7...
4,5,09b2gJR45Pyip2rx9CnXW1,0.477,0.841,F#,-5.172,major,0.0358,0.000165,0.0971,...,119.954,203404,4,F# major,new_holiday_songs,https://mosaic.scdn.co/640/5f79f928a45b878579e...,I Don't Want to Go Home for Christmas,Independent Counsel of Funk,I Don't Want to Go Home for Christmas,https://i.scdn.co/image/77eb7c17cafe55037a1ab2...


In [3]:
# Preprocess the data 
df["track_name"] = df["track_name"].str.lower()
df["artist_name"] = df["artist_name"].str.lower()
df["album_name"] = df["album_name"].str.lower()

pd.set_option('display.max_colwidth', None)

In [4]:
# Combine relevant columns into a single column 
df["combined_features"] = df["track_name"] + '|' + df["artist_name"] + '|' + df["album_name"] 

In [5]:
# Create a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

In [6]:
# Fit and transform the text data
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

In [7]:
# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [8]:
import random

def get_recommendations(song_input, danceability_category, cosine_sim=cosine_sim):
    index = df[df["track_name"].str.lower() == song_input].index[0]

    danceability_mapping = {
        'low': 0.2,
        'medium': 0.5,
        'high': 0.8
    }

    danceability_threshold = danceability_mapping.get(danceability_category.lower())

    if danceability_threshold is None:
        return "Invalid danceability category. Please choose 'low', 'medium', or 'high'."

    # Filter songs based on danceability threshold
    filtered_df = df[(df['danceability'] >= danceability_threshold) & (df.index != index)]

    # If no songs meet the threshold, return a message
    if filtered_df.empty:
        return f"No songs found with {danceability_category} danceability."

    # Shuffle the indices for randomness
    shuffled_indices = list(filtered_df.index)
    random.shuffle(shuffled_indices)

    # Get recommendations based on the shuffled indices
    recommendations = []
    for shuffled_index in shuffled_indices:
        sim_scores = list(enumerate(cosine_sim[shuffled_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[:15]  # Get top 15 recommendations
        song_indices = [i[0] for i in sim_scores]
        recommendations.extend(df[["track_name", "artist_name", "album_name"]].iloc[song_indices].values)

    return recommendations[:15]  # Limit the final recommendations to 15


In [None]:
# Main loop to get user input and provide recommendations
while True:
    song_input = input("Merry Christmas! Enter the name of a Christmas song (type 'exit' to quit): ").lower()

    if song_input == 'exit':
        break

    if song_input not in df["track_name"].values:
        print(f"Song '{song_input}' not found in the dataset. Please check the spelling or enter a different Christmas song name.")
        continue

    danceability_input = input("Enter danceability category ('low', 'medium', 'high'): ")

    recommendations = get_recommendations(song_input, danceability_input)
    print(f"Recommended songs for '{song_input}' with {danceability_input} danceability:")
    
    if recommendations:
        for recommendation in recommendations:
            print(recommendation)
    else:
        print("No recommendations found.")


Merry Christmas! Enter the name of a Christmas song (type 'exit' to quit): jingle bells
Enter danceability category ('low', 'medium', 'high'): low
Recommended songs for 'jingle bells' with low danceability:
["it's the most wonderful time of the year" 'david campbell'
 "baby it's christmas"]
["it's the most wonderful time of the year" 'andy williams'
 'christmas hits']
['deck the halls' 'mark vincent' 'the most wonderful time of the year']
['christmas time is here again' 'weeping willows'
 'christmas time has come']
["baby! it's christmas" 'jessie james decker' "baby! it's christmas"]
['christmas every day' 'david archuleta' 'winter in the air']
['christmas time' 'backstreet boys' 'christmas hits']
['my only wish (this year)' 'britney spears' 'christmas hits']
['christmas (baby please come home)' 'mariah carey' 'merry christmas']
['christmas (baby please come home)' 'darlene love' 'christmas hits']
["baby, it's cold outside" 'lydia liza' "baby, it's cold outside"]
['santa baby' 'samanth