In [None]:
import tkinter as tk
from tkinter import ttk
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from tkinter import messagebox

In [None]:
# Read the Spotify 2023 dataset into a DataFrame, decoding the file as Latin-1
data = pd.read_csv(
    '../data/spotify-2023.csv',
    encoding='latin1',
)

In [None]:
# Preprocessing: coerce the columns used below to numbers (cells that cannot be
# parsed become NaN), then drop every row that is missing any of them.
numeric_columns = ['streams', 'danceability_%', 'energy_%', 'acousticness_%', 'instrumentalness_%']
for column_name in numeric_columns:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')
data = data.dropna(subset=numeric_columns)

In [None]:
# Window initialization
# Colour palette shared by the widgets created in this notebook
bg_color, frame_bg = '#121212', '#1f1f1f'
accent_color, text_color = '#03dac6', '#ffffff'

# Root window
app = tk.Tk()
app.title("Music Song Recommender")
app.geometry('1200x600')

# ttk theming: switch to the 'clam' theme and recolour labels, buttons and the tree view
style = ttk.Style()
style.theme_use('clam')
themed_styles = {
    'TLabel': dict(background=bg_color, foreground=text_color),
    'TButton': dict(background=frame_bg, foreground=text_color),
    'Treeview': dict(background=bg_color, foreground=text_color, fieldbackground=bg_color),
}
for style_name, style_options in themed_styles.items():
    style.configure(style_name, **style_options)
# Selected tree rows are highlighted with the accent colour
style.map('Treeview', background=[('selected', accent_color)])

# Horizontal split: controls go in the left pane, the results table in the right pane
paned_window = ttk.PanedWindow(app, orient=tk.HORIZONTAL)
paned_window.pack(expand=True, fill=tk.BOTH)

left_frame = tk.Frame(paned_window, bg=bg_color)
right_frame = tk.Frame(paned_window, bg=bg_color)
for pane in (left_frame, right_frame):
    paned_window.add(pane)

In [None]:
# User Interface elements and their functions
# Results table in the right pane; each heading is the column id in title case
table_columns = (
    "track_name", "artist", "streams", "danceability",
    "energy", "acousticness", "instrumentalness", "cluster",
)
song_table = ttk.Treeview(right_frame, columns=table_columns, show="headings")
for column_id in table_columns:
    song_table.heading(column_id, text=column_id.title())
song_table.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

def create_slider(label_text, variable, avg_value):
    """Build one labelled slider + entry row in the left pane and return the slider.

    The slider and the entry are kept in sync: dragging the slider calls
    ``update_value`` and pressing Return in the entry calls
    ``update_slider_from_entry``.

    Args:
        label_text: Caption shown to the left of the slider.
        variable: Tk variable bound to the entry (``textvariable``); it is also
            written by the slider callback.
        avg_value: Initial slider position (the scale runs from 0 to 100) and
            initial value stored in ``variable``.

    Returns:
        The created ``ttk.Scale`` widget.
    """
    frame = tk.Frame(left_frame, bg=bg_color)
    frame.pack(pady=5)
    ttk.Label(frame, text=label_text).pack(side=tk.LEFT)

    # The callback closes over `entry`, which is only assigned a few lines below,
    # so the slider must not be moved until the entry exists.
    slider = ttk.Scale(
        frame, from_=0, to=100, orient="horizontal",
        command=lambda _value: update_value(variable, slider, entry),
    )
    slider.pack(side=tk.LEFT, padx=10)

    entry = ttk.Entry(frame, width=5, textvariable=variable)
    entry.pack(side=tk.LEFT)
    entry.bind("<Return>", lambda event: update_slider_from_entry(variable, slider))

    # ttk.Scale.set() invokes the command callback; calling it before `entry`
    # was assigned made the callback fail with a NameError on start-up.
    slider.set(avg_value)
    variable.set(avg_value)

    return slider
    
def update_value(variable, slider, entry):
    """Copy the slider position into ``variable`` and show it in ``entry``.

    The entry text is replaced by the value formatted with two decimals.
    """
    position = float(slider.get())
    variable.set(position)
    shown_text = "{:.2f}".format(position)
    entry.delete(0, tk.END)
    entry.insert(0, shown_text)
    
def update_slider_from_entry(variable, slider):
    """Move ``slider`` to the value held by ``variable``, or undo invalid input.

    If the value is a number between 0 and 100 the slider is set to it.
    Otherwise (out of range, or text that is not a number) ``variable`` is
    reset to the slider's current position.

    Args:
        variable: Tk variable bound to the entry the user typed into.
        slider: Scale widget to synchronise with the entry.
    """
    try:
        value = float(variable.get())
        if not 0 <= value <= 100:
            raise ValueError("Value out of bounds")
        slider.set(value)
    except (ValueError, tk.TclError):
        # A Tk DoubleVar reports unparseable text as TclError rather than
        # ValueError, so both must be caught for the reset to happen.
        variable.set(slider.get())

# One Tk variable per audio feature; each is bound to the entry of its slider row
danceability_var, energy_var, acousticness_var, instrumentalness_var = (
    tk.DoubleVar() for _ in range(4)
)

# Every slider starts at the mean of its feature column
for slider_label, slider_var, feature_column in (
    ("Danceability (%)", danceability_var, 'danceability_%'),
    ("Energy (%)", energy_var, 'energy_%'),
    ("Acousticness (%)", acousticness_var, 'acousticness_%'),
    ("Instrumentalness (%)", instrumentalness_var, 'instrumentalness_%'),
):
    create_slider(slider_label, slider_var, data[feature_column].mean())

# Checkbox state per feature, ticked by default; update_clusters reads these to
# decide which thresholds to apply
danceability_check, energy_check, acousticness_check, instrumentalness_check = (
    tk.BooleanVar(value=True) for _ in range(4)
)

for checkbox_text, checkbox_var in (
    ("Consider Danceability", danceability_check),
    ("Consider Energy", energy_check),
    ("Consider Acousticness", acousticness_check),
    ("Consider Instrumentalness", instrumentalness_check),
):
    ttk.Checkbutton(left_frame, text=checkbox_text, variable=checkbox_var).pack(pady=5)

In [1]:
def display_top_songs():
    """Replace the table contents with the ten songs that have the most streams.

    Reads the module-level ``data`` frame and writes to ``song_table``. The
    cluster column is filled with "-" because this view involves no clustering.
    """
    shown_fields = [
        'track_name', 'artist(s)_name', 'streams',
        'danceability_%', 'energy_%', 'acousticness_%', 'instrumentalness_%',
    ]
    most_streamed = data.nlargest(10, 'streams')

    # Clear whatever is currently listed before inserting the new rows
    song_table.delete(*song_table.get_children())
    for _, song in most_streamed.iterrows():
        row_values = tuple(song[field] for field in shown_fields) + ("-",)
        song_table.insert('', 'end', values=row_values)

In [None]:
def determine_optimal_clusters(data, max_clusters=10):
    """Choose a K-Means cluster count for ``data`` with the elbow method.

    K-Means is fitted for k = 1 .. upper, where ``upper`` is ``max_clusters``
    capped at the number of distinct feature rows (K-Means cannot fit more
    clusters than samples). The elbow is the k whose inertia lies furthest
    below the straight line joining the first and last points of the
    normalised inertia curve.

    Args:
        data: DataFrame containing the columns 'danceability_%', 'energy_%',
            'acousticness_%' and 'instrumentalness_%'.
        max_clusters: Largest cluster count to try (default 10).

    Returns:
        int: The selected number of clusters, between 1 and ``max_clusters``.
        Returns 1 without fitting anything when fewer than three candidate
        values of k are available (including an empty frame), since no elbow
        exists in that case.
    """
    features = data[['danceability_%', 'energy_%', 'acousticness_%', 'instrumentalness_%']]

    # Never ask for more clusters than there are distinct points to put in them
    upper = min(max_clusters, len(features.drop_duplicates()))
    if upper <= 2:
        return 1

    inertia_values = np.array(
        [KMeans(n_clusters=k, random_state=42).fit(features).inertia_ for k in range(1, upper + 1)],
        dtype=float,
    )

    total_drop = inertia_values[0] - inertia_values[-1]
    if total_drop <= 0:
        # Flat curve: adding clusters does not reduce inertia at all
        return 1

    # Scale both axes to [0, 1] so the chord runs from (0, 1) to (1, 0); the
    # vertical gap between chord and curve is then 1 - x - y.
    k_scaled = np.linspace(0.0, 1.0, upper)
    inertia_scaled = (inertia_values - inertia_values[-1]) / total_drop
    gap_below_chord = 1.0 - k_scaled - inertia_scaled

    # Position i in the arrays corresponds to k = i + 1
    return int(np.argmax(gap_below_chord)) + 1

In [None]:
def update_clusters():
    """Filter songs by the enabled thresholds, cluster them, and list the largest cluster.

    For every ticked checkbox, only songs whose feature value is at least the
    corresponding threshold are kept. The remaining songs are clustered with
    K-Means (cluster count from ``determine_optimal_clusters``) and the songs
    of the most frequent cluster are written to ``song_table``.

    An info dialog is shown when nothing matches, and an error dialog when a
    threshold entry does not contain a number. Reads the module-level ``data``
    frame without modifying it.
    """
    feature_columns = ['danceability_%', 'energy_%', 'acousticness_%', 'instrumentalness_%']
    feature_controls = (
        (danceability_check, danceability_var, 'danceability_%'),
        (energy_check, energy_var, 'energy_%'),
        (acousticness_check, acousticness_var, 'acousticness_%'),
        (instrumentalness_check, instrumentalness_var, 'instrumentalness_%'),
    )

    try:
        conditions = [
            data[column] >= threshold.get()
            for enabled, threshold, column in feature_controls
            if enabled.get()
        ]
    except (tk.TclError, ValueError):
        # The thresholds are bound to free-text entries; reading a DoubleVar
        # whose text is not a number raises TclError.
        messagebox.showerror(
            "Invalid Input",
            "Please enter a number between 0 and 100 for every enabled feature.",
        )
        return

    # Work on a copy so that adding the 'cluster' column below never writes
    # into the global frame (with no box ticked, the selection is `data` itself).
    selection = data[np.logical_and.reduce(conditions)] if conditions else data
    filtered_data = selection.copy()

    if filtered_data.empty:
        song_table.delete(*song_table.get_children())
        messagebox.showinfo("No Songs Found", "No songs match the selected criteria.")
        return

    optimal_clusters = determine_optimal_clusters(filtered_data)
    kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)
    filtered_data['cluster'] = kmeans.fit_predict(filtered_data[feature_columns])

    most_common_cluster = filtered_data['cluster'].mode()[0]
    clustered_data = filtered_data[filtered_data['cluster'] == most_common_cluster]

    song_table.delete(*song_table.get_children())
    if clustered_data.empty:
        # Defensive only: the most frequent label always has at least one row
        messagebox.showinfo("No Songs Found", "No songs found in the most common cluster.")
        return

    for _, row in clustered_data.iterrows():
        song_table.insert('', 'end', values=(
            row['track_name'], row['artist(s)_name'], row['streams'],
            row['danceability_%'], row['energy_%'], row['acousticness_%'], row['instrumentalness_%'], row['cluster']
        ))

In [None]:
# Buttons: "Update" runs the filter-and-cluster step, the second one shows the top-10 list
for button_text, button_action in (
    ("Update", update_clusters),
    ("Show Top 10 Songs", display_top_songs),
):
    ttk.Button(left_frame, text=button_text, command=button_action).pack(pady=10)

In [None]:
# Populate the table with the ten most-streamed songs so the window does not open empty
display_top_songs()

In [None]:
# Start the Tkinter event loop; this call blocks (the cell keeps running) until the window is closed
app.mainloop()