# Song recommender project #

 Garcinuño Feliciano Angela <br>
 Huima Klaara <br>
 Massot Lucas 

In [247]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d

In [248]:
# Settings
k = 10 #number of songs to recommend
n = 10 #length of input playlist



In [249]:
data = pd.read_csv("output.csv")
data['similar'] = data['similar'].apply(lambda x: x.split('/'))
data['artist_terms'] = data['artist_terms'].apply(lambda x: str(x).split('/'))
data['artist_terms_weights'] = data['artist_terms_weights'].apply(lambda x: str(x).split('/'))

l_max = data['loudness'].max()
l_min = data['loudness'].min()

t_max = data['tempo'].max()
t_min = data['tempo'].min()

data['tempo'] = (data['tempo'] - t_min)/(t_max - t_min)
data['loudness'] = (data['loudness'] - l_min)/(l_max - l_min)

In [250]:
def distance(song1, song2, alphas = [1,1,1,1,1]):
    """
    song1, song2 : python native lists    format : [artist, title, album, similar, hottness, terms, terms-weights, loudness, tempo]
    alphas : python native list
    
    """
    
    alpha_hot, alpha_loud, alpha_tempo, alpha_similar, alpha_terms = alphas
    artist, title1, album1, similar1, hot1, terms1, weights1, loud1, tempo1 = song1
    artist2, title2, album2, similar2, hot2, terms2, weights2, loud2, tempo2 = song2
    
    distance = 0

    # hottness, loudness and tempo 
    distance += alpha_hot*abs(hot1-hot2) + alpha_loud*abs(loud1-loud2) + alpha_tempo*abs(tempo1-tempo2)
    # similar artists 
    distance += alpha_similar*(1 - len([singer for singer in similar1 if singer in similar2])/100)

    # artist terms (and weights)
    shared_terms = [term for term in terms1 if (term in terms2 and term != '')]
    shared_weights1, shared_weights2 = [float(weights1[terms1.index(term)]) for term in shared_terms], [float(weights2[terms2.index(term)]) for term in shared_terms]
    distance -= alpha_terms*sum([0.5*(shared_weights1[i] + shared_weights2[i]) for i in range(len(shared_terms)) ])
    
    #print("hot: " + str(alpha_hot*abs(hot1-hot2) )+ " loud: " + str(alpha_loud*abs(loud1-loud2)) + " tempo: ", + str(alpha_tempo*abs(tempo1-tempo2) )+ "similar: " + 
    #str(alpha_similar*(1 - len([singer for singer in similar1 if singer in similar2])/100))+ "terms: " + str(alpha_terms*sum([0.5*(shared_weights1[i] + shared_weights2[i]) for i in range(len(shared_terms)) ])))
    
    return distance

In [251]:
def build_distances(input: list, df: pd.core.frame.DataFrame):
    N = len(input)
    M = len(df)
    distances = [[0 for i in range(N)] for j in range(M)]
    total_d = [0]*M
    for j in range(M):
        for i in range(N):
            distances[j][i] = distance(df.iloc[j], input[i])
        total_d[j] = sum(distances[j])

    return distances, total_d

In [252]:
def build_df(input: list, df: pd.core.frame.DataFrame):
    input_songs = [x.upper() for x in input]

    indexes = []
    result = []
    for i in range(len(df)):
        if str(df.iloc[i]['title']).upper() in input_songs:
            result.append(df.iloc[i])
            indexes.append(i)
    for i in indexes:
        df.drop(i, inplace=True)
    return result
    

In [253]:
def recommend(song_list: list, df: pd.core.frame.DataFrame):
    input = build_df(song_list, df)
    distances, total_d = build_distances(input, df)
    distances = pd.DataFrame(distances)
    results = np.argpartition(total_d, k)[:k]
    output = []
    d = []
    for i in results:
        output.append(df.iloc[i])
        d.append(total_d[i])
    output_df = pd.DataFrame(output)
    output_df = output_df[['artist_name', 'title','release']]
    output_df['total distance'] = d
    display(output_df)


In [256]:
# TEST
import random

input = [str(data.iloc[j]['title']) for j in random.sample(range(len(data)), n)]
recommend(input, data.copy())

['Special', 'Spend the Night featuring Rock Money and Paul Law', 'Moonlight Boy', 'I Want You Back', 'Stuff Like That', 'Cumberland Gap', "He's A Rebel", 'I Want Your Soul For Christmas', 'Banho De Amor', 'Lost Again']


Unnamed: 0,artist_name,title,release,total distance
9011,Jackie Wilson,One Moment With You,Reet Petite,-20.448579
3190,Jackie Wilson,I'm Wanderin',The Jackie Wilson Genesis,-21.626353
6994,Jackie Wilson,Each Time (I Love You More),Lonely Teardrops,-21.270018
1312,Thomas Dolby,One Of Our Submarines,Premium Gold Collection,-19.923544
7992,Jackie Wilson,It's So Fine,Lonely Teardrops,-21.913683
4342,Jackie Wilson,If I Can't Have You,Lonely Teardrops,-21.11349
2164,John Hiatt,Memphis In The Meantime,Greatest Hits: The A&M Years '87- '94,-19.943438
1212,Damone,Child Of The Night,Out Here All Night,-23.298317
8409,Jackie Wilson,Why Can't You Be Mine,Reet Petite,-20.599696
8585,Thomas Dolby,She Blinded Me With Science,Blinded By Science,-19.751479
