In [None]:
import pandas as pd
import numpy as np
# Load the cleaned, labeled recommendation dataset from the working directory.
df = pd.read_csv(filepath_or_buffer='./cleaned_labeled_recom.csv')
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Preview the first five rows of the raw frame.
df.head(n=5)

In [None]:
# Encode the boolean `explicit` flag as 0/1 with an explicit mapping.
# This avoids relying on `replace()` implicitly downcasting the result back to
# an integer dtype, and it is safe to re-run: 0 and 1 hash equal to False and
# True, so already-encoded values map to themselves. Missing values stay NaN.
df['explicit'] = df['explicit'].map({False: 0, True: 1})


df.head()

In [None]:
#Import the literal_eval function from ast
from ast import literal_eval
def _parse_artist_list(raw):
    """Turn one `all_artists` cell into a list of artist entries.

    Accepts an already-parsed list (so the cell can be re-run), a stringified
    list, or a missing value. Cells that are just '[' or ']' and anything that
    does not evaluate to a list yield an empty list, matching the original
    handling. Other malformed strings still raise from `literal_eval`.
    """
    if isinstance(raw, list):
        return list(raw)
    if not isinstance(raw, str) or raw in ('[', ']'):
        # NaN / missing values and lone brackets carry no artist information.
        return []
    parsed = literal_eval(raw)
    return list(parsed) if isinstance(parsed, list) else []


#Convert every stringified artist list into a real list object
df['all_artists'] = df['all_artists'].apply(_parse_artist_list)

#Create one row per (song, artist) pair; explode keeps the song's index label,
#so the join below repeats the song's other columns for every artist.
#Songs with an empty list get a single row with a missing artist.
s = df['all_artists'].explode().rename('artist')
#Create gen_df by dropping the list column 'all_artists' and adding the scalar 'artist'
gen_df = df.drop('all_artists', axis=1).join(s)
#Print the head of the new gen_df
gen_df.head(10)

In [None]:
# Keep only the descriptive columns, the audio features and the emotion label,
# in a fixed order.
selected_columns = [
    'title', 'artist', 'release_date', 'genre', 'explicit', 'popularity',
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'time_signature', 'emotion',
]
gen_df = gen_df[selected_columns]
gen_df.head(n=10)

In [None]:
# Report the number of missing values per column. Printed explicitly because a
# bare expression in the middle of a cell is evaluated but never displayed.
print(gen_df.isnull().sum())
# Drop the text / label columns so only numeric features remain, then show
# their pairwise correlation matrix.
new_df = gen_df.drop(columns=['title', 'artist', 'release_date', 'genre', 'emotion'])
new_df.corr()

In [None]:
from sklearn import preprocessing
from sklearn.cluster import KMeans

# Scale every numeric feature to the [0, 1] range before clustering.
x = new_df.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
norm_df = pd.DataFrame(x_scaled)
print(norm_df.head())

# Fixed seed (and explicit n_init) so that the cluster ids written to disk are
# reproducible across kernel restarts.
kmeans = KMeans(n_clusters=7, n_init=10, random_state=42)
features = kmeans.fit_predict(norm_df)
# Store the cluster id of each row as an extra 'features' column.
gen_df['features'] = features
print(gen_df.head())
# Name the exported row index 'index': the cells that read finalUsable.csv
# back select and filter on a column with exactly that name.
gen_df.to_csv('./finalUsable.csv', index_label='index')

In [None]:
import pandas as pd
import numpy as np

# Reload the exported dataset so the recommender cells work from the file on disk.
gen_df = pd.read_csv(filepath_or_buffer='./finalUsable.csv')
# Show the column labels of the reloaded frame.
gen_df.columns

In [None]:
from tqdm import tqdm

def recommend_based_on_song_name(gen_df, song_name, amount=1):
    """Recommend the songs closest to a given title that share its emotion.

    The first row whose title equals ``song_name`` (case-insensitively) is the
    reference. Every row with a different title is scored by the sum of
    absolute differences to the reference over the feature columns, and the
    closest rows carrying the reference's emotion are returned.

    Args:
        gen_df: DataFrame with 'index', 'title', 'artist' and 'emotion'
            columns. The feature columns are selected by position (see below).
        song_name: Title of the reference song.
        amount: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns 'index', 'title' and 'artist', closest first.

    Raises:
        IndexError: If no row is titled ``song_name``.
    """
    # Column positions left out of the distance.
    # NOTE(review): presumably the id/text columns plus popularity, mode and
    # emotion in the finalUsable.csv layout - confirm against the column order.
    skipped_positions = (0, 1, 2, 3, 4, 6, 11, 19)
    feature_positions = [pos for pos in range(gen_df.shape[1])
                         if pos not in skipped_positions]

    is_reference = gen_df['title'].str.lower() == song_name.lower()
    if not is_reference.any():
        raise IndexError('no song titled {!r} in gen_df'.format(song_name))
    song = gen_df[is_reference].iloc[0]
    # Read the emotion by name so it agrees with the name-based filter below.
    emotion = song['emotion']
    print(emotion)

    # Work on a copy so adding the 'distance' column never touches gen_df.
    rec = gen_df[~is_reference].copy()
    reference = song.iloc[feature_positions].to_numpy(dtype=float)
    candidates = rec.iloc[:, feature_positions].to_numpy(dtype=float)
    rec['distance'] = np.abs(candidates - reference).sum(axis=1)

    rec = rec.loc[rec['emotion'] == emotion]
    # Stable sort keeps the original row order among equally distant songs.
    rec = rec.sort_values('distance', kind='mergesort')
    columns = ['index', 'title', 'artist']
    return rec[columns][:amount]

# Five closest same-emotion matches for one known title, shown as plain lists.
something = recommend_based_on_song_name(
    gen_df,
    song_name='Everybody Needs Somebody To Love',
    amount=5,
)
something.to_numpy().tolist()

In [None]:
from tqdm import tqdm

def recommend_based_on_genre(gen_df, genre, amount=1):
    """Recommend songs closest to the first song of a genre, sharing its emotion.

    The first row whose genre equals ``genre`` (case-insensitively) is the
    reference. Rows of any other genre are scored by the sum of absolute
    differences to the reference over the feature columns, and the closest
    rows carrying the reference's emotion are returned.

    NOTE(review): rows of the requested genre itself are excluded from the
    candidates, exactly as in the original code - confirm this is intended.

    Args:
        gen_df: DataFrame with 'index', 'title', 'artist', 'genre' and
            'emotion' columns. Feature columns are selected by position.
        genre: Genre used to pick the reference song.
        amount: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns 'index', 'title' and 'artist', closest first.

    Raises:
        IndexError: If no row has the requested genre.
    """
    # Column positions left out of the distance.
    # NOTE(review): presumably the id/text columns plus popularity, mode and
    # emotion in the finalUsable.csv layout - confirm against the column order.
    skipped_positions = (0, 1, 2, 3, 4, 6, 11, 19)
    feature_positions = [pos for pos in range(gen_df.shape[1])
                         if pos not in skipped_positions]

    is_reference = gen_df['genre'].str.lower() == genre.lower()
    if not is_reference.any():
        raise IndexError('no song with genre {!r} in gen_df'.format(genre))
    genre_df = gen_df[is_reference].iloc[0]
    # Read the emotion by name so it agrees with the name-based filter below.
    emotion = genre_df['emotion']

    # Work on a copy so adding the 'distance' column never touches gen_df.
    rec = gen_df[~is_reference].copy()
    reference = genre_df.iloc[feature_positions].to_numpy(dtype=float)
    candidates = rec.iloc[:, feature_positions].to_numpy(dtype=float)
    rec['distance'] = np.abs(candidates - reference).sum(axis=1)

    rec = rec.loc[rec['emotion'] == emotion]
    # Stable sort keeps the original row order among equally distant songs.
    rec = rec.sort_values('distance', kind='mergesort')
    columns = ['index', 'title', 'artist']
    return rec[columns][:amount]

# Five recommendations seeded from the first 'dance pop' song.
recommend_based_on_genre(gen_df, genre='dance pop', amount=5)

In [None]:
from tqdm import tqdm

def recommend_based_on_genre_year(gen_df, genre, year_min, amount=1):
    """Recommend recent songs closest to the first song of a genre.

    The first row whose genre equals ``genre`` (case-insensitively) is the
    reference. Rows of any other genre are scored by the sum of absolute
    differences to the reference over the feature columns. Only rows with the
    reference's emotion and ``release_date > year_min`` are kept, and each
    title appears once (its closest row).

    NOTE(review): rows of the requested genre itself are excluded from the
    candidates, exactly as in the original code - confirm this is intended.

    Args:
        gen_df: DataFrame with 'title', 'artist', 'genre', 'release_date' and
            'emotion' columns. Feature columns are selected by position.
        genre: Genre used to pick the reference song.
        year_min: Exclusive lower bound compared against 'release_date'.
        amount: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns 'title' and 'artist', closest first.

    Raises:
        IndexError: If no row has the requested genre.
    """
    # Column positions left out of the distance.
    # NOTE(review): presumably the id/text columns plus popularity, mode and
    # emotion in the finalUsable.csv layout - confirm against the column order.
    skipped_positions = (0, 1, 2, 3, 4, 6, 11, 19)
    feature_positions = [pos for pos in range(gen_df.shape[1])
                         if pos not in skipped_positions]

    is_reference = gen_df['genre'].str.lower() == genre.lower()
    if not is_reference.any():
        raise IndexError('no song with genre {!r} in gen_df'.format(genre))
    genre_df = gen_df[is_reference].iloc[0]
    # Read the emotion by name so it agrees with the name-based filter below.
    emotion = genre_df['emotion']

    # Work on a copy so adding the 'distance' column never touches gen_df.
    rec = gen_df[~is_reference].copy()
    reference = genre_df.iloc[feature_positions].to_numpy(dtype=float)
    candidates = rec.iloc[:, feature_positions].to_numpy(dtype=float)
    rec['distance'] = np.abs(candidates - reference).sum(axis=1)

    rec = rec.loc[(rec['emotion'] == emotion) & (rec['release_date'] > year_min)]
    # Stable sort keeps the original row order among equally distant songs, so
    # drop_duplicates deterministically keeps the closest row of every title.
    rec = rec.sort_values('distance', kind='mergesort')
    rec = rec.drop_duplicates(subset='title')
    columns = ['title', 'artist']
    return rec[columns][:amount]

# Five recommendations released after 2000, seeded from 'classic swedish pop'.
recommend_based_on_genre_year(
    gen_df,
    genre='classic swedish pop',
    year_min=2000,
    amount=5,
)

In [None]:
from tqdm import tqdm

def recommend_mix(gen_df, song_name, artist, genre, year_min, emotion, amount=1):
    """Recommend songs from a mixed seed (title, artist or genre) and an emotion.

    The first row matching the genre, the artist or the title
    (case-insensitively) is the reference. Rows matching none of the three are
    scored by the sum of absolute differences to the reference over the
    feature columns. Only rows with the requested ``emotion`` and
    ``release_date > year_min`` are kept, and each title appears once.

    Args:
        gen_df: DataFrame with 'title', 'artist', 'genre', 'release_date' and
            'emotion' columns. Feature columns are selected by position.
        song_name: Title that may identify the reference song.
        artist: Artist that may identify the reference song.
        genre: Genre that may identify the reference song.
        year_min: Exclusive lower bound compared against 'release_date'.
        emotion: Emotion label every recommendation must carry.
        amount: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns 'title' and 'artist', closest first.

    Raises:
        IndexError: If no row matches the genre, the artist or the title.
    """
    # Column positions left out of the distance.
    # NOTE(review): presumably the id/text columns plus popularity, mode and
    # emotion in the finalUsable.csv layout - confirm against the column order.
    skipped_positions = (0, 1, 2, 3, 4, 6, 11, 19)
    feature_positions = [pos for pos in range(gen_df.shape[1])
                         if pos not in skipped_positions]

    is_reference = ((gen_df['genre'].str.lower() == genre.lower()) |
                    (gen_df['artist'].str.lower() == artist.lower()) |
                    (gen_df['title'].str.lower() == song_name.lower()))
    if not is_reference.any():
        raise IndexError(
            'no song matches genre {!r}, artist {!r} or title {!r}'.format(
                genre, artist, song_name))
    mix_df = gen_df[is_reference].iloc[0]

    # Candidates match none of the three seeds. Work on a copy so adding the
    # 'distance' column never touches gen_df.
    rec = gen_df[~is_reference].copy()
    reference = mix_df.iloc[feature_positions].to_numpy(dtype=float)
    candidates = rec.iloc[:, feature_positions].to_numpy(dtype=float)
    rec['distance'] = np.abs(candidates - reference).sum(axis=1)

    rec = rec.loc[(rec['emotion'] == emotion) & (rec['release_date'] > year_min)]
    # Stable sort keeps the original row order among equally distant songs, so
    # drop_duplicates deterministically keeps the closest row of every title.
    rec = rec.sort_values('distance', kind='mergesort')
    rec = rec.drop_duplicates(subset='title')
    columns = ['title', 'artist']
    return rec[columns][:amount]

# Five 'Angry' recommendations released after 2000 for a mixed title/artist/genre seed.
something = recommend_mix(
    gen_df,
    song_name='The Wire',
    artist='HAIM',
    genre='classic swedish pop',
    year_min=2000,
    emotion='Angry',
    amount=5,
)

In [None]:
# Draw one random row (kept in `row1`; nothing is displayed for it here).
row1 = gen_df.sample(n = 1)
emotion_list = ['Happy', 'Surprise', 'Neutral', 'Angry', 'Sad', 'Disgust', 'Fear']

# Count the rows per emotion on gen_df itself. The mask must come from the
# frame being filtered: a boolean Series taken from another frame (`df`) does
# not line up with gen_df's rows.
emotion_counts = gen_df['emotion'].value_counts()
my_list = []
total = 0
for emotion in emotion_list:
    a = int(emotion_counts.get(emotion, 0))
    print(emotion, a)
    my_list.append(a)
    total += a

print(total, my_list)
# Convert the counts to percentages; an empty selection yields all zeros
# instead of a division by zero.
for i in range(len(my_list)):
    my_list[i] = float(my_list[i]) / float(total) * 100.0 if total else 0.0

print(my_list, sum(my_list))
# Turn every percentage into a consecutive [start, end) arc of a 360-degree wheel.
start_angle = 0
end_angle = 0
for i in range(len(my_list)):
    end_angle = start_angle + 360.0 * my_list[i] / 100.0
    print(start_angle, end_angle, emotion_list[i])
    start_angle = end_angle


In [None]:
# good emotions
# happy, surprise, neutral
# bad emotions
# angry, sad, disgust, fear

In [None]:
import pandas as pd
import numpy as np
# Reload the exported dataset into `df` for the lookup cells below.
df = pd.read_csv(filepath_or_buffer='./finalUsable.csv')
# Preview the first five rows.
df.head(n=5)

In [None]:
# Values of the 'index' column identifying the liked songs.
song_id = [0, 1, 2, 3, 4, 5, 77]
liked_emotion = []
for si in song_id:
    # Row label of the first row whose 'index' value equals this id.
    songid = df.index[df['index'] == si][0]
    emotion = df.loc[songid, 'emotion']
    liked_emotion.append(emotion)
print(liked_emotion)

In [None]:
# The four most popular rows, as plain Python lists.
df.sort_values(by="popularity", ascending=False).head(4).to_numpy().tolist()

In [None]:
import urllib.request
import re

# Sample input for the lookup below: item[0] is collected as the id, and
# item[1] and item[2] are joined into the search text.
my_list = [
    [674, 'After Midnight', 'Fastway', 22],
    [747, 'How Bizarre', 'OMC', 0],
    [847, 'Wake Me Up', 'Avicii', 10],
    [622, "Livin' On A Prayer", 'Bon Jovi', 0],
]

def returnable(df_to_list):
    """Find the first YouTube search hit for every song in ``df_to_list``.

    Args:
        df_to_list: Iterable of rows where ``item[0]`` is the song id,
            ``item[1]`` the title and ``item[2]`` the artist.

    Returns:
        Tuple ``(id_list, song_hash)`` of two parallel lists: the song ids and
        the 11-character video id of the first match on the results page.
        ``song_hash`` holds ``None`` for a song whose page has no match.
    """
    from urllib.parse import urlencode

    id_list = []
    song_hash = []
    for item in df_to_list:
        id_list.append(item[0])
        name = item[1] + ' ' + item[2]
        print(name.replace(' ', '+'))
        # urlencode escapes the text (spaces, apostrophes, non-ASCII) for use
        # in a query string. Formatting the encoded bytes object into the URL
        # would instead embed its repr, i.e. a literal b'...' wrapper.
        url = 'https://www.youtube.com/results?' + urlencode({'search_query': name})
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as html:
            page = html.read().decode()
        video_ids = re.findall(r"watch\?v=(\S{11})", page)
        if not video_ids:
            # Keep both lists aligned even when nothing was found.
            song_hash.append(None)
            continue
        song_hash.append(video_ids[0])
        print('https://www.youtube.com/watch?v=' + video_ids[0])
    return id_list, song_hash
# Resolve the sample songs, then print the ids and the video ids found for them.
id_list, song_hash = returnable(df_to_list=my_list)
print(id_list, '\n', song_hash)