In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.semi_supervised import LabelPropagation
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,track_id,track_name,playlist_id,danceability,energy,acousticness,valence,tempo
0,2I0G8OVqgjLvY5fBC9Qc0J,Mountains and Molehills - feat. Turin Brakes,0,0.595,0.763,0.0105,0.038,139.983
1,295nnPutAyvfXpVsfwJbga,Sweet Shop,0,0.592,0.886,0.00487,0.105,139.913
2,3k3DZvTCSD4HKvj26dQDk8,Jump Ya Body - Dub Mix,0,0.734,0.969,0.0372,0.226,140.024
3,2XbF32Nwvi5Kz7BRR4cIGz,Thundergun,0,0.527,0.846,4.8e-05,0.0631,140.008
4,5gaZEkMg6D9QuOnmAVpYVX,The Great Divide - Seven Lions Dub Mix,0,0.49,0.739,0.00201,0.184,140.019


## Goals:
- Develop a new algorithm for separating songs
- Prove its effectiveness

In [4]:
data = df.values
propagated, unlabeled = train_test_split(data, test_size=30)

In [5]:
unlabeled_df = pd.DataFrame(unlabeled)
unlabeled_df.columns = df.columns
unlabeled_df

Unnamed: 0,track_id,track_name,playlist_id,danceability,energy,acousticness,valence,tempo
0,6pazDyDn0XF0te3wSjozhC,Hold On - Sub Focus Remix,14,0.382,0.928,0.00676,0.0571,173.975
1,5JWpYWuGnK4Jy2KQpqRFRh,The Hardest Mistakes [Popkong Mix] [feat. Cass...,3,0.439,0.873,0.000544,0.175,124.963
2,2KNsflubHgQQJJ1KEGvFNv,Avaritia,3,0.756,0.727,0.0229,0.0316,128.025
3,2KlZexJjJPuNWcN5uAG1GU,Gold (feat. Yuna),2,0.479,0.675,0.0319,0.18,145.022
4,4bs7bgxoxnkPAiuGM4rpcp,Promises (feat. Nessakay),0,0.476,0.737,0.0359,0.244,140.01
5,27mhCGdAA8gM7b33KIiB3k,Cyclone,10,0.641,0.72,0.0102,0.574,77.969
6,2ZICF8SFI5tHttaLJKMY1X,False Pretense,5,0.509,0.93,0.00137,0.764,154.95
7,4avinp5hlc6W1wtAtqPaHH,Diesel Not Petrol,7,0.813,0.723,0.00989,0.652,139.997
8,6JfmjCGy7nYztlgmWrXdsE,TKO,19,0.544,0.995,0.00545,0.463,87.999
9,5KK05UNvYyH2QMyBlHGKtW,Roses - Zaxx Remix,13,0.677,0.881,0.00445,0.53,128.121


## Moods:
    0: Depressed
    1: Excited
    2: Aggressive
    3: Relaxing
    4: Cheerful

In [7]:
labels = []
for song in unlabeled_df['track_name']:
    labels.append(input(song + ': '))

Hold On - Sub Focus Remix: 2
The Hardest Mistakes [Popkong Mix] [feat. Cassadee Pope]: 4
Avaritia: 1
Gold (feat. Yuna): 3
Promises (feat. Nessakay): 1
Cyclone: 2
False Pretense: 1
Diesel Not Petrol: 3
TKO: 2
Roses - Zaxx Remix: 4
Ultra DJ's/Me & You - Spencer & Hill Remix: 1
I Cry: 1
I Still Can't Stop: 2
The Island - Pt. I (Dawn): 3
Brainless: 3
Higher: 4
To The Sky - Original Mix: 1
Doomsday: 1
Dusk to Dawn: 1
Sex Apeal (Max Farenthide Remix) (Feat. Mike W.): 2
Don't Let Me Down: 1
Forever - Album Version (Edited): 2
Reasons feat. Andrew Allen: 4
Raise Your Weapon: 3
21 Guns: 0
Cozza Frenzy: 2
Llove (Dada Life Remix): 1
Hotline Bling: 0
Our Story - Radio Edit: 4
Put On: 2


In [8]:
temp_df = pd.DataFrame(labels)
temp_df.columns = ['mood']
final_df = pd.concat([unlabeled_df, temp_df], axis=1)
final_df

Unnamed: 0,track_id,track_name,playlist_id,danceability,energy,acousticness,valence,tempo,mood
0,6pazDyDn0XF0te3wSjozhC,Hold On - Sub Focus Remix,14,0.382,0.928,0.00676,0.0571,173.975,2
1,5JWpYWuGnK4Jy2KQpqRFRh,The Hardest Mistakes [Popkong Mix] [feat. Cass...,3,0.439,0.873,0.000544,0.175,124.963,4
2,2KNsflubHgQQJJ1KEGvFNv,Avaritia,3,0.756,0.727,0.0229,0.0316,128.025,1
3,2KlZexJjJPuNWcN5uAG1GU,Gold (feat. Yuna),2,0.479,0.675,0.0319,0.18,145.022,3
4,4bs7bgxoxnkPAiuGM4rpcp,Promises (feat. Nessakay),0,0.476,0.737,0.0359,0.244,140.01,1
5,27mhCGdAA8gM7b33KIiB3k,Cyclone,10,0.641,0.72,0.0102,0.574,77.969,2
6,2ZICF8SFI5tHttaLJKMY1X,False Pretense,5,0.509,0.93,0.00137,0.764,154.95,1
7,4avinp5hlc6W1wtAtqPaHH,Diesel Not Petrol,7,0.813,0.723,0.00989,0.652,139.997,3
8,6JfmjCGy7nYztlgmWrXdsE,TKO,19,0.544,0.995,0.00545,0.463,87.999,2
9,5KK05UNvYyH2QMyBlHGKtW,Roses - Zaxx Remix,13,0.677,0.881,0.00445,0.53,128.121,4


In [11]:
fit_X = final_df.iloc[:, 3:8].values
fit_y = final_df['mood'].values
lp = LabelPropagation()
lp.fit(fit_X, fit_y)
propagated_labels = lp.predict(propagated[:, 3:])

In [15]:
propagated_df = pd.concat([ pd.DataFrame(propagated), pd.DataFrame(propagated_labels)], axis=1)
propagated_df.columns = final_df.columns
propagated_df

Unnamed: 0,track_id,track_name,playlist_id,danceability,energy,acousticness,valence,tempo,mood
0,7k9iVpyeR5ybSPSkvmAP6K,All Around Me,5,0.479,0.884,0.0338,0.447,146.922,3
1,7sm1xuahSGG5qFa9pFXWVs,Just Dance - RedOne Remix,7,0.893,0.812,0.0034,0.815,119.021,1
2,3mtukCAgd0mE260QcBfXAX,Love Song,7,0.625,0.686,0.0154,0.542,139.817,3
3,00HFpTTjUHfMmJMZ6HiSHi,Midnight City - Man Without Country Remix,17,0.517,0.423,5.24e-05,0.111,105.045,2
4,3VrI4JYoS75hyLPwG6Z9tn,Remember The Name (feat. Styles Of Beyond),37,0.687,0.805,0.0661,0.893,84.858,2
5,16Of7eeW44kt0a1M0nitHM,You Make Me Feel... - feat. Sabi,13,0.668,0.857,0.0191,0.746,131.959,2
6,75Liz40xlgtL7H9vsDF9tW,Die Young REMIX,7,0.767,0.699,0.0355,0.659,127.995,4
7,56T1QA7202iW7fZes2PTUe,I Can't Stop Drinking About You - Chainsmokers...,3,0.637,0.664,0.0451,0.358,128.009,4
8,50ju2h9EUXaFB5f7ajOrfT,Piece of Me - JKAY Remix,7,0.752,0.658,0.00131,0.781,124.035,4
9,3xozZw4YNCv777ONrbdHsy,Stay The Night - featuring Hayley Williams of ...,2,0.6,0.728,0.109,0.441,127.926,4


In [19]:
propagated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 696 entries, 0 to 695
Data columns (total 9 columns):
track_id        696 non-null object
track_name      696 non-null object
playlist_id     696 non-null object
danceability    696 non-null object
energy          696 non-null object
acousticness    696 non-null object
valence         696 non-null object
tempo           696 non-null object
mood            696 non-null object
dtypes: object(9)
memory usage: 49.0+ KB
