In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import accuracy_score, confusion_matrix

scaler = MinMaxScaler()
encoder = LabelEncoder()

# Load the labelled training tracks and keep only the four moods the model predicts.
# .copy() detaches the filtered frame from the raw one so the column assignment
# below does not raise pandas' SettingWithCopyWarning.
X = pd.read_csv('/content/Train.csv')
X = X[X['mood'].isin(['energetic', 'happy', 'sad', 'calm'])].copy()

# Display label: "<track> BY <first 10 chars of artist>...".
# Built column-wise so every row uses its OWN artist. The previous per-row lookup
# scanned the whole frame for each track (O(n^2)) and attached the first matching
# artist to every track that shared a title.
X['name'] = X['name'].astype(str) + " BY " + X['artist'].str[:10] + "..."

artists = X['artist']
names = X['name']
y = X['mood']
yLabels = y.copy()

# Drop the target and the non-feature columns; the display label becomes the index.
X = X.drop(columns=['mood', 'uri', 'artist', 'genres', 'length', 'timeSignature'])
X = X.set_index('name')

# NOTE(review): the scaler is fitted on every labelled row *before* the split, so the
# hold-out rows influence the min/max used for scaling. Kept as-is because the
# cross-validation further down runs on the full scaled X.
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)
y = encoder.fit_transform(y)
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=15, shuffle=True
)

# Show which integer code the encoder assigned to each mood label.
display(pd.DataFrame({'mood': yLabels, 'encoding': y}).drop_duplicates().set_index('mood'))
# --------
def base_model(input_dim=9, n_classes=4, hidden_units=8):
    """Build and compile the mood classifier network.

    Architecture: one hidden Dense layer with ReLU activation followed by a
    Dense softmax output layer.

    Args:
        input_dim: Number of input features per sample (default 9, the value
            the original hard-coded).
        n_classes: Number of output units, one per mood class (default 4).
        hidden_units: Width of the hidden layer (default 8).

    Returns:
        A compiled ``Sequential`` model using the ``categorical_crossentropy``
        loss, the ``adam`` optimizer and the ``accuracy`` metric.
    """
    model = Sequential()
    # Hidden layer: `hidden_units` ReLU nodes fed by `input_dim` features.
    model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
    # Output layer: one softmax unit per class.
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

# verbose=0: with 250 epochs and 10 CV folds plus the final fit, per-epoch progress
# output would bury the notebook in thousands of log lines.
estimator = KerasClassifier(build_fn=base_model, epochs=250, batch_size=200, verbose=0)

# Seed the fold shuffling so the CV splits are the same on every run.
# NOTE(review): network weight initialisation is still unseeded, so scores can
# vary slightly between runs.
kfold = KFold(n_splits=10, shuffle=True, random_state=15)
results = cross_val_score(estimator, X, y, cv=kfold, error_score='raise')
print("%.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Train the final model on the training split only.
estimator.fit(X_train, Y_train)

# pickle the model
# with open('./Data/Model/Trained.pkl', 'wb') as f:
#     pickle.dump(estimator, f)

# Class probabilities for the hold-out split; the predicted class is the column
# with the highest probability in each row.
y_preds = estimator.predict_proba(X_test)
discretePredictions = np.argmax(y_preds, axis=1)

# Hold-out accuracy and confusion matrix.
print("Accuracy Score", accuracy_score(Y_test, discretePredictions))
cm = confusion_matrix(Y_test, discretePredictions)

In [None]:
# Report hold-out accuracy and rebuild the confusion matrix for the same predictions.
print("Accuracy Score", accuracy_score(y_true=Y_test, y_pred=discretePredictions))
cm = confusion_matrix(y_true=Y_test, y_pred=discretePredictions)

Accuracy Score 0.7170868347338936


In [None]:
# Load the unlabelled tracks to classify and drop the columns that are not used below.
af_tracks = pd.read_csv('/content/af_test.csv', index_col=[0]).drop(
    ['album', 'release_date', 'popularity', 'key'], axis=1
)

# Display label: "<track> BY <first 10 chars of artist>".
# Built column-wise so each row uses its own artist (the previous per-row lookup was
# O(n^2) and reused the first matching artist for tracks that share a title).
af_tracks['name'] = af_tracks['name'].astype(str) + " BY " + af_tracks['artist'].str[:10]

# Side tables kept for later cells; rows stay in the same order as the feature frame.
namesToUri = pd.DataFrame({'uri': af_tracks['uri'].values}, index=af_tracks['name'].values)
artists = af_tracks['artist']
names = af_tracks['name']
playlist = af_tracks['playlist']

af_features = (
    af_tracks
    .drop(['playlist', 'uri', 'artist', 'genres', 'length', 'timeSignature'], axis=1)
    .set_index('name')
)
# Put the features in the column order the scaler and model were trained on.
af_features = af_features[X.columns]

# Use transform(), NOT fit_transform(): the inference data must be scaled with the
# min/max learned from the training data. Refitting here would scale these tracks
# differently from the rows the model was trained on and overwrite the fitted scaler.
# NOTE: X_test is reassigned here and no longer refers to the hold-out split.
X_test = pd.DataFrame(
    scaler.transform(af_features), columns=X.columns, index=af_features.index
)

y_preds = estimator.predict_proba(X_test)
discretePredictions = np.argmax(y_preds, axis=1)

# Attach per-class probabilities.
# Assumes the encoder mapped calm=0, energetic=1, happy=2, sad=3 -- confirm against
# the mood/encoding table displayed after training.
predictionDF = X_test.copy()
predictionDF['Calm'] = y_preds[:, 0]
predictionDF['Energetic'] = y_preds[:, 1]
predictionDF['Happy'] = y_preds[:, 2]
predictionDF['Sad'] = y_preds[:, 3]

# .copy() so adding 'maxPct' does not write into a slice of predictionDF;
# .to_numpy() assigns positionally, as the original list() did.
pcts = predictionDF.loc[:, 'Calm':'Sad'].copy()
pcts['maxPct'] = pcts.max(axis=1).to_numpy()

# One frame per mood: the rows whose highest probability belongs to that mood.
calmDF = predictionDF[predictionDF['Calm'] == pcts['maxPct']]
energeticDF = predictionDF[predictionDF['Energetic'] == pcts['maxPct']]
happyDF = predictionDF[predictionDF['Happy'] == pcts['maxPct']]
sadDF = predictionDF[predictionDF['Sad'] == pcts['maxPct']]

# display(calmDF)
# display(energeticDF)
# display(happyDF)
# display(sadDF)



In [None]:
# Preview the first rows: scaled audio features plus the four mood probabilities.
predictionDF.head()

Unnamed: 0_level_0,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,Calm,Energetic,Happy,Sad
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
To the Bone BY Pamungkas,0.517097,0.633803,0.728405,1.9e-05,0.082834,0.750815,0.004627,0.524768,0.967945,0.082661,0.055299,0.767401,0.09464
Runtuh BY Feby Putri,0.881286,0.545775,0.279151,0.000137,0.102458,0.505743,0.006672,0.452345,0.168715,0.263303,0.004448,0.007622,0.724627
Hati-Hati di Jalan BY Tulus,0.706234,0.60446,0.431284,0.089617,0.098327,0.573996,0.013989,0.121234,0.771343,0.544857,0.017735,0.208849,0.228559
Bertaut BY Nadin Amiz,0.938631,0.401408,0.119869,0.00247,0.09213,0.501075,0.014527,0.399268,0.087509,0.251659,0.003185,0.007141,0.738015
Monokrom BY Tulus,0.576454,0.476526,0.457831,6e-06,0.080149,0.645843,0.010653,0.21661,0.519179,0.314276,0.044408,0.195206,0.446111


In [None]:
# Sanity check: (number of tracks, feature columns + 4 mood-probability columns).
predictionDF.shape

(4474, 13)

In [None]:
# Lookup table: display label (index) -> track uri.
namesToUri

Unnamed: 0,uri
To the Bone BY Pamungkas,3pCt2wRdBDa2kCisIdHWgF
Runtuh BY Feby Putri,6Hii26x3qDErVitnGW8QtO
Hati-Hati di Jalan BY Tulus,2hHeGD57S0BcopfVcmehdl
Bertaut BY Nadin Amiz,630DpnzdfjdVqv2yLfPbAX
Monokrom BY Tulus,4GfK1qOF3uBWidbPlTCQRL
...,...
dRuNk BY ZAYN,0TC3wZupelwzKhuueGSGZu
Candyman BY Zedd,0NWQTyapmz4GuDTSN9xTB7
Adelaide Sky BY Adhitia So,3843qh8Ih59FwSCDzPigNU
Maaf BY Ghaitsa Ke,3LVQYpXaAgFsyBNB3L6NSi


In [None]:
# Attach each track's uri to its predictions. Both frames are reset to a fresh
# 0..n-1 index first, so rows are paired by position (left frame drives the result).
df_songs = pd.merge(
    predictionDF.reset_index(),
    namesToUri.reset_index(),
    how='left',
    left_index=True,
    right_index=True,
)

In [None]:
# Recover the track title from the "<track> BY <artist>" display label.
# Split on the LAST ' BY ' only, so a title that itself contains ' BY ' stays intact.
df_songs['track'] = df_songs['name'].str.rsplit(' BY ', n=1).str[0]

# Take the artist from the original `artists` column rather than from the label:
# the label only carries the first 10 characters of the artist name, which would
# export truncated artists (e.g. "Nadin Amiz"). Rows are in the same order as the
# frame `artists` was taken from, so assign positionally.
assert len(artists) == len(df_songs), "artists and df_songs are out of sync"
df_songs['artist'] = artists.to_numpy()

# The combined label and the join's helper column are no longer needed.
df_songs = df_songs.drop(['name', 'index'], axis=1)

In [None]:
# Preview after splitting the label into separate track and artist columns.
df_songs.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,Calm,Energetic,Happy,Sad,uri,track,artist
0,0.517097,0.633803,0.728405,1.9e-05,0.082834,0.750815,0.004627,0.524768,0.967945,0.082661,0.055299,0.767401,0.09464,3pCt2wRdBDa2kCisIdHWgF,To the Bone,Pamungkas
1,0.881286,0.545775,0.279151,0.000137,0.102458,0.505743,0.006672,0.452345,0.168715,0.263303,0.004448,0.007622,0.724627,6Hii26x3qDErVitnGW8QtO,Runtuh,Feby Putri
2,0.706234,0.60446,0.431284,0.089617,0.098327,0.573996,0.013989,0.121234,0.771343,0.544857,0.017735,0.208849,0.228559,2hHeGD57S0BcopfVcmehdl,Hati-Hati di Jalan,Tulus
3,0.938631,0.401408,0.119869,0.00247,0.09213,0.501075,0.014527,0.399268,0.087509,0.251659,0.003185,0.007141,0.738015,630DpnzdfjdVqv2yLfPbAX,Bertaut,Nadin Amiz
4,0.576454,0.476526,0.457831,6e-06,0.080149,0.645843,0.010653,0.21661,0.519179,0.314276,0.044408,0.195206,0.446111,4GfK1qOF3uBWidbPlTCQRL,Monokrom,Tulus


In [None]:
# Label every track with the name of the mood column holding its highest probability.
df_songs['mood'] = df_songs.loc[:, ['Calm', 'Energetic', 'Happy', 'Sad']].idxmax(axis='columns')

In [None]:
# Preview with the new 'mood' column (the highest-probability class per track).
df_songs.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,Calm,Energetic,Happy,Sad,uri,track,artist,mood
0,0.517097,0.633803,0.728405,1.9e-05,0.082834,0.750815,0.004627,0.524768,0.967945,0.082661,0.055299,0.767401,0.09464,3pCt2wRdBDa2kCisIdHWgF,To the Bone,Pamungkas,Happy
1,0.881286,0.545775,0.279151,0.000137,0.102458,0.505743,0.006672,0.452345,0.168715,0.263303,0.004448,0.007622,0.724627,6Hii26x3qDErVitnGW8QtO,Runtuh,Feby Putri,Sad
2,0.706234,0.60446,0.431284,0.089617,0.098327,0.573996,0.013989,0.121234,0.771343,0.544857,0.017735,0.208849,0.228559,2hHeGD57S0BcopfVcmehdl,Hati-Hati di Jalan,Tulus,Calm
3,0.938631,0.401408,0.119869,0.00247,0.09213,0.501075,0.014527,0.399268,0.087509,0.251659,0.003185,0.007141,0.738015,630DpnzdfjdVqv2yLfPbAX,Bertaut,Nadin Amiz,Sad
4,0.576454,0.476526,0.457831,6e-06,0.080149,0.645843,0.010653,0.21661,0.519179,0.314276,0.044408,0.195206,0.446111,4GfK1qOF3uBWidbPlTCQRL,Monokrom,Tulus,Sad


In [None]:
# List the available columns before selecting the ones to export.
df_songs.columns

Index(['acousticness', 'danceability', 'energy', 'instrumentalness',
       'liveness', 'loudness', 'speechiness', 'tempo', 'valence', 'Calm',
       'Energetic', 'Happy', 'Sad', 'uri', 'track', 'artist', 'mood'],
      dtype='object')

In [None]:
# Keep only the identifying columns, the predicted mood and the four class
# probabilities; the scaled audio features are left out of the export frame.
df_songs_fix = df_songs.loc[
    :, ['uri', 'track', 'artist', 'mood', 'Calm', 'Energetic', 'Happy', 'Sad']
]

In [None]:
# Preview the export frame: uri, track, artist, predicted mood and class probabilities.
df_songs_fix.head()

Unnamed: 0,uri,track,artist,mood,Calm,Energetic,Happy,Sad
0,3pCt2wRdBDa2kCisIdHWgF,To the Bone,Pamungkas,Happy,0.082661,0.055299,0.767401,0.09464
1,6Hii26x3qDErVitnGW8QtO,Runtuh,Feby Putri,Sad,0.263303,0.004448,0.007622,0.724627
2,2hHeGD57S0BcopfVcmehdl,Hati-Hati di Jalan,Tulus,Calm,0.544857,0.017735,0.208849,0.228559
3,630DpnzdfjdVqv2yLfPbAX,Bertaut,Nadin Amiz,Sad,0.251659,0.003185,0.007141,0.738015
4,4GfK1qOF3uBWidbPlTCQRL,Monokrom,Tulus,Sad,0.314276,0.044408,0.195206,0.446111


In [None]:
# Number of tracks assigned to each predicted mood.
df_songs_fix['mood'].value_counts()

Happy        1613
Sad          1380
Energetic    1237
Calm          244
Name: mood, dtype: int64

In [None]:
# Sanity check: the row count should match predictionDF; 8 export columns.
df_songs_fix.shape

(4474, 8)

In [None]:
# Write the predictions to disk. The row index is written too, as the first
# (unnamed) column of the CSV.
df_songs_fix.to_csv(path_or_buf="audio_mood.csv", index=True)