# Music Genre Prediction using Logistic Regression

#### Loading and Displaying data

In [45]:
import pandas as pd
from sklearn import preprocessing
import numpy as np

In [46]:
df = pd.read_csv("Data/train.csv", sep = r',', skipinitialspace = True)

df.head()

Unnamed: 0,Artist Name,Track Name,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature,Class
0,Bruno Mars,That's What I Like (feat. Gucci Mane),60.0,0.854,0.564,1.0,-4.964,1,0.0485,0.0171,,0.0849,0.899,134.071,234596.0,4,5
1,Boston,Hitch a Ride,54.0,0.382,0.814,3.0,-7.23,1,0.0406,0.0011,0.00401,0.101,0.569,116.454,251733.0,4,10
2,The Raincoats,No Side to Fall In,35.0,0.434,0.614,6.0,-8.334,1,0.0525,0.486,0.000196,0.394,0.787,147.681,109667.0,4,6
3,Deno,Lingo (feat. J.I & Chunkz),66.0,0.853,0.597,10.0,-6.528,0,0.0555,0.0212,,0.122,0.569,107.033,173968.0,4,5
4,Red Hot Chili Peppers,Nobody Weird Like Me - Remastered,53.0,0.167,0.975,2.0,-4.279,1,0.216,0.000169,0.0161,0.172,0.0918,199.06,229960.0,4,10


#### Cleaning data

In [47]:
# Replacing NaN with default valuesb
df = df.fillna(value = -1)

# Dropping columns
del df["Track Name"]

# Typecasting
df["Artist Name"] = df["Artist Name"].astype(str)

# Initializing Encoder
number = preprocessing.LabelEncoder()

# Encoding
df["Artist Name"] = number.fit_transform(df["Artist Name"])

df.head()

Unnamed: 0,Artist Name,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature,Class
0,1182,60.0,0.854,0.564,1.0,-4.964,1,0.0485,0.0171,-1.0,0.0849,0.899,134.071,234596.0,4,5
1,1092,54.0,0.382,0.814,3.0,-7.23,1,0.0406,0.0011,0.00401,0.101,0.569,116.454,251733.0,4,10
2,7899,35.0,0.434,0.614,6.0,-8.334,1,0.0525,0.486,0.000196,0.394,0.787,147.681,109667.0,4,6
3,1987,66.0,0.853,0.597,10.0,-6.528,0,0.0555,0.0212,-1.0,0.122,0.569,107.033,173968.0,4,5
4,6283,53.0,0.167,0.975,2.0,-4.279,1,0.216,0.000169,0.0161,0.172,0.0918,199.06,229960.0,4,10


In [48]:
df.shape

(17996, 16)

#### Defining X and y and Splitting data

In [56]:
from sklearn.model_selection import train_test_split

# Columns used as predictors
X = df.drop(["Class"], axis = 1).values

y = df["Class"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0, test_size = 0.2)

#### Scaling data

In [57]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#### One-vs-One

In [58]:
# from sklearn.linear_model import LogisticRegression
# from sklearn.multiclass import OneVsOneClassifier

# model = OneVsOneClassifier(LogisticRegression())
# model.fit(X_train, y_train)

# print(model.score(X_test, y_test))

In [63]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers


import tensorflow.keras.backend as K
K.clear_session()

nin = X_train.shape[1]
# nh = (int)((2/3)*nin + nout)
nh = 0.5*(nin+nout)
nout = np.max(y_train) + 1
model = Sequential()
model.add(Dense(units=nh, input_shape=(nin,), activation='sigmoid',name='hidden'))
model.add(Dense(units=nout, activation='softmax', name='output'))

opt = optimizers.Adam(learning_rate=1e-3) 
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

hist = model.fit(X_train, y_train, epochs=1000, batch_size=100, validation_data=(X_test,y_test), verbose=False)
print(hist.history['val_accuracy'])
print(np.max(hist.history['val_accuracy']))
tr_accuracy = hist.history['accuracy']
val_accuracy = hist.history['val_accuracy']


[0.2638888955116272, 0.289166659116745, 0.3105555474758148, 0.36694443225860596, 0.40416666865348816, 0.42027777433395386, 0.42916667461395264, 0.43638888001441956, 0.4394444525241852, 0.44388890266418457, 0.4483333230018616, 0.4516666531562805, 0.45722222328186035, 0.4625000059604645, 0.4697222113609314, 0.47333332896232605, 0.4797222316265106, 0.48444443941116333, 0.48472222685813904, 0.49138888716697693, 0.4933333396911621, 0.4952777922153473, 0.4983333349227905, 0.496111124753952, 0.4977777898311615, 0.4975000023841858, 0.5016666650772095, 0.5, 0.5019444227218628, 0.5005555748939514, 0.5008333325386047, 0.503333330154419, 0.5022222399711609, 0.5052777528762817, 0.5063889026641846, 0.507777750492096, 0.507777750492096, 0.5069444179534912, 0.5088889002799988, 0.5074999928474426, 0.5086110830307007, 0.5102777481079102, 0.5091666579246521, 0.5083333253860474, 0.5102777481079102, 0.5108333230018616, 0.5108333230018616, 0.5105555653572083, 0.511388897895813, 0.5111111402511597, 0.5116666

In [62]:
print(tr_accuracy[-1])
print(val_accuracy[-1])

0.4868713617324829
0.4983333349227905


In [38]:
df = pd.read_csv("Data/test.csv", sep = r',', skipinitialspace = True)
df.head(5)

Unnamed: 0,Artist Name,Track Name,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature
0,David Bowie,Space Oddity - 2015 Remaster,73.0,0.31,0.403,,-13.664,1,0.0326,0.0726,9.3e-05,0.139,0.466,134.48,318027.0,4
1,Crimson Sun,Essence of Creation,34.0,0.511,0.955,1.0,-5.059,1,0.129,0.0004,9e-06,0.263,0.291,151.937,220413.0,4
2,P!nk,Raise Your Glass,78.0,0.7,0.709,7.0,-5.006,1,0.0839,0.0048,,0.0289,0.625,122.019,202960.0,4
3,Shawn Mendes,Wonder,80.0,0.333,0.637,1.0,-4.904,0,0.0581,0.131,1.8e-05,0.149,0.132,139.898,172693.0,4
4,Backstreet Boys,Helpless When She Smiles - Radio Version,48.0,0.393,0.849,11.0,-4.114,1,0.0459,0.00421,,0.162,0.222,74.028,4.093117,4


In [39]:
# Replacing NaN with default valuesb
df = df.fillna(value = -1)

# Dropping columns
del df["Track Name"]

# Typecasting
df["Artist Name"] = df["Artist Name"].astype(str)

# Initializing Encoder
number = preprocessing.LabelEncoder()

# Encoding
df["Artist Name"] = number.fit_transform(df["Artist Name"])

df.head()

Unnamed: 0,Artist Name,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature
0,1016,73.0,0.31,0.403,-1.0,-13.664,1,0.0326,0.0726,9.3e-05,0.139,0.466,134.48,318027.0,4
1,912,34.0,0.511,0.955,1.0,-5.059,1,0.129,0.0004,9e-06,0.263,0.291,151.937,220413.0,4
2,3165,78.0,0.7,0.709,7.0,-5.006,1,0.0839,0.0048,-1.0,0.0289,0.625,122.019,202960.0,4
3,3749,80.0,0.333,0.637,1.0,-4.904,0,0.0581,0.131,1.8e-05,0.149,0.132,139.898,172693.0,4
4,363,48.0,0.393,0.849,11.0,-4.114,1,0.0459,0.00421,-1.0,0.162,0.222,74.028,4.093117,4


In [42]:
Xts = df.values
yhat = model.predict(Xts)
accuracy = np.mean(yhat == )