# Neural Network Lab
A slimmed-down notebook for experimenting with neural network parameters.

## Preprocessing (copied from the master notebook)

In [1]:
# import standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the raw dataset from the CSV file under the local data directory.
csv_path = 'data/data.csv'
df = pd.read_csv(csv_path)

In [3]:
# Columns that are removed from the frame before any further processing.
to_drop = ['id', 'release_date', 'name', 'artists', 'instrumentalness']
df = df.drop(columns=to_drop)

In [4]:
# Discard rows with zero popularity, zero tempo, or a duration above
# 1,000,000 ms, then renumber the surviving rows from 0.
rows_to_discard = (
    (df['popularity'] == 0)
    | (df['tempo'] == 0)
    | (df['duration_ms'] > 1000000)
)
df = df.loc[~rows_to_discard].reset_index(drop=True)

In [5]:
# one hot encode
# Replace the integer codes in `key` with note names so the generated dummy
# columns carry readable labels. An unknown code raises KeyError on purpose
# instead of silently turning into NaN.
key_names = {0:'C', 1:'C#/Db', 2:'D', 3:'D#/Eb',
             4:'E', 5:'F', 6:'F#/Gb', 7:'G',
             8:'G#/Ab', 9:'A', 10:'A#/Bb', 11:'B'}
df['key'] = df['key'].map(lambda x: key_names[x])

category_columns = ['explicit', 'key', 'mode']
# Pin the dummy dtype to uint8 (the default before pandas 2.0). Newer pandas
# returns bool dummies; mixed with the numeric feature columns that makes the
# whole feature matrix convert to an object array, which Keras cannot turn
# into a tensor.
# NOTE(review): get_dummies only expands object/category columns, so
# `explicit` and `mode` presumably pass through unchanged as numeric 0/1
# flags -- confirm against the raw data.
category_df = pd.get_dummies(df[category_columns], drop_first=True, dtype=np.uint8)

# Swap the original categorical columns for their encoded counterparts.
df = pd.concat([df.drop(columns=category_columns), category_df], axis=1)

In [6]:
# train test split
from sklearn.model_selection import train_test_split

# Features are every column except the target; the target is `popularity`.
X = df.drop('popularity', axis=1)
y = df.popularity

# Fix the shuffle seed so the same rows land in train/test on every run;
# without it each Restart & Run All produces a different split and the
# metrics at the bottom of the notebook are not comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:
# standardization
from sklearn.preprocessing import StandardScaler

# Only the continuous features are meant to be scaled, so each split is
# divided into the columns that came out of the encoding step (category_df)
# and everything else. Every piece gets a fresh 0..n-1 index.
one_hot_columns = category_df.columns

# training data
X_train_cat, X_train_cont = (
    piece.reset_index(drop=True)
    for piece in (X_train[one_hot_columns], X_train.drop(columns=one_hot_columns))
)

# testing data
X_test_cat, X_test_cont = (
    piece.reset_index(drop=True)
    for piece in (X_test[one_hot_columns], X_test.drop(columns=one_hot_columns))
)

In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits. The scaled arrays are wrapped back into
# DataFrames so the original column names are kept.
std = StandardScaler()
std.fit(X_train_cont)

X_train_scaled = pd.DataFrame(std.transform(X_train_cont), columns=X_train_cont.columns)
X_test_scaled = pd.DataFrame(std.transform(X_test_cont), columns=X_test_cont.columns)

In [9]:
# Put the scaled continuous features and the categorical columns back side
# by side; both halves were re-indexed from 0, so rows line up by position.
X_train, X_test = (
    pd.concat([scaled_part, categorical_part], axis='columns')
    for scaled_part, categorical_part in (
        (X_train_scaled, X_train_cat),
        (X_test_scaled, X_test_cat),
    )
)

## Neural Network using Keras

In [11]:
from keras import models
from keras import layers
from keras import optimizers

In [12]:
# Start from an empty Sequential container; the Dense layers are appended to
# it in the following cell.
model = models.Sequential()

In [13]:
# Fully connected regression network: four tanh hidden layers narrowing from
# 10 units down to 2, followed by a single-unit output layer.
# The input width is read from the training frame instead of being
# hard-coded, so the model still builds when preprocessing adds or removes
# feature columns.
n_features = X_train.shape[1]
model.add(layers.Dense(10, activation='tanh', input_shape=(n_features,)))
model.add(layers.Dense(8, activation='tanh'))
model.add(layers.Dense(5, activation='tanh'))
model.add(layers.Dense(2, activation='tanh'))
# NOTE(review): a ReLU output cannot emit negative values and has zero
# gradient whenever its pre-activation is negative; consider 'linear' or
# 'sigmoid' for the 0-1 scaled target if predictions collapse to 0.
model.add(layers.Dense(1, activation='relu'))

In [14]:
# Configure training: mean-squared-error loss, optimizer selected by its
# string name.
model.compile(
    loss='mse',
    optimizer='SGD',
)

In [15]:
# Train on the popularity target rescaled by 1/100, holding out 25% of the
# training rows for validation.
from keras import callbacks

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen, so the run ends on its own instead of needing a
# manual interrupt part-way through the 250 epochs.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Keep the returned History object so the loss curves can be inspected later
# (this also suppresses the bare History repr as cell output).
history = model.fit(
    X_train,
    y_train / 100,
    epochs=250,
    batch_size=5,
    validation_split=0.25,
    callbacks=[early_stopping],
)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
 2869/14973 [====>.........................] - ETA: 5s - loss: 0.0238

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import r2_score, mean_squared_error

In [None]:
# Coefficient of determination on the held-out split, computed against the
# same 1/100-rescaled target the network was trained on.
y_test_scaled = y_test / 100
test_predictions = model.predict(X_test)
r2_score(y_test_scaled, test_predictions)

In [None]:
# Root-mean-squared error on the held-out split (target rescaled by 1/100).
# The square root is taken explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form works on every version.
np.sqrt(mean_squared_error(y_test / 100, model.predict(X_test)))

In [None]:
# Display the training target after the same 1/100 rescaling used for fitting.
y_train.div(100)