In [None]:
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import pandas as pd

from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
from torchinfo import summary

from IPython import display
# Ask the inline matplotlib backend to render figures as SVG.
# NOTE(review): recent IPython releases mark `IPython.display.set_matplotlib_formats`
# as deprecated in favour of `matplotlib_inline.backend_inline.set_matplotlib_formats`
# — confirm against the installed IPython version.
display.set_matplotlib_formats('svg')

### Exploratory Data Analysis

In [None]:
# Load the dataset from the CSV file (path is relative to the notebook's working directory)
songs_csv_path = r'../data/data.csv'
df_songs = pd.read_csv(songs_csv_path)

# Optional profiling report, left disabled:
# profile = ProfileReport(df_songs, title="Pandas Profiling Report")
# profile.to_widgets()

### Data Pre-Processing

In [None]:

# Drop useless columns (High Cardinality).
# errors='ignore' keeps this cell re-runnable: `df_songs` is reassigned here, so on a
# second execution the columns are already gone and a plain drop would raise KeyError.
df_songs = df_songs.drop(columns=['Artist Name', 'Track Name'], errors='ignore')

# Fill missing values: column mean for 'Popularity' and 'instrumentalness',
# most frequent value (mode) for 'key'.
fill_values = {
    'Popularity': df_songs['Popularity'].mean(),
    'key': df_songs['key'].mode()[0],
    'instrumentalness': df_songs['instrumentalness'].mean(),
}
df_songs = df_songs.fillna(value=fill_values)

# Normalization (z-score) of the continuous features.
# NOTE: the imputation values above and the mean/std below are computed on the full
# dataset, i.e. before the train/test split done later in the notebook, so rows that
# end up in the test set contribute to these statistics.
numeric_columns = [
    'Popularity', 'danceability', 'energy', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_in min/ms',
]
df_norm = df_songs[numeric_columns]
mean_norm = df_norm.mean()
std_norm = df_norm.std()
df_norm = (df_norm - mean_norm) / std_norm

# Categorical Features (kept as-is, not scaled); 'Class' is the prediction target
categorical_columns = ['key', 'mode', 'time_signature', 'Class']
df_categorical = df_songs[categorical_columns]

# Put everything together: align the two column groups row-by-row on the index
df_preprocessed = df_norm.merge(df_categorical, how='inner', left_index=True, right_index=True)

### Train/Test Splitting

In [None]:
# Separate the target column from the predictor columns
target_column = 'Class'
y = df_preprocessed[target_column]
X = df_preprocessed.drop(columns=[target_column])

# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
print('Training Set: {0}, Test Set: {1}'.format(len(X_train), len(y_test)))

### Artificial Neural Network Modelling

In [None]:
# Pick the first CUDA device when one is available, otherwise stay on the CPU
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Turn the training split into tensors: features as float32, targets with the
# dtype inferred from the label column
inputs = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
outputs = torch.tensor(y_train.to_numpy())

# Report the number of feature columns and of distinct labels in the training split
n_feature_columns = X_train.shape[1]
n_distinct_labels = len(y_train.unique())
print('Inputs: {0}, Outputs: {1}'.format(n_feature_columns, n_distinct_labels))

In [None]:
# Seed PyTorch's global RNG so the weight initialisation below is reproducible
# across "Restart & Run All" (same seed value as the train/test split).
SEED = 42
torch.manual_seed(SEED)

# Network dimensions
N_FEATURES = 14      # 11 normalized columns + 'key', 'mode', 'time_signature'
N_CLASSES = 11       # size of the output layer (one logit per class)
HIDDEN_UNITS = 256   # width of every hidden layer
N_HIDDEN_LAYERS = 4  # number of 256 -> 256 layers between input and output layers

# Model Architecture: Linear/ReLU stack, built in the same order as a hand-written
# nn.Sequential so layer indices (0..10) are unchanged.
layers = [nn.Linear(N_FEATURES, HIDDEN_UNITS), nn.ReLU()]  # Input Layer + Activation
for _ in range(N_HIDDEN_LAYERS):
    layers += [nn.Linear(HIDDEN_UNITS, HIDDEN_UNITS), nn.ReLU()]  # Hidden Layer + Activation
layers.append(nn.Linear(HIDDEN_UNITS, N_CLASSES))  # Output Layer (raw logits, no activation)
ann_genres = nn.Sequential(*layers)

# Loss Function (takes raw logits and integer class indices)
loss_function = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.SGD(ann_genres.parameters(), lr=0.1)

In [None]:
# Move the network and both training tensors onto the selected device.
# Module.to() moves the parameters in place and returns the same module,
# so the optimizer created earlier keeps pointing at the same parameters.
ann_genres = ann_genres.to(device)
inputs, outputs = inputs.to(device), outputs.to(device)

In [None]:
# Display the training feature tensor (already moved to `device`) as this cell's output
inputs

In [None]:
# Layer-by-layer summary of the network for the training inputs.
# Pass the `device` selected earlier instead of a hard-coded 'cuda:0', so this cell
# also runs on machines where CUDA is unavailable and `device` fell back to the CPU.
summary(ann_genres, input_data=inputs, device=device)

### Training

In [None]:
num_epochs = 20000

# Per-epoch training history, stored as plain numbers that carry no autograd state
losses = torch.zeros(num_epochs)
epoch_accuracy = []

# For each Epoch (full-batch: every step uses all training rows at once)
for epoch_i in range(num_epochs):
    # Forward Pass
    y_hat = ann_genres(inputs)

    # Compute the Loss
    loss = loss_function(y_hat, outputs)
    # Store a plain float: writing the grad-tracking `loss` tensor into `losses`
    # would attach the history buffer to the autograd graph of every epoch.
    losses[epoch_i] = loss.item()

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Compute Accuracy on the logits from this epoch's forward pass (i.e. before
    # the parameter update above); pure bookkeeping, so no gradients are needed.
    with torch.no_grad():
        matches = torch.argmax(y_hat, dim=1) == outputs
        matches_numeric = matches.float()
        accuracy_pct = 100 * torch.mean(matches_numeric)
    # Keep Python floats so the history can be plotted even when training ran on the GPU
    epoch_accuracy.append(accuracy_pct.item())

# Final Forward Pass (inference only, so gradient tracking is disabled)
with torch.no_grad():
    predictions = ann_genres(inputs)

# Accuracy of the trained model on the training inputs, in percent (0-dim tensor)
pred_labels = torch.argmax(predictions, dim=1)
total_acc = 100 * torch.mean((pred_labels == outputs).float())

In [None]:
# report accuracy (measured on the training inputs)
print('Final accuracy: %g%%' %total_acc)

fig, ax = plt.subplots(1, 2, figsize=(13, 4))

# Left panel: training loss per epoch
ax[0].plot(losses.detach())
ax[0].set_ylabel('Loss')
ax[0].set_xlabel('epoch')
ax[0].set_title('Losses')

# Right panel: training accuracy per epoch. float() accepts both Python numbers and
# 0-dim tensors (including CUDA tensors, which matplotlib cannot plot directly), so
# the second subplot is no longer left empty.
ax[1].plot([float(acc) for acc in epoch_accuracy])
ax[1].set_ylabel('accuracy')
ax[1].set_xlabel('epoch')
ax[1].set_title('Accuracy')
plt.show()