# NN Classification

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Pre-processing

In [2]:
# Relative path to the CSV file that is loaded into `df` below.
file_path = "./data/features_30_sec.csv"

Load the dataset:

In [3]:
# Read the whole CSV into a DataFrame; column dtypes are fixed in a later cell.
df = pd.read_csv(filepath_or_buffer=file_path)

Set the correct datatypes:

In [21]:
# Start from float32 for every column, then override the columns that are
# not floating-point features.
dtypes = dict.fromkeys(df.columns, np.float32)
dtypes.update(
    {
        "filename": "string",
        "length": np.int32,
        "label": "category",
    }
)
df = df.astype(dtypes)

## Train-validation split

In [23]:
# Target vector: the "label" column as a NumPy array.
y = df.loc[:, "label"].to_numpy()

In [24]:
# Feature matrix: every column except the file name and the target label.
X = df.drop(columns=["filename", "label"]).to_numpy()

Shuffle the data:

In [26]:
# Seed the global NumPy RNG so the permutation is reproducible.
np.random.seed(0)

# Build the identity index vector and shuffle it in place.
random_idxs = np.arange(len(y))
np.random.shuffle(random_idxs)

# Apply the same permutation to features and labels so rows stay aligned.
# NOTE: X and y are overwritten, so re-running this cell on its own permutes
# the already-shuffled arrays a second time.
X, y = X[random_idxs], y[random_idxs]

Split the data into training set and validation set:

In [32]:
# Index of the first validation row: the leading 80% of rows go to training.
num_train = int(len(y) * 0.8)

# Rows before the threshold form the training set, the remainder the
# validation set; features and labels are cut at the same position.
X_train, X_valid = X[:num_train], X[num_train:]
y_train, y_valid = y[:num_train], y[num_train:]

## Data Normalization

The mean and the standard deviation must be computed on the training set only, so that no information from the validation set leaks into the normalization:

In [50]:
# Per-feature statistics: axis=0 reduces over the rows, giving one mean and
# one standard deviation per column. A single scalar computed over the whole
# matrix would be dominated by the largest-scale columns and would leave the
# individual features unstandardized.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

# Guard against constant columns: a zero standard deviation would yield
# NaN/inf when dividing. Such columns are only centered (divided by 1).
std = np.where(std == 0, 1.0, std)

In [51]:
# Standardize the training features with the training-set statistics:
# subtract the mean first, then divide by the standard deviation.
X_train_centered = X_train - mean
X_train_norm = X_train_centered / std

Check the results:

In [52]:
# Sanity check: print the overall mean and standard deviation of the
# normalized training data.
print(f"Mean: {X_train_norm.mean():.3f}")
print(f"Standard deviation: {X_train_norm.std()}")

Mean: 0.000
Standard deviation: 1.0


# Neural Network setup

In [58]:
# Display the dimensions of the feature matrix and of the label vector.
np.shape(X), np.shape(y)

((1000, 58), (1000,))

We have 58 input features, therefore the input layer will consist of 58 neurons.

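There is a single target variable (the label), but because it is categorical the output layer needs one neuron per class rather than a single neuron: a softmax over one neuron would always output 1.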

Given that this is a classification problem, we will also add a softmax layer.

# Training

To train the NN, we will use AdaGrad or RMSProp.

In [54]:
# Hyper-parameters