In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import sklearn
from encoding import encode

Load data

In [136]:
df = pd.read_csv("data/features.csv")

X = df.iloc[:, 2:-1]  # skip index and name

y = df["label"]  # 10 genres
y, code = encode(y)
y = np.array(y)

In [137]:
from sklearn.model_selection import train_test_split

#### NORMALIZE X ####
# Normalize so everything is on the same scale.

cols = X.columns
std_scaler = sklearn.preprocessing.StandardScaler()
np_scaled = std_scaler.fit_transform(X)

# new data frame with the new scaled data. 
X = pd.DataFrame(np_scaled, columns = cols)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

X_train = torch.FloatTensor(X_train.to_numpy())
X_test = torch.FloatTensor(X_test.to_numpy())
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

In [148]:
params = {
    "input_features": 29,
    "hidden_size": 100,
    "num_classes": 10,
    "epochs": 1000,
    "learning_rate": 0.01,
}

In [149]:
# creating the network
class Net(nn.Module):
    def __init__(self, params):
        super(Net, self).__init__()

        # hyperparameter
        self.params = params
        self.input_features = params["input_features"]
        self.hidden_size = params["hidden_size"]
        self.num_classes = params["num_classes"]
        self.epochs = params["epochs"]
        self.learning_rate = params["learning_rate"]

        # net
        self.fc1 = nn.Linear(self.input_features, self.hidden_size)
        self.fc2 = nn.Linear(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x

    def fit(self, X_train, y_train, verbose=False):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)

        # training
        losses = []
        for i in range(self.epochs):
            y_pred = self.forward(X_train)
            loss = criterion(y_pred, y_train)
            losses.append(loss)
            if verbose:
                print(f"epoch: {i:2}  loss: {loss.item():10.8f}")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def predict(self, x):
        self.eval()
        y_pred = self(x)
        return y_pred.argmax().item()

    def score(self, X_test, y_test):
        preds = []
        for x in X_test:
            preds.append(self.predict(x))

        y_pred = torch.LongTensor(preds)
        acc = torch.sum(y_pred == y_test) / X_test.shape[0]
        return float(acc)


In [150]:
model = Net(params)
model.fit(X_train, y_train)

In [151]:
model.score(X_test, y_test)

0.6499999761581421