In [127]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.tensorboard import SummaryWriter

from util.encoding import encode

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [146]:
# tensorboard
writer = SummaryWriter()

In [164]:
df = pd.read_csv("data/features.csv")

In [165]:
X = df.iloc[:, 2:-1]  # skip index and name

y = df["label"]  # 10 genres
y, code = encode(y)
y = np.array(y)

#### NORMALIZE X ####

cols = X.columns
std_scaler = StandardScaler()
np_scaled = std_scaler.fit_transform(X)

# new data frame with the new scaled data.
X = pd.DataFrame(np_scaled, columns=cols)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)


In [166]:
# convert to tensors
X_train_t = Variable(torch.FloatTensor(X_train.to_numpy()))
X_val_t = Variable(torch.FloatTensor(X_val.to_numpy()))
X_test_t = Variable(torch.FloatTensor(X_test.to_numpy()))

y_train_t = Variable(torch.LongTensor(y_train))
y_val_t = Variable(torch.LongTensor(y_val))
y_test_t = Variable(torch.LongTensor(y_test))


In [167]:
# reshaping to rows, timestamps, features
def transform(X):
    return torch.reshape(X, (X.shape[0], 1, X.shape[1]))


X_train_t_final = transform(X_train_t)
X_val_t_final = transform(X_val_t)
X_test_t_final = transform(X_test_t)


In [174]:
class LSTMNet(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length, dropout=0.2):
        super(LSTMNet, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers  # number of layers
        self.input_size = input_size  # input size
        self.hidden_size = hidden_size  # hidden state
        self.seq_length = seq_length  # sequence length

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )  # lstm
        self.fc_1 = nn.Linear(hidden_size, 128)  # fully connected 1
        self.fc = nn.Linear(128, num_classes)  # fully connected last layer
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x):
        h_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        )  # hidden state
        c_0 = Variable(
            torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        )  # internal state
        x = self.dropout(x)
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(
            x, (h_0, c_0)
        )  # lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size)  # reshaping the data for Dense layer next
        out = self.relu(hn)
        out = self.fc_1(out)  # first Dense
        out = self.relu(out)  # relu
        out = self.fc(out)  # Final Output
        return out

    def fit(self, X_train, y_train, X_val, y_val, learning_rate=0.01, verbose=False):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)

        self.train()
        for epoch in range(num_epochs):
            self.train()
            # train data
            outputs = self.forward(X_train)  # forward pass
            optimizer.zero_grad()  # caluclate the gradient, manually setting to 0

            # obtain the loss function
            loss = criterion(outputs, y_train)
            writer.add_scalar("loss/train", loss, epoch)
            loss.backward()  # calculates the loss of the loss function

            optimizer.step()  # improve from loss, i.e backprop
            if epoch % 100 == 0 and verbose:
                print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

            # validation data
            out_val = self.forward(X_val)  # forward pass

            # obtain the loss function
            loss_val = criterion(out_val, y_val)
            writer.add_scalar("loss/val", loss_val, epoch)

        
        writer.flush()
        writer.close()

    def predict(self, X_test):
        self.eval()
        preds_t = self(X_test)
        y_pred = torch.argmax(preds_t, axis=1)
        return y_pred.detach().numpy()

    def score(self, X_test, y_test):
        self.eval()
        preds_t = self(X_test)
        y_pred = torch.argmax(preds_t, axis=1)
        y_test_t = torch.Tensor(y_test)
        acc = torch.sum(y_pred == y_test_t) / y_pred.shape[0]
        return float(acc)


In [187]:
# hyperparamaters
num_epochs = 2000
learning_rate = 0.01
dropout = 0.3

input_size = 29  # number of features
hidden_size = 12  # number of features in hidden state
num_layers = 1  # number of stacked lstm layers

num_classes = 10  # number of output classes


In [188]:
model = LSTMNet(
    num_classes, input_size, hidden_size, num_layers, X_train_t_final.shape[1], dropout=dropout
)
model.fit(X_train_t_final, y_train_t, X_val_t_final, y_val_t, learning_rate=learning_rate, verbose=True)


Epoch: 0, loss: 2.30422
Epoch: 100, loss: 1.02639
Epoch: 200, loss: 0.89935
Epoch: 300, loss: 0.82396
Epoch: 400, loss: 0.75610
Epoch: 500, loss: 0.76413
Epoch: 600, loss: 0.70796
Epoch: 700, loss: 0.66902
Epoch: 800, loss: 0.70074
Epoch: 900, loss: 0.66837
Epoch: 1000, loss: 0.69433
Epoch: 1100, loss: 0.68270
Epoch: 1200, loss: 0.62629
Epoch: 1300, loss: 0.64127
Epoch: 1400, loss: 0.59341
Epoch: 1500, loss: 0.63627
Epoch: 1600, loss: 0.64593
Epoch: 1700, loss: 0.61435
Epoch: 1800, loss: 0.55676
Epoch: 1900, loss: 0.60808


In [121]:
y_pred = model.predict(X_test_t_final)
y_pred

array([6, 9, 0, 1, 5, 1, 8, 3, 5, 8, 5, 0, 0, 9, 6, 1, 9, 1, 3, 6, 6, 0,
       1, 8, 9, 7, 8, 6, 8, 0, 0, 8, 8, 6, 4, 5, 3, 7, 6, 6, 5, 8, 4, 6,
       1, 8, 8, 9, 5, 7, 5, 1, 1, 0, 8, 2, 5, 9, 0, 1, 5, 1, 6, 3, 0, 3,
       4, 3, 1, 1, 7, 3, 0, 6, 5, 4, 0, 6, 8, 9, 7, 8, 3, 6, 7, 8, 7, 7,
       4, 2, 3, 6, 5, 5, 0, 4, 3, 7, 0, 6, 0, 2, 5, 9, 1, 6, 4, 9, 6, 6,
       7, 3, 3, 3, 5, 4, 2, 4, 0, 5, 5, 9, 9, 3, 1, 4, 4, 0, 0, 8, 0, 0,
       4, 4, 5, 0, 7, 3, 6, 3, 5, 9, 3, 4, 8, 5, 4, 8, 4, 1, 5, 3, 7, 7,
       2, 9, 4, 8, 1, 0, 9, 1, 1, 0, 0, 5, 9, 9, 4, 8, 5, 8, 1, 7, 8, 3,
       2, 7, 1, 0, 3, 1, 2, 9, 1, 2, 0, 7, 4, 5, 2, 3, 3, 8, 3, 6, 4, 5,
       4, 8, 8, 1, 1, 3, 9, 8, 5, 1, 5, 3, 9, 1, 5, 4, 9, 8, 9, 4, 1, 1,
       6, 0, 3, 7, 1, 9, 9, 8, 8, 0, 4, 6, 3, 0, 8, 7, 8, 0, 2, 3, 9, 2,
       2, 7, 0, 3, 6, 9, 2, 2, 9, 9, 1, 2, 2, 3, 6, 4, 3, 6, 2, 8, 8, 4,
       2, 4, 4, 8, 3, 6, 5, 5, 5, 0, 2, 7, 2, 3, 6, 2, 0, 8, 0, 6, 9, 6,
       7, 5, 2, 1, 2, 3, 1, 9, 5, 4, 2, 7, 4, 9])

In [189]:
model.score(X_test_t_final, y_test)

0.7200000286102295