In [None]:
import csv
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot as plt

In [None]:
class datasetMusic(Dataset):
    """Map-style dataset backed by a comma-separated file held fully in memory.

    The file is parsed with ``np.genfromtxt``; its first row is discarded
    (treated as a header). In every remaining row the last column is the
    target and all preceding columns are the input features.

    Args:
        path: Location of the CSV file. Required despite the ``None`` default.
        sliced: Optional sequence of column indices selecting (and ordering)
            which input features ``__getitem__`` returns. ``None`` or an empty
            sequence keeps every feature column.

    Raises:
        TypeError: If ``path`` is ``None``.
    """

    def __init__(self, path=None, sliced=None) -> None:
        super().__init__()
        if path is None:
            raise TypeError('datasetMusic requires a path to a CSV file, got None')
        self.path = path
        self.sliced = sliced
        # genfromtxt opens and closes the file itself; no separate handle needed.
        table = np.genfromtxt(self.path, delimiter=',', dtype=np.float64)
        # Row 0 is the header line (parsed as NaNs) and is dropped.
        self.data = table[1:, :]
        self.len = len(self.data)

    def __getitem__(self, index: int):
        """Return ``(features, target)`` for one row as float64 NumPy arrays.

        ``target`` always has length 1 (the row's last column).
        """
        row = self.data[index]
        inData = row[:-1]
        outData = row[-1:]

        # `is not None` instead of `!= None`: comparing a NumPy array with `!=`
        # is element-wise and cannot be used as a condition.
        if self.sliced is not None and len(self.sliced) != 0:
            # list(...) makes tuples/arrays behave like a plain index list.
            inData = inData[list(self.sliced)]

        return inData, outData

    def __len__(self) -> int:
        """Number of data rows (header excluded)."""
        return self.len

In [None]:
# Positional index of each input column; these are the values used to index
# the feature part of a dataset row (presumably audio-feature columns of the
# CSV, in file order -- verify against the file header).
(temp, key, mode, loud, time_s, dura,
 dance, acoust, speech, live, energy, instru) = range(12)

# Columns fed to the model, in exactly this order.
sliced = [loud, dance, energy, speech, acoust]

In [None]:
# Training split: reshuffled on every pass, served in mini-batches of 256 rows.
train_ds = datasetMusic(sliced=sliced, path='train.csv')
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)

In [None]:
# Validation split: one sample per batch, served in file order.
valid_ds = datasetMusic(sliced=sliced, path='valid.csv')
valid_dl = DataLoader(valid_ds, batch_size=1, shuffle=False)

In [None]:
class Net(nn.Module):
    """Float64 MLP regressor with a repeated residual branch.

    Structure of ``forward``:
      1. Stem: Linear(in_dim -> 1024) -> BatchNorm -> Linear(1024 -> hidden)
         -> Sigmoid.
      2. ``layer`` residual updates ``x = x + branch(x)``. Every update goes
         through the same BN1/LN1/BN2/LN2 modules, so the weights are shared
         across iterations.
      3. The stem output is added once more, then Linear(hidden -> 1).

    Args:
        in_dim: Number of input features per sample.
        layer: How many times the residual branch is applied.
        hiddenlayer: Width of the residual stream.
    """

    def __init__(self, in_dim=11, layer=3, hiddenlayer=16) -> None:
        super().__init__()
        self.in_dim = in_dim
        self.layer  = layer
        self.hidden = hiddenlayer

        # Parameter-free modules: a dtype cast would not change them.
        self.DRP  = nn.Dropout(0.1)
        self.SOF  = nn.Softmax(dim=1)  # registered, but forward() never calls it
        self.RELU = nn.ReLU()
        self.SIG  = nn.Sigmoid()

        width = self.hidden

        # Stem.
        self.PRELN0 = nn.Linear(in_dim, 1024).double()
        self.PREBN  = nn.BatchNorm1d(1024).double()
        self.PRELN1 = nn.Linear(1024, width).double()

        # Residual branch (a single set of weights).
        self.BN1 = nn.BatchNorm1d(width).double()
        self.LN1 = nn.Linear(width, 16).double()
        self.BN2 = nn.BatchNorm1d(16).double()
        self.LN2 = nn.Linear(16, width).double()

        # Regression head.
        self.FC = nn.Linear(width, 1).double()

    def _branch(self, x):
        """BN -> Linear -> Dropout -> ReLU -> BN -> Linear -> Sigmoid."""
        hidden = self.LN1(self.BN1(x))
        hidden = self.RELU(self.DRP(hidden))
        return self.SIG(self.LN2(self.BN2(hidden)))

    def forward(self, x):
        """Map a ``(batch, in_dim)`` float64 tensor to ``(batch, 1)`` outputs."""
        stem_out = self.SIG(self.PRELN1(self.PREBN(self.PRELN0(x))))

        stream = stem_out
        for _ in range(self.layer):
            stream = stream + self._branch(stream)

        # Long skip connection from the stem output.
        return self.FC(stream + stem_out)

In [None]:
# basic setup
# Hyper-parameters first, then the objects built from them.
epochs      = 10000
lr          = 1e-5

# Use the first CUDA device when one is available, otherwise the CPU.
device      = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Input width follows the number of selected feature columns.
model       = Net(in_dim=len(sliced), layer=6, hiddenlayer=16).to(device)
criterion   = nn.MSELoss()
optimizer   = optim.SGD(params=model.parameters(), lr=lr)

In [None]:
# training (regression for valence)
# One SGD step per mini-batch. The per-batch loss is recorded as a plain
# Python float so the history does not keep tensors alive and can be plotted
# directly.
losses = []
model.train()
for epoch in range(epochs):
    for idx, data in enumerate(train_dl):
        inData  = data[0].to(device)
        outData = data[1].to(device)

        preds   = model(inData)
        # nn.MSELoss follows the (input, target) convention.
        loss    = criterion(preds, outData)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches from the graph and copies the scalar to the host;
        # appending `loss` itself would store tensors that matplotlib cannot
        # convert (they require grad and may live on the GPU).
        loss_value = loss.item()
        losses.append(loss_value)
        print(f'Epoch: {str(epoch):5s}, idx: {str(idx):3s}, loss: {loss_value:.4f}')

plt.plot(losses)
plt.xlabel('training step')
plt.ylabel('MSE loss')
plt.show()

In [None]:
# validating
def extreme_accuracy(model, loader, device, low=0.25, high=0.75, cut=0.5):
    """Count sign-agreement on samples whose target is clearly high or low.

    A sample is scored only when its target is ``> high`` or ``< low``. It is
    a hit when the prediction falls on the same side of ``cut`` (``> cut`` for
    high targets, ``< cut`` for low targets). Works for any batch size.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.
        low, high: Target thresholds delimiting the scored samples.
        cut: Decision boundary applied to the predictions.

    Returns:
        ``(correct, total)`` as Python ints.
    """
    model.eval()
    correct = 0
    total = 0
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for inData, outData in loader:
            inData  = inData.to(device)
            outData = outData.to(device)
            preds   = model(inData)

            is_high = outData > high
            is_low  = outData < low
            hits    = (is_high & (preds > cut)) | (is_low & (preds < cut))

            total   += int((is_high | is_low).sum().item())
            correct += int(hits.sum().item())
    return correct, total


model.eval()
threshold = 0.3  # not read by the accuracy computation in this cell

# A separate, unshuffled loader is used for scoring the training split so the
# shuffled `train_dl` used by the training cell is left untouched. The model
# runs in eval mode, so the batch size only affects speed.
train_eval_dl = DataLoader(dataset=train_ds, shuffle=False, batch_size=256)

for split_name, split_loader in (('valid', valid_dl), ('train', train_eval_dl)):
    correct, total = extreme_accuracy(model, split_loader, device)
    if total == 0:
        # Avoid a ZeroDivisionError when no target lies outside [0.25, 0.75].
        print(f'Accuracy in {split_name} datasets: n/a (no targets below 0.25 or above 0.75)')
    else:
        print(f'Accuracy in {split_name} datasets: {(100 * correct / total):.2f} %')
    

In [None]:
# Ask interactively whether to persist the trained network; only the exact
# answer 'y' triggers a save.
write_file = input('save model or not (y/n): ')
if 'y' == write_file:
    # Serialises the whole module object, not just its state_dict.
    torch.save(obj=model, f='pretrained_Net.pth')

In [None]:
# Mean of the last column of the validation file, first (header) row excluded.
valid_table = np.genfromtxt('valid.csv', delimiter=',')
valence = valid_table[1:, -1]
print(f'Valence mean in valid set: {valence.mean()}')