In [100]:
import numpy as np
import pandas as pd
import torch
import csv
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
torch.set_printoptions(edgeitems=5, precision=2, linewidth=75)

In [101]:
wine_path = "/home/bartek/dlwpt-code-master/data/data-unversioned/winequality-red.csv"
wine_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=",",skiprows=1)

In [102]:
col_list = next(csv.reader(open(wine_path),delimiter=";"))

wineq=torch.from_numpy(wine_numpy)
data=wineq[:,:-1]
target=wineq[:,-1]
target=target.long()

target_onehot=torch.zeros(target.shape[0],10)
target_onehot=torch.zeros(target.shape[0],10)
target=target.unsqueeze(1)
target_onehot.scatter_(1,target,1)

data_mean=torch.mean(data,dim=0)
data_var=torch.var(data,dim=0)
data_normalized=(data-data_mean)/torch.sqrt(data_var)
data_normalized.shape

x = data_normalized.clone().detach().requires_grad_(True)
x = x.to(device='cuda')

y = target_onehot.clone().detach().requires_grad_(True)
y = y.to(device = 'cuda')

In [103]:
dataset = TensorDataset(x, y)
train_ds, val_ds = torch.utils.data.random_split(dataset,[int(wineq.shape[0]*0.85),wineq.shape[0]-int(wineq.shape[0]*0.85)])
batch_size=wineq.shape[0]
train_loader = DataLoader(train_ds,batch_size,shuffle=False)
val_loader = DataLoader(val_ds,batch_size)

In [104]:
class Net(nn.Module):
    def __init__(self):
        
        self.device = torch.device("cuda")
        super(Net, self).__init__()
        self.input_linear=nn.Linear(11,25)
        self.midle_linear1=nn.Linear(25,50)
        self.midle_linear9=nn.Linear(50,25)
        self.output_linear=nn.Linear(25,10)
    
    def forward(self, x):
        h_relu = self.input_linear(x)
        h_relu = torch.relu(self.midle_linear1(h_relu))
        h_relu = torch.relu(self.midle_linear9(h_relu))
        y_pred = self.output_linear(h_relu)

        return y_pred

In [105]:
model = Net().to(device="cuda")
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

In [106]:
def train(n_epoch,train_loader, avg_y):
    for epoch in range(n_epoch):
        for wine_x, wine_y in train_loader:
            batch_size = wine_x.shape[0]
            y_pred = model(wine_x.view(batch_size,-1))
            loss = criterion(y_pred,wine_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        y_pred_max=torch.argmax(y_pred, dim=1).unsqueeze(1).to(device='cuda')
        y_pred_onehot=torch.zeros(y_pred_max.shape[0],10).to(device='cuda')
        y_pred_onehot.scatter_(1,y_pred_max,1)
        bledy = sum(sum(torch.abs(y_pred_onehot-wine_y)))/2

        if epoch%20 == 0 or epoch == n_epoch-1 or epoch == 2 or epoch == 3 or epoch == 4:
            print("epoch = %d, loss = %d, blad = %d" % (epoch, int(loss.item()), int(bledy.item())))
        if epoch == n_epoch-1:
            print("avg loss = %f%%" % (int(loss.item())/y_pred_max.shape[0]/avg_y*100))
            third_tensor = torch.cat((y_pred_max, torch.argmax(wine_y,1).unsqueeze(1)), 1)
            third_tensor = third_tensor.to(device = 'cpu')
            third_tensor_np = third_tensor.numpy()
            third_tensor_df = pd.DataFrame(third_tensor_np)
            third_tensor_df.columns=["y_pred_max","torch.argmax(wine_y,1).unsqueeze(1)"]
            third_tensor_df.to_csv('/home/bartek/third_tensor.csv')

In [107]:
with torch.no_grad():
    def test(val_loader,avg_y):
        for wine_x, wine_y in val_loader:
            batch_size = wine_x.shape[0]
            y_pred = model(wine_x.view(batch_size,-1))
            loss = criterion(y_pred,wine_y)

        y_pred_max=torch.argmax(y_pred, dim=1).unsqueeze(1).to(device='cuda')
        y_pred_onehot=torch.zeros(y_pred_max.shape[0],10).to(device='cuda')
        y_pred_onehot.scatter_(1,y_pred_max,1)
        bledy = sum(sum(torch.abs(y_pred_onehot-wine_y)))/2

        print("loss = %d, blad = %d" % (int(loss.item()), int(bledy.item())))
        print("avg loss = %f%%" % (int(loss.item())/y_pred_max.shape[0]/avg_y*100))

In [108]:
n_epoch = 300
train(n_epoch,train_loader,avg_y)

epoch = 0, loss = 1860, blad = 1338
epoch = 2, loss = 1009, blad = 763
epoch = 3, loss = 925, blad = 712
epoch = 4, loss = 888, blad = 667
epoch = 20, loss = 784, blad = 583
epoch = 40, loss = 730, blad = 552
epoch = 60, loss = 716, blad = 527
epoch = 80, loss = 707, blad = 505
epoch = 100, loss = 700, blad = 506
epoch = 120, loss = 694, blad = 506
epoch = 140, loss = 689, blad = 505
epoch = 160, loss = 684, blad = 498
epoch = 180, loss = 679, blad = 503
epoch = 200, loss = 674, blad = 497
epoch = 220, loss = 670, blad = 496
epoch = 240, loss = 665, blad = 494
epoch = 260, loss = 661, blad = 492
epoch = 280, loss = 657, blad = 491
epoch = 299, loss = 653, blad = 490
avg loss = 8.525523%


In [109]:
test(val_loader,avg_y)

loss = 120, blad = 94
avg loss = 8.871505%
