In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:

filename = "./data/wine.csv"
df = pd.read_csv(filename)
df.sample(frac=1).reset_index(drop=True) # Shuffle dataframe
df.head()

Unnamed: 0,index,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
# drop col index
df = df.drop(['index'],axis = 1)

In [4]:
input_cols = list(df.columns)[:-1]
output_cols = ['quality']

def dataframe_to_arrays(df):
    # Make a copy of the original dataframe
    df1 = df.copy(deep=True)
    # Extract input & outupts as numpy arrays
    inputs_array = df1[input_cols].to_numpy()
    targets_array = df1[output_cols].to_numpy()
    return inputs_array, targets_array

inputs_array, targets_array = dataframe_to_arrays(df)

inputs = torch.Tensor(inputs_array)
targets = torch.Tensor(targets_array)

print(inputs_array.shape,targets_array.shape)

(6497, 11) (6497, 1)


In [5]:
inputs = torch.Tensor(inputs_array)
targets = torch.Tensor(targets_array)
dataset = TensorDataset(inputs, targets)

In [6]:

#dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
#test_dataset = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))


In [7]:
num_classes = len(pd.unique(df['quality']))
input_size = len(input_cols)

In [8]:
size = len(df)    # 6497
split = 0.2                     
val_size = int(size*split)      # 1299
train_size = size - val_size    # 5198
 
train, val = random_split(dataset, [train_size, val_size])
print(size,val_size,train_size,dataset)

6497 1299 5198 <torch.utils.data.dataset.TensorDataset object at 0x7ff015f65a50>


In [9]:
batch_size = 20
train_loader = DataLoader(train, batch_size, shuffle = True)
val_loader = DataLoader(val, batch_size)
#test_loader = DataLoader(test_dataset, batch_size)

nb_hidden_neurons = 10
nb_classes = len(pd.unique(df['quality']))

In [10]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        #layers of the model
        self.l1 = nn.Linear(input_size, nb_hidden_neurons)
        self.l2 = nn.Linear(nb_hidden_neurons, nb_classes)
        self.flat = nn.Flatten()
 
    def forward(self, x):
        #one forward pass
        out = self.flat(x)
        out = F.relu(self.l1(out))
        out = self.l2(out)
        return out
 
    def training_step(self, batch):
        wines, labels  = batch
        out = self(wines)
        loss = F.l1_loss(out, labels) 
        return loss
 
    def validation_step(self, batch):
        wines, labels = batch
        out = self(wines)
        loss = F.cross_entropy(out, labels)
        _, pred = torch.max(out, 1)
        accuracy = torch.tensor(torch.sum(pred==labels).item()/len(pred))
        return [loss.detach(), accuracy.detach()]

In [11]:
def evaluate(model, loader):
    outputs = [model.validation_step(batch) for batch in loader]
    outputs = torch.tensor(outputs).T
    loss, accuracy = torch.mean(outputs, dim=1)
    return {"loss": loss.item(), "accuracy": accuracy.item()}

In [12]:
def fit(model, train_loader, val_loader, epochs, lr, optimizer_function = torch.optim.SGD):
    history = []
    optimizer = optimizer_function(model.parameters(), lr)
    for epoch in range(epochs):
        print("Epoch ", epoch)
        #Train
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        #Validate
        for batch in val_loader:
            result = evaluate(model, val_loader)
        print("loss: ", result["loss"], "accuracy: ", result["accuracy"], "\n")
        history.append(result)
         
    return history

In [13]:
model = Network()
model.parameters

<bound method Module.parameters of Network(
  (l1): Linear(in_features=11, out_features=10, bias=True)
  (l2): Linear(in_features=10, out_features=7, bias=True)
  (flat): Flatten(start_dim=1, end_dim=-1)
)>

In [14]:
history = fit(model, train_loader, val_loader, 10, 0.1)
evaluate(model, test_loader)

Epoch  0


RuntimeError: 1D target tensor expected, multi-target not supported