In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

### Get and process the dataset

In [None]:
# Red-wine quality dataset from the UCI repository (semicolon-separated CSV).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine = pd.read_csv(url, sep=';')

# Remove some rows with outliers (extreme total sulfur dioxide values).
# .copy() makes `data` an independent frame rather than a slice of `wine`, so
# assigning columns to it later does not trigger SettingWithCopyWarning.
data = wine[wine['total sulfur dioxide'] < 200].copy()
data

In [None]:
# Z-score every column: force to numeric first (addresses some data-format
# issues), then subtract the column mean and divide by the sample standard
# deviation (ddof=1).
# A new frame is built and `data` is rebound to it, instead of assigning
# column-by-column into what may be a slice of `wine` (the pattern that
# raises pandas' SettingWithCopyWarning).
numeric = data.apply(pd.to_numeric)
data = (numeric - numeric.mean()) / numeric.std(ddof=1)

In [None]:
# Display the frame after the per-column z-scoring step above.
data

In [None]:
# Predict "residual sugar" from every other column.
SPLIT_SEED = 42   # fixed seed so the held-out rows are the same on every run
N_TEST_ROWS = 10  # number of rows held out as the test set

input_columns = data.columns.drop("residual sugar")

# Convert the frame to tensors once; both splits are views into these.
features = torch.tensor(data[input_columns].values).float()
labels = torch.tensor(data["residual sugar"].values).float()

# Row positions (not index labels) of the held-out rows, drawn without
# replacement from a locally seeded generator.
rng = np.random.default_rng(SPLIT_SEED)
nan_indexes = rng.choice(len(data), size=N_TEST_ROWS, replace=False)

# Training rows: every row with a valid label that was not held out.
is_train = ~labels.isnan()
is_train[nan_indexes] = False
non_nan_indexes = is_train.nonzero(as_tuple=True)[0]
print(non_nan_indexes)

train_data = features[non_nan_indexes, :]
train_labels = labels[non_nan_indexes, None]  # transform to matrix (N, 1)

# Test rows: the held-out positions, with their true labels.
test_data = features[nan_indexes, :]
test_labels = labels[nan_indexes, None]  # transform to matrix (N, 1)

In [None]:
# Shapes and contents of the training split.
print(train_data.shape)
print(train_data)
print(train_labels.shape)
print(train_labels)
# `nan_indexes` are row positions in the full dataset, so they must not be used
# to index the already-filtered `train_labels` (that selects unrelated rows, or
# raises IndexError for positions past the end). Instead, check directly that
# no NaN label is left in the training set.
print(train_labels.isnan().any())

In [None]:
# Shape first, then contents, for the held-out inputs and their labels.
for held_out in (test_data, test_labels):
    print(held_out.shape)
    print(held_out)

### Convert to DataLoader

In [None]:
# Convert into PyTorch Datasets.
# Separate names are used so `train_data` / `test_data` stay plain tensors:
# rebinding them to TensorDatasets made this cell (and the earlier inspection
# cells) fail when re-run.
train_dataset = torch.utils.data.TensorDataset(train_data, train_labels)
test_dataset = torch.utils.data.TensorDataset(test_data, test_labels)

# Translate into DataLoader objects.
batch_size = 16
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True)
# The whole test set is served as one batch.
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

### Make the net

In [None]:
class Net(nn.Module):
    """Fully connected regressor: 11 inputs -> 16 -> 32 -> 16 -> 1 output."""

    def __init__(self):
        super().__init__()
        # Layers are declared in input-to-output order.
        self.input = nn.Linear(in_features=11, out_features=16)
        self.fc1 = nn.Linear(in_features=16, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=16)
        self.output = nn.Linear(in_features=16, out_features=1)

    def forward(self, x):
        """Apply ReLU after each of the first three layers; the output layer is linear."""
        activation = x
        for layer in (self.input, self.fc1, self.fc2):
            activation = torch.relu(layer(activation))
        return self.output(activation)

In [None]:
class NNPipeline():
    """Train a regression network with MSE loss and Adam, and produce predictions.

    Args:
        train_loader: DataLoader yielding ``(X, y)`` batches used by ``train``.
        test_loader: DataLoader for held-out data. It is stored but not used by
            any method of this class.
        net: Optional ``nn.Module`` to train. Defaults to a fresh ``Net()``.
    """

    def __init__(self, train_loader, test_loader, net=None):
        self._net = Net() if net is None else net
        self._lossfun = nn.MSELoss()
        self._optimizer = torch.optim.Adam(self._net.parameters(), lr=.01)
        self._train_loader = train_loader
        self._test_loader = test_loader
        # Empty until train() runs, so `losses` is always readable.
        self._losses = torch.zeros(0)

    @property
    def net(self):
        """The underlying network."""
        return self._net

    @property
    def losses(self):
        """1-D tensor of mean training loss per epoch (empty before training)."""
        return self._losses

    def train(self, num_epochs=100):
        """Run ``num_epochs`` passes over the training loader.

        Each call resets ``losses`` to a new tensor of length ``num_epochs``;
        the network and optimizer state carry over between calls.
        """
        self._losses = torch.zeros(num_epochs)
        self._net.train()  # make sure training-mode layers behave accordingly

        for epochi in range(num_epochs):
            batch_loss = []
            for X, y in self._train_loader:
                # forward pass and loss
                y_hat = self._net(X)
                loss = self._lossfun(y_hat, y)

                # backprop
                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()

                # loss from this batch
                batch_loss.append(loss.item())

            # Average loss across the batches. A loader that yields no batches
            # records NaN explicitly, without numpy's empty-mean RuntimeWarning.
            self._losses[epochi] = (float(np.mean(batch_loss))
                                    if batch_loss else float('nan'))

    def predict(self, loader):
        """Return predictions for all inputs held by ``loader``'s dataset.

        The full input tensor (``loader.dataset.tensors[0]``) goes through the
        network in one forward pass, in dataset order; the loader's batching
        and shuffling are not used. The result carries no gradient information.
        """
        was_training = self._net.training
        self._net.eval()  # inference mode for layers such as dropout/batch-norm
        try:
            with torch.no_grad():
                y_hat = self._net(loader.dataset.tensors[0])
        finally:
            # restore whatever mode the network was in before predicting
            self._net.train(was_training)
        return y_hat

### Run the net

In [None]:
# Build the pipeline around both loaders, then fit it for 100 epochs.
neural_network = NNPipeline(train_loader=train_loader,
                            test_loader=test_loader)
neural_network.train(100)

In [None]:
# Predictions for the training split first, then the held-out split.
train_results, test_results = (
    neural_network.predict(split_loader)
    for split_loader in (train_loader, test_loader)
)

In [None]:
# Mean training loss per epoch, as recorded by train() (one entry per epoch).
neural_network.losses

In [None]:
# Transposed so each set of predictions prints as a single row.
print(train_results.T, test_results.T, sep="\n")

### Plot the results

In [None]:
# Correlation between predictions and observations for each split
# (off-diagonal entry of the 2x2 correlation matrix).
training_correlations = np.corrcoef(train_results.T, train_labels.T)[1, 0]
test_correlations = np.corrcoef(test_results.T, test_labels.T)[1, 0]

fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: training loss per epoch.
ax[0].plot(neural_network.losses)
ax[0].set(title='Losses', xlabel='Epochs', ylabel='Loss')

# Right panel: predictions (x) against observations (y) for both splits.
ax[1].plot(train_results,
           train_labels,
           'ro',
           label=f'Train r={training_correlations:.3f}')
ax[1].plot(test_results,
           test_labels,
           'b^',
           label=f'Test r={test_correlations:.3f}')
ax[1].set(title='Sugar predictions vs observations',
          xlabel='Predictions',
          ylabel='Observations')
ax[1].legend()

plt.show()