## Zrób to sam. Polecane przećwiczyć przed kolejnymi ćwiczeniami.

Spróbujmy przewidzieć ocenę wina na podstawie jego parametrów

In [491]:
import torch

In [492]:
seed = 42

In [493]:
device = torch.device("cpu")

torch.manual_seed(seed)
torch.use_deterministic_algorithms(True)

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
    # Ensure deterministic behavior on CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    torch.mps.manual_seed(seed)

device

device(type='mps')

In [494]:
import pandas as pd
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', delimiter=";")

In [495]:
df.shape

(4898, 12)

In [496]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [497]:
# all quality scores

quals = df['quality'].unique()
quals.sort()

quals, len(quals)

(array([3, 4, 5, 6, 7, 8, 9]), 7)

#### ... Jakieś wstępne przetwarzanie danych?

In [498]:
import numpy as np

feature_columns = df.columns[:-1]
label_column = df.columns[-1]

x = df[feature_columns].values
y = df[label_column].values

x_mean = np.mean(x, axis=0)
x_std = np.std(x, axis=0)

x = (x - x_mean) / x_std

df[feature_columns] = x

df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.172097,-0.08177,0.21328,2.821349,-0.035355,0.569932,0.744565,2.331512,-1.246921,-0.349184,-1.393152,6
1,-0.657501,0.215896,0.048001,-0.944765,0.147747,-1.253019,-0.149685,-0.009154,0.740029,0.001342,-0.824276,6
2,1.475751,0.017452,0.543838,0.100282,0.193523,-0.312141,-0.973336,0.358665,0.475102,-0.436816,-0.336667,6
3,0.409125,-0.478657,-0.117278,0.415768,0.559727,0.687541,1.121091,0.525855,0.01148,-0.787342,-0.499203,6
4,0.409125,-0.478657,-0.117278,0.415768,0.559727,0.687541,1.121091,0.525855,0.01148,-0.787342,-0.499203,6


In [499]:
# random state is a seed value
train = df.sample(frac=0.8, random_state=seed)
test=df.drop(train.index)

In [500]:
import torch.utils.data as data

In [None]:
train_dataset = data.TensorDataset(torch.from_numpy(train.values[:,:-1]),torch.from_numpy(train.values[:, -1]))
test_dataset = data.TensorDataset(torch.from_numpy(test.values[:, :-1]), torch.from_numpy(test.values[:, -1]))

next(iter(train_dataset))

(tensor([-1.0130,  0.1167,  0.6265,  0.8693,  0.1020,  1.1580,  0.2504, -0.1095,
         -0.6508,  0.8777,  0.3677], dtype=torch.float64),
 tensor(7., dtype=torch.float64))

## Neural Network Architecture

In [502]:
import torch.nn as nn

In [503]:
class WineClassifier(nn.Module):
    def __init__(self, input_size = 11, hidden_size=32, output_size = 7) -> None:
        super().__init__()
        self.activation = nn.ReLU()

        self.linear1 = nn.Linear(in_features=input_size, out_features=16)
        self.linear2 = nn.Linear(in_features=16, out_features=32)
        self.linear3 = nn.Linear(in_features=32, out_features=20)
        self.linear4 = nn.Linear(in_features=20, out_features=16)


        self.linear5 = nn.Linear(in_features=16, out_features=output_size)
    
    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)

        x = self.linear2(x)
        x = self.activation(x)

        x = self.linear3(x)
        x = self.activation(x)

        x = self.linear4(x)
        x = self.activation(x)
        
        x = self.linear5(x)

        return x

In [504]:
model = WineClassifier()
model.to(device)

WineClassifier(
  (activation): ReLU()
  (linear1): Linear(in_features=11, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=32, bias=True)
  (linear3): Linear(in_features=32, out_features=20, bias=True)
  (linear4): Linear(in_features=20, out_features=16, bias=True)
  (linear5): Linear(in_features=16, out_features=7, bias=True)
)

## Training

In [505]:
import torch.nn.functional as F

In [506]:
learning_rate = 0.001
epochs = 30

In [507]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss = nn.CrossEntropyLoss()

In [508]:
model.train()

for epoch in range(epochs):
    total_loss = 0.0
    batch_count = 0

    correct_preds = 0
    total_samples = 0

    for train_features, train_labels in train_dataset:
        train_labels = train_labels.to(torch.long) - 3

        if device.type == "mps":
            train_features = train_features.to(dtype=torch.float32)
            train_labels = train_labels.to(dtype=torch.float32)

        train_features = train_features.to(device)
        train_labels = train_labels.to(device)

        preds = model(train_features)
        output = loss(preds, train_labels)

        optimizer.zero_grad()
        output.backward()

        optimizer.step()

        total_loss += output.item()
        batch_count += 1

        preds_classes = preds.unsqueeze(0).argmax(dim=1)
        correct_preds += (preds_classes == train_labels).sum().item()
        total_samples += train_labels.unsqueeze(0).size(0)

    avg_loss = total_loss / batch_count
    accuracy = 100 * correct_preds / total_samples
    
    print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1, Loss: 1.2368, Accuracy: 47.01%
Epoch 2, Loss: 1.0917, Accuracy: 52.42%
Epoch 3, Loss: 1.0639, Accuracy: 53.19%
Epoch 4, Loss: 1.0469, Accuracy: 54.21%
Epoch 5, Loss: 1.0363, Accuracy: 54.95%
Epoch 6, Loss: 1.0269, Accuracy: 55.51%
Epoch 7, Loss: 1.0183, Accuracy: 56.28%
Epoch 8, Loss: 1.0113, Accuracy: 56.53%
Epoch 9, Loss: 1.0044, Accuracy: 56.92%
Epoch 10, Loss: 0.9984, Accuracy: 57.12%
Epoch 11, Loss: 0.9932, Accuracy: 57.22%
Epoch 12, Loss: 0.9863, Accuracy: 57.53%
Epoch 13, Loss: 0.9803, Accuracy: 57.55%
Epoch 14, Loss: 0.9744, Accuracy: 58.17%
Epoch 15, Loss: 0.9684, Accuracy: 58.30%
Epoch 16, Loss: 0.9631, Accuracy: 58.65%
Epoch 17, Loss: 0.9573, Accuracy: 58.81%
Epoch 18, Loss: 0.9523, Accuracy: 59.11%
Epoch 19, Loss: 0.9457, Accuracy: 59.09%
Epoch 20, Loss: 0.9393, Accuracy: 59.83%
Epoch 21, Loss: 0.9342, Accuracy: 60.29%
Epoch 22, Loss: 0.9291, Accuracy: 60.08%
Epoch 23, Loss: 0.9222, Accuracy: 60.59%
Epoch 24, Loss: 0.9181, Accuracy: 60.41%
Epoch 25, Loss: 0.9127, A

## Evaluation

In [510]:
model.eval()
true_preds, num_preds = 0, 0  # Integers, not floats

with torch.no_grad():
    for test_features, test_labels in test_dataset:
        test_labels = (test_labels - 3).to(torch.long).unsqueeze(0)

        if device.type == "mps":
            test_features = test_features.to(dtype=torch.float32)
        
        test_features = test_features.to(device)
        test_labels = test_labels.to(device)

        preds = model(test_features)
        preds_classes = preds.argmax(dim=0)

        true_preds += (preds_classes == test_labels).sum().item()
        num_preds += test_labels.size(0)

acc = true_preds / num_preds
print(f"Accuracy of the model: {100.0 * acc:.2f}%")

Accuracy of the model: 55.10%
