In [718]:
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import load_diabetes

import matplotlib.pyplot as plt

# Датасет
Загружаем датасет Diabetes dataset. Он содержит 9 числовых признаков и 1 категориальный, таргетная переменная - количественная мера прогрессирования заболевания через год после начального уровня. Предстоит задача регрессии, надо эту переменную по имеющимся признакам предсказать.


In [719]:
dataset = load_diabetes()
dataset.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [720]:
len(dataset['data'])

442

Разбиваем на train/test выборки.

In [721]:
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

Создаём torch-датасет, чтобы подготовить данные к обучению нейросети.

In [722]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class DiabetDataset(Dataset):
    def __init__(self, tensor_X, tensor_y, device):
        self.inputs = tensor_X.to(device)
        self.targets = tensor_y.to(device)
    def __len__(self):
        return len(self.targets)
    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

diabet_train = DiabetDataset(torch.Tensor(X_train), torch.Tensor(y_train), device)
diabet_test = DiabetDataset(torch.Tensor(X_test), torch.Tensor(y_test), device)

diabet_train_dataloader = DataLoader(diabet_train, batch_size=32, shuffle=True)
diabet_test_dataloader = DataLoader(diabet_test, batch_size=32, shuffle=True)

Строим простую полносвязную нейронную сеть.

In [723]:
class SimpleNet(nn.Module):
    def __init__(self, in_features=10, out_features=1):
        super(SimpleNet, self).__init__()

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        self.linear1 = nn.Linear(in_features=in_features, out_features=64)
        self.linear2 = nn.Linear(in_features=64, out_features=64)
        self.linear3 = nn.Linear(in_features=64, out_features=64)
        self.linear4 = nn.Linear(in_features=64, out_features=64)
        self.linear5 = nn.Linear(in_features=64, out_features=1)
        
    def forward(self, x):
        x = self.relu(self.linear1(x))
        self.dropout(x)
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        x = self.relu(self.linear4(x))
        x = self.linear5(x)

        return x

Запускаем обучение.

In [724]:
EPOCHS = 400
lr = 1e-3

model = SimpleNet()
loss_fn = nn.MSELoss()
opt = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(EPOCHS):
    train_loss, val_loss = 0, 0
    for data in diabet_train_dataloader:
        opt.zero_grad()
        
        output = model(data[0])
        loss = loss_fn(output, data[1].unsqueeze(1))
        train_loss += loss.item()

        loss.backward()

        opt.step()
    print(f'epoch: {epoch + 1}/{EPOCHS}, loss: {train_loss}')

epoch: 1/400, loss: 353199.294921875
epoch: 2/400, loss: 344160.3984375
epoch: 3/400, loss: 318558.7431640625
epoch: 4/400, loss: 314868.57568359375
epoch: 5/400, loss: 372118.962890625
epoch: 6/400, loss: 266093.7587890625
epoch: 7/400, loss: 202716.0841064453
epoch: 8/400, loss: 153778.71728515625
epoch: 9/400, loss: 61352.50390625
epoch: 10/400, loss: 56601.273986816406
epoch: 11/400, loss: 59312.80029296875
epoch: 12/400, loss: 50503.615325927734
epoch: 13/400, loss: 51144.402587890625
epoch: 14/400, loss: 52545.525634765625
epoch: 15/400, loss: 47289.923290252686
epoch: 16/400, loss: 54683.33154296875
epoch: 17/400, loss: 65769.70361328125
epoch: 18/400, loss: 45999.40966796875
epoch: 19/400, loss: 43187.952887535095
epoch: 20/400, loss: 43544.39489746094
epoch: 21/400, loss: 43584.09704589844
epoch: 22/400, loss: 45364.62939453125
epoch: 23/400, loss: 55893.445556640625
epoch: 24/400, loss: 46442.40478515625
epoch: 25/400, loss: 42285.593017578125
epoch: 26/400, loss: 43990.21362

Итоговые результаты точности:

In [725]:
y_pred_list = []
y_true = []

with torch.no_grad():
    for inputs, labels in diabet_test_dataloader:
        y_pred = model(inputs)
        for prediction in y_pred:
            prediction = prediction.numpy()
            y_pred_list.append(prediction)
        for label in labels:
            label = label.numpy()
            y_true.append(label)

print(f'Mean Absolute Error : {mean_absolute_error(y_true, y_pred_list)}')

Mean Absolute Error : 41.5276985168457
