<a href="https://colab.research.google.com/github/VVdovichev/PyTorch_GB/blob/main/HW_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Будем практиковаться на датасете недвижимости (sklearn.datasets.fetch_california_housing)

Ваша задача:

Создать Dataset для загрузки данных

Обернуть его в Dataloader

Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)

Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25

In [218]:
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing

from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader

from torch import nn

from torch.optim import SGD, RMSprop, Adam

import torch.nn.functional as F

In [219]:
x, y = fetch_california_housing(return_X_y=True)
x.shape, y.shape

((20640, 8), (20640,))

In [220]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=25, random_state=13)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((20615, 8), (25, 8), (20615,), (25,))

In [221]:
class MyData(Dataset):
    def __init__(self, X, y=None, transform=None):
        self.X = X
        self.y = y
        self.transform = transform
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        if self.transform is not None and self.y is not None:
            return self.transform(self.X[index]), self.y[index]
        elif self.transform is not None and self.y is None:
            return self.transform(self.X[index])
        elif self.transform is None and self.y is not None:
            return self.X[index], self.y[index]
        elif self.transform is None and self.y is None:
            return self.X[index]
    

In [222]:
train_dataset = MyData(x_train, y_train)
train_loader = DataLoader(train_dataset,
                          batch_size=128,
                          shuffle=True)

In [223]:
test_dataset = MyData(x_test, y_test)
test_loader = DataLoader(test_dataset,
                          batch_size=128,
                          shuffle=True)

In [224]:
class FeedForward(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        
        self.fc_1 = nn.Linear(in_features=input_dim, out_features=input_dim*4)
        self.ac_1 = nn.LeakyReLU(negative_slope=0.1)
        self.bn_1 = nn.BatchNorm1d(num_features=input_dim*4)
        self.do_1 = nn.Dropout(0.25)
        self.fc_2 = nn.Linear(in_features=input_dim*4, out_features=input_dim*2)
        self.ac_2 = nn.LeakyReLU()
        self.bn_2 = nn.BatchNorm1d(num_features=input_dim*2)
        self.do_2 = nn.Dropout(0.25)
        self.fc_3 = nn.Linear(in_features=input_dim*2, out_features=1)
    
    
    def forward(self, x):
        x = self.fc_1(x)
        x = self.ac_1(x)
        x = self.bn_1(x)
        x = self.do_1(x)
        x = self.fc_2(x)
        x = self.ac_2(x)
        x = self.bn_2(x)
        x = self.do_2(x)
        x = self.fc_3(x)
        
        return x

In [225]:
def data_val(model, optimizer, epochs):
    criterion = nn.MSELoss()
    
    for epoch in range(epochs):
        
        for i, data_train in enumerate(train_loader):

            optimizer.zero_grad()

            pred_train = model(data_train[0].float())
            loss_train = criterion(pred_train, data_train[1].float())
            loss_train.backward()
            optimizer.step()

            if i % 50 == 0:
                model.eval()
                
                for _, data_test in enumerate(test_loader):
                    pred_test = model(data_test[0].float())
                    loss_test = criterion(pred_test, data_test[1].float())
                
                model.train()            
            
                print(
                    f'Epoch {epoch + 1}/{epochs}\t\t' \
                    f'train loss {round(loss_train.item(), 2)}\t\t' \
                    f'test loss {round(loss_test.item(), 2)}.'
                )

    print('Training is finished!')

In [226]:
model_sgd = FeedForward(x.shape[1])
optimizer_sgd = SGD(model_sgd.parameters(), lr=0.001)

In [227]:
%%time
data_val(model_sgd, optimizer_sgd, 10)

Epoch 1/10		train loss 7.13		test loss 3.49.
Epoch 1/10		train loss 5.74		test loss 4.26.
Epoch 1/10		train loss 4.74		test loss 3.48.
Epoch 1/10		train loss 4.75		test loss 2.89.
Epoch 2/10		train loss 4.23		test loss 2.73.
Epoch 2/10		train loss 2.95		test loss 2.35.
Epoch 2/10		train loss 3.36		test loss 1.94.
Epoch 2/10		train loss 2.62		test loss 1.71.
Epoch 3/10		train loss 2.8		test loss 1.62.
Epoch 3/10		train loss 2.44		test loss 1.52.
Epoch 3/10		train loss 2.27		test loss 1.39.
Epoch 3/10		train loss 2.76		test loss 1.32.
Epoch 4/10		train loss 1.72		test loss 1.31.
Epoch 4/10		train loss 1.66		test loss 1.25.
Epoch 4/10		train loss 1.32		test loss 1.2.
Epoch 4/10		train loss 1.72		test loss 1.18.
Epoch 5/10		train loss 1.32		test loss 1.17.
Epoch 5/10		train loss 1.81		test loss 1.16.
Epoch 5/10		train loss 1.31		test loss 1.16.
Epoch 5/10		train loss 1.33		test loss 1.16.
Epoch 6/10		train loss 1.81		test loss 1.16.
Epoch 6/10		train loss 1.59		test loss 1.16.
Epoch 6/10		

In [228]:
model_RMSprop = FeedForward(x.shape[1])
optimizer_RMSprop = RMSprop(model_RMSprop.parameters(), lr=0.001)

In [229]:
%%time
data_val(model_RMSprop, optimizer_RMSprop, 10)

Epoch 1/10		train loss 7.34		test loss 1.85.
Epoch 1/10		train loss 4.53		test loss 2.81.
Epoch 1/10		train loss 2.92		test loss 1.85.
Epoch 1/10		train loss 2.17		test loss 1.34.
Epoch 2/10		train loss 2.5		test loss 1.29.
Epoch 2/10		train loss 2.1		test loss 1.16.
Epoch 2/10		train loss 2.05		test loss 1.18.
Epoch 2/10		train loss 1.68		test loss 1.2.
Epoch 3/10		train loss 1.71		test loss 1.18.
Epoch 3/10		train loss 1.71		test loss 1.2.
Epoch 3/10		train loss 1.71		test loss 1.21.
Epoch 3/10		train loss 1.82		test loss 1.21.
Epoch 4/10		train loss 1.79		test loss 1.2.
Epoch 4/10		train loss 1.82		test loss 1.22.
Epoch 4/10		train loss 1.56		test loss 1.22.
Epoch 4/10		train loss 1.23		test loss 1.22.
Epoch 5/10		train loss 1.68		test loss 1.23.
Epoch 5/10		train loss 1.32		test loss 1.22.
Epoch 5/10		train loss 1.47		test loss 1.23.
Epoch 5/10		train loss 1.75		test loss 1.24.
Epoch 6/10		train loss 1.75		test loss 1.23.
Epoch 6/10		train loss 1.49		test loss 1.24.
Epoch 6/10		tra

In [230]:
model_Adam = FeedForward(x.shape[1])
optimizer_Adam = Adam(model_Adam.parameters(), lr=0.001)

In [231]:
%%time
data_val(model_Adam, optimizer_Adam, 10)

Epoch 1/10		train loss 7.7		test loss 3.48.
Epoch 1/10		train loss 6.63		test loss 4.65.
Epoch 1/10		train loss 4.56		test loss 3.88.
Epoch 1/10		train loss 3.94		test loss 3.17.
Epoch 2/10		train loss 4.93		test loss 2.88.
Epoch 2/10		train loss 3.67		test loss 2.06.
Epoch 2/10		train loss 2.32		test loss 1.51.
Epoch 2/10		train loss 1.82		test loss 1.24.
Epoch 3/10		train loss 1.51		test loss 1.22.
Epoch 3/10		train loss 2.42		test loss 1.16.
Epoch 3/10		train loss 1.76		test loss 1.18.
Epoch 3/10		train loss 2.11		test loss 1.18.
Epoch 4/10		train loss 1.48		test loss 1.17.
Epoch 4/10		train loss 1.29		test loss 1.21.
Epoch 4/10		train loss 1.78		test loss 1.19.
Epoch 4/10		train loss 1.57		test loss 1.19.
Epoch 5/10		train loss 1.34		test loss 1.2.
Epoch 5/10		train loss 1.31		test loss 1.22.
Epoch 5/10		train loss 1.57		test loss 1.21.
Epoch 5/10		train loss 1.57		test loss 1.21.
Epoch 6/10		train loss 1.94		test loss 1.21.
Epoch 6/10		train loss 1.52		test loss 1.23.
Epoch 6/10		

Вывод: все алгоритмы оптимизации хорошо отработали, но наша задача довольна простая для обучения. 
Поэтому стохастический градиентный спуск справился не хуже чем остальные оптимизаторы, но затратил на порядок меньше времени.