### 3. Dataset, Dataloader, BatchNorm, Dropout, Оптимизация<Br>
    
- Создать Dataset для загрузки данных (sklearn.datasets.fetch_california_housing)
- Обернуть его в Dataloader
- Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)<br>
- Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25

In [1]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.optim import Adam, RMSprop, SGD
from torch.utils.data import DataLoader, Dataset

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EPOCHES = 10
LR = 0.01

Создать Dataset для загрузки данных 

In [3]:
class real_estate_Dataset(Dataset):
    def __init__(self, *init_datasets):
        assert all(init_datasets[0].size(0) == init_dataset.size(0) for init_dataset in init_datasets)
        self._base_datasets = init_datasets
  
    def __len__(self):
        return self._base_datasets[0].size(0)
  
    def __getitem__(self, idx):
        return tuple(base_dataset[idx] for base_dataset in self._base_datasets)

Обернуть его в Dataloader.

Разделить датасет на тест и трейн, разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25

In [4]:
california_housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(california_housing.data,
                                                    california_housing.target, 
                                                    test_size=0.25, random_state=13)

In [5]:
# Нормализация 
scale = StandardScaler()
X_train_s = scale.fit_transform(X_train)
X_test_s = scale.transform(X_test)

In [6]:
train_xt = torch.from_numpy(X_train_s.astype(np.float32)).to(DEVICE)
train_yt = torch.from_numpy(y_train.astype(np.float32)).to(DEVICE)

test_xt = torch.from_numpy(X_test_s.astype(np.float32)).to(DEVICE)
test_yt = torch.from_numpy(y_test.astype(np.float32)).to(DEVICE)

In [7]:
train_dataset = real_estate_Dataset(train_xt, train_yt)
test_dataset = real_estate_Dataset(test_xt, test_yt)


train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True, num_workers=0)

test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=True, num_workers=0)

Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout

In [8]:
class NewNet(nn.Module):
    def __init__(self) -> None:
        super(NewNet, self).__init__()
        self.block_1 = nn.Sequential(
            nn.Linear(in_features=8, out_features=100, bias=True),
            nn.Dropout(0.1),
            nn.BatchNorm1d(100),
            nn.ReLU())
        self.block_2 = nn.Sequential(
            nn.Linear(in_features=100, out_features=100, bias=True),
            nn.Dropout(0.2),
            nn.BatchNorm1d(100),
            nn.ReLU())
        self.block_3 = nn.Sequential(
            nn.Linear(in_features=100, out_features=60, bias=True),
            nn.Dropout(0.2),
            nn.BatchNorm1d(60),
            nn.ReLU())
        self.block_4 = nn.Sequential(
            nn.Linear(in_features=60, out_features=30),
            nn.Dropout(0.2),
            nn.BatchNorm1d(30),
            nn.ReLU())
        self.predict = nn.Sequential(
            nn.Linear(in_features=30, out_features=1, bias=True),
            nn.BatchNorm1d(1),
            nn.ReLU())
  
    def forward(self, inp):
        out = self.block_1(inp)
        out = self.block_2(out)
        out = self.block_3(out)
        out = self.block_4(out)
        out = self.predict(out)
        return out[:, 0]

In [9]:
def train_loop(train_loader, test_loader, net, optimizer):
    loss_fn = nn.MSELoss()
    best_acc = {'train': None, 'test': None}
    net.train()
    for epoch in range(EPOCHES):
        running_loss, running_items, running_right = 0.0, 0.0, 0.0
        for i, (inputs, labels) in enumerate(train_loader):
        
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad() #обнулим градиент
            loss.backward()
            optimizer.step()

            running_loss += loss.item() #выводим статистику о процессе обучения
            running_items += len(labels)
        
            if i % 100 == 0 or (i + 1) == len(train_loader):    #вывод по 100 mini-batches
                net.eval()

                test_loss, test_running_total, test_loss  = 0.0, 0.0, 0.0
                for y, (out_test, lbl_test) in enumerate(test_loader):
                    test_outputs = net(out_test)
                    test_loss += loss_fn(test_outputs, lbl_test)
                    test_running_total += len(lbl_test)
            
                res_loss_train = running_loss / running_items
                res_loss_test = test_loss / test_running_total
            
                if best_acc['train'] is None or res_loss_train < best_acc['train']:
                    best_acc['train'] = res_loss_train
            
                if best_acc['test'] is None or res_loss_test < best_acc['test']:
                    best_acc['test'] = res_loss_train

                print(f'Epoch [{epoch + 1}/{EPOCHES}]. ' \
                      f'Step [{i + 1}/{len(train_loader)}]. ' \
                      f'Loss: {res_loss_train:.3f}. '\
                      f'Test acc: {res_loss_test:.3f}.')
            
                running_loss, running_items = 0.0, 0.0
                net.train()
    print(f"Best acc train: {best_acc['train']:.3f}. Best acc test: {best_acc['test']:.3f}")
 #  print('Training is finished!')

Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

In [23]:
metrics_df = pd.DataFrame(columns=['optimizer', 'acc train', 'acc test'])
metrics_df

Unnamed: 0,optimizer,acc train,acc test


In [11]:
# Подбор оптимизатора. Adam
net = NewNet().to(DEVICE)
optimizer = Adam(net.parameters(), lr=LR)

In [12]:
#%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/968]. Loss: 0.210. Test acc: 0.332.
Epoch [1/10]. Step [101/968]. Loss: 0.154. Test acc: 0.074.
Epoch [1/10]. Step [201/968]. Loss: 0.068. Test acc: 0.052.
Epoch [1/10]. Step [301/968]. Loss: 0.055. Test acc: 0.037.
Epoch [1/10]. Step [401/968]. Loss: 0.046. Test acc: 0.037.
Epoch [1/10]. Step [501/968]. Loss: 0.041. Test acc: 0.033.
Epoch [1/10]. Step [601/968]. Loss: 0.046. Test acc: 0.032.
Epoch [1/10]. Step [701/968]. Loss: 0.042. Test acc: 0.034.
Epoch [1/10]. Step [801/968]. Loss: 0.042. Test acc: 0.057.
Epoch [1/10]. Step [901/968]. Loss: 0.039. Test acc: 0.046.
Epoch [1/10]. Step [968/968]. Loss: 0.042. Test acc: 0.041.
Epoch [2/10]. Step [1/968]. Loss: 0.066. Test acc: 0.040.
Epoch [2/10]. Step [101/968]. Loss: 0.040. Test acc: 0.033.
Epoch [2/10]. Step [201/968]. Loss: 0.039. Test acc: 0.033.
Epoch [2/10]. Step [301/968]. Loss: 0.037. Test acc: 0.028.
Epoch [2/10]. Step [401/968]. Loss: 0.032. Test acc: 0.032.
Epoch [2/10]. Step [501/968]. Loss: 0.034. T

In [24]:
best_acc = {'train': 0.009, 'test': 0.013}
name='Adam'
metrics_df = metrics_df.append({
    'optimizer': name,
    'acc train': best_acc['train'],
    'acc test': best_acc['test'],
}, ignore_index=True)

metrics_df

  metrics_df = metrics_df.append({


Unnamed: 0,optimizer,acc train,acc test
0,Adam,0.009,0.013


In [14]:
net = NewNet().to(DEVICE)
optimizer = RMSprop(net.parameters(), lr=LR)

In [15]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/968]. Loss: 0.242. Test acc: 0.354.
Epoch [1/10]. Step [101/968]. Loss: 0.085. Test acc: 0.041.
Epoch [1/10]. Step [201/968]. Loss: 0.048. Test acc: 0.043.
Epoch [1/10]. Step [301/968]. Loss: 0.048. Test acc: 0.035.
Epoch [1/10]. Step [401/968]. Loss: 0.047. Test acc: 0.062.
Epoch [1/10]. Step [501/968]. Loss: 0.041. Test acc: 0.037.
Epoch [1/10]. Step [601/968]. Loss: 0.040. Test acc: 0.033.
Epoch [1/10]. Step [701/968]. Loss: 0.042. Test acc: 0.030.
Epoch [1/10]. Step [801/968]. Loss: 0.032. Test acc: 0.030.
Epoch [1/10]. Step [901/968]. Loss: 0.033. Test acc: 0.030.
Epoch [1/10]. Step [968/968]. Loss: 0.034. Test acc: 0.030.
Epoch [2/10]. Step [1/968]. Loss: 0.036. Test acc: 0.031.
Epoch [2/10]. Step [101/968]. Loss: 0.031. Test acc: 0.028.
Epoch [2/10]. Step [201/968]. Loss: 0.032. Test acc: 0.036.
Epoch [2/10]. Step [301/968]. Loss: 0.031. Test acc: 0.027.
Epoch [2/10]. Step [401/968]. Loss: 0.033. Test acc: 0.029.
Epoch [2/10]. Step [501/968]. Loss: 0.031. T

In [25]:
best_acc = {'train': 0.009, 'test': 0.021}
name='RMSprop'
metrics_df = metrics_df.append({
    'optimizer': name,
    'acc train': best_acc['train'],
    'acc test': best_acc['test'],
}, ignore_index=True)

metrics_df

  metrics_df = metrics_df.append({


Unnamed: 0,optimizer,acc train,acc test
0,Adam,0.009,0.013
1,RMSprop,0.009,0.021


In [26]:
net = NewNet().to(DEVICE)
optimizer = SGD(net.parameters(), lr=LR)

In [27]:
%%time

train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/968]. Loss: 0.289. Test acc: 0.354.
Epoch [1/10]. Step [101/968]. Loss: 0.141. Test acc: 0.060.
Epoch [1/10]. Step [201/968]. Loss: 0.065. Test acc: 0.044.
Epoch [1/10]. Step [301/968]. Loss: 0.053. Test acc: 0.041.
Epoch [1/10]. Step [401/968]. Loss: 0.053. Test acc: 0.039.
Epoch [1/10]. Step [501/968]. Loss: 0.039. Test acc: 0.035.
Epoch [1/10]. Step [601/968]. Loss: 0.040. Test acc: 0.044.
Epoch [1/10]. Step [701/968]. Loss: 0.034. Test acc: 0.033.
Epoch [1/10]. Step [801/968]. Loss: 0.038. Test acc: 0.031.
Epoch [1/10]. Step [901/968]. Loss: 0.038. Test acc: 0.032.
Epoch [1/10]. Step [968/968]. Loss: 0.034. Test acc: 0.030.
Epoch [2/10]. Step [1/968]. Loss: 0.036. Test acc: 0.030.
Epoch [2/10]. Step [101/968]. Loss: 0.036. Test acc: 0.034.
Epoch [2/10]. Step [201/968]. Loss: 0.036. Test acc: 0.032.
Epoch [2/10]. Step [301/968]. Loss: 0.036. Test acc: 0.031.
Epoch [2/10]. Step [401/968]. Loss: 0.035. Test acc: 0.029.
Epoch [2/10]. Step [501/968]. Loss: 0.035. T

In [28]:
best_acc = {'train': 0.016, 'test': 0.021}
name='SGD'
metrics_df = metrics_df.append({
    'optimizer': name,
    'acc train': best_acc['train'],
    'acc test': best_acc['test'],
}, ignore_index=True)

metrics_df

  metrics_df = metrics_df.append({


Unnamed: 0,optimizer,acc train,acc test
0,Adam,0.009,0.013
1,RMSprop,0.009,0.021
2,SGD,0.016,0.021


In [29]:
net = NewNet().to(DEVICE)
optimizer = SGD(net.parameters(), lr=LR, momentum=0.5)

In [30]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/968]. Loss: 0.247. Test acc: 0.354.
Epoch [1/10]. Step [101/968]. Loss: 0.096. Test acc: 0.049.
Epoch [1/10]. Step [201/968]. Loss: 0.052. Test acc: 0.040.
Epoch [1/10]. Step [301/968]. Loss: 0.040. Test acc: 0.037.
Epoch [1/10]. Step [401/968]. Loss: 0.042. Test acc: 0.033.
Epoch [1/10]. Step [501/968]. Loss: 0.036. Test acc: 0.031.
Epoch [1/10]. Step [601/968]. Loss: 0.038. Test acc: 0.032.
Epoch [1/10]. Step [701/968]. Loss: 0.037. Test acc: 0.031.
Epoch [1/10]. Step [801/968]. Loss: 0.036. Test acc: 0.038.
Epoch [1/10]. Step [901/968]. Loss: 0.032. Test acc: 0.028.
Epoch [1/10]. Step [968/968]. Loss: 0.033. Test acc: 0.029.
Epoch [2/10]. Step [1/968]. Loss: 0.021. Test acc: 0.029.
Epoch [2/10]. Step [101/968]. Loss: 0.035. Test acc: 0.042.
Epoch [2/10]. Step [201/968]. Loss: 0.034. Test acc: 0.036.
Epoch [2/10]. Step [301/968]. Loss: 0.032. Test acc: 0.028.
Epoch [2/10]. Step [401/968]. Loss: 0.032. Test acc: 0.029.
Epoch [2/10]. Step [501/968]. Loss: 0.036. T

In [31]:
best_acc = {'train': 0.018, 'test': 0.021}
name='SGD + Momentum (0,5)'
metrics_df = metrics_df.append({
    'optimizer': name,
    'acc train': best_acc['train'],
    'acc test': best_acc['test'],
}, ignore_index=True)

metrics_df

  metrics_df = metrics_df.append({


Unnamed: 0,optimizer,acc train,acc test
0,Adam,0.009,0.013
1,RMSprop,0.009,0.021
2,SGD,0.016,0.021
3,"SGD + Momentum (0,5)",0.018,0.021


Если подобрать последний критерий, то можно значительно улучшить точность с оптимизатором SGD