## Домашнее задание

Будем практиковаться на датасете недвижимости (sklearn.datasets.fetch_california_housing)

Ваша задача:
1. Создать Dataset для загрузки данных
2. Обернуть его в Dataloader
3. Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)
4. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
import torchvision.transforms as transforms

pd.set_option('display.float_format', lambda x: '%0.4f' % x)
np.set_printoptions(suppress=True)

In [2]:
california_housing = fetch_california_housing(as_frame=True)

In [3]:
print(california_housing.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [4]:
df = california_housing.frame
df.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.9841,1.0238,322.0,2.5556,37.88,-122.23,4.526
1,8.3014,21.0,6.2381,0.9719,2401.0,2.1098,37.86,-122.22,3.585
2,7.2574,52.0,8.2881,1.0734,496.0,2.8023,37.85,-122.24,3.521
3,5.6431,52.0,5.8174,1.0731,558.0,2.5479,37.85,-122.25,3.413
4,3.8462,52.0,6.2819,1.0811,565.0,2.1815,37.85,-122.25,3.422


In [5]:
df.shape

(20640, 9)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   MedInc       20640 non-null  float64
 1   HouseAge     20640 non-null  float64
 2   AveRooms     20640 non-null  float64
 3   AveBedrms    20640 non-null  float64
 4   Population   20640 non-null  float64
 5   AveOccup     20640 non-null  float64
 6   Latitude     20640 non-null  float64
 7   Longitude    20640 non-null  float64
 8   MedHouseVal  20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [7]:
df.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.8707,28.6395,5.429,1.0967,1425.4767,3.0707,35.6319,-119.5697,2.0686
std,1.8998,12.5856,2.4742,0.4739,1132.4621,10.386,2.136,2.0035,1.154
min,0.4999,1.0,0.8462,0.3333,3.0,0.6923,32.54,-124.35,0.15
25%,2.5634,18.0,4.4407,1.0061,787.0,2.4297,33.93,-121.8,1.196
50%,3.5348,29.0,5.2291,1.0488,1166.0,2.8181,34.26,-118.49,1.797
75%,4.7432,37.0,6.0524,1.0995,1725.0,3.2823,37.71,-118.01,2.6472
max,15.0001,52.0,141.9091,34.0667,35682.0,1243.3333,41.95,-114.31,5.0


In [8]:
train_dataset, test_dataset = train_test_split(df, test_size=0.25, random_state=13)

In [9]:
print(f'Train_dataset shape: {train_dataset.shape}\nTest_dataset shape: {test_dataset.shape}')

Train_dataset shape: (15480, 9)
Test_dataset shape: (5160, 9)


In [10]:
class CaliforniaHousingDataset(torch.utils.data.Dataset):
    def __init__(self, init_dataset):
        self._base_dataset = init_dataset

    def __len__(self):
        return len(self._base_dataset)

    def __getitem__(self, index):
        features = np.asarray(self._base_dataset)[index][:-1]
        target = np.asarray(self._base_dataset)[index][-1]
        return features, target

In [11]:
california_housing_train_dataset = CaliforniaHousingDataset(train_dataset)

In [12]:
california_housing_train_dataset[0]

(array([   3.5174    ,   36.        ,    4.54794521,    1.09436834,
        1357.        ,    2.06544901,   34.21      , -118.23      ]),
 2.68)

In [13]:
train_loader = torch.utils.data.DataLoader(california_housing_train_dataset, batch_size=8, shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1dd5d519f10>

In [14]:
california_housing_test_dataset = CaliforniaHousingDataset(test_dataset)

In [15]:
california_housing_test_dataset[0]

(array([   4.1528    ,   15.        ,    6.19327731,    0.98739496,
         768.        ,    3.22689076,   35.34      , -119.08      ]),
 1.301)

In [16]:
test_loader = torch.utils.data.DataLoader(california_housing_test_dataset, batch_size=8, shuffle=False)
test_loader

<torch.utils.data.dataloader.DataLoader at 0x1dd5d52db20>

In [17]:
class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim, activation="relu"):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = activation
        
    def forward(self, x):
        x = self.fc(x)
        if self.activation=="relu":
            return F.relu(x)
        if self.activation=="sigmoid":
            return F.sigmoid(x)
        raise RuntimeError
        

class LinearRegression(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(LinearRegression, self).__init__()
        self.fc1 = Perceptron(input_dim, hidden_dim)
        self.bn = nn.BatchNorm1d(hidden_dim)
        self.dp = nn.Dropout(0.25)
        self.fc2 = Perceptron(hidden_dim, 1, "relu")
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.dp(x)
        x = self.bn(x)
        x = self.fc2(x)
        return x

In [18]:
net = LinearRegression(8, 1)
criterion = nn.MSELoss()

#### SGD

In [19]:
%%time

optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in tqdm(range(10)):  
    running_loss, running_items = 0.0, 0.0
    
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(torch.float), data[1].to(torch.float)
        labels = labels.reshape(8, 1)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        
        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            net.eval()

            print(f'Loss: {running_loss / running_items:.3f}. ')
            
            running_loss, running_items = 0.0, 0.0

            net.train()
        
print('Training is finished!')

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Loss: 0.808. 
Loss: 0.704. 
Loss: 0.710. 
Loss: 0.695. 
Loss: 0.703. 
Loss: 0.700. 
Loss: 0.693. 


 10%|████████▎                                                                          | 1/10 [00:01<00:09,  1.07s/it]

Loss: 0.347. 
Loss: 0.683. 
Loss: 0.704. 
Loss: 0.702. 
Loss: 0.688. 
Loss: 0.720. 
Loss: 0.717. 


 20%|████████████████▌                                                                  | 2/10 [00:02<00:08,  1.08s/it]

Loss: 0.880. 
Loss: 0.699. 
Loss: 0.694. 
Loss: 0.719. 
Loss: 0.701. 
Loss: 0.706. 
Loss: 0.701. 


 30%|████████████████████████▉                                                          | 3/10 [00:03<00:07,  1.07s/it]

Loss: 0.825. 
Loss: 0.692. 
Loss: 0.704. 
Loss: 0.707. 
Loss: 0.706. 
Loss: 0.680. 
Loss: 0.700. 


 40%|█████████████████████████████████▏                                                 | 4/10 [00:04<00:06,  1.09s/it]

Loss: 0.389. 
Loss: 0.716. 
Loss: 0.703. 
Loss: 0.695. 
Loss: 0.680. 
Loss: 0.715. 
Loss: 0.688. 


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:05<00:05,  1.08s/it]

Loss: 0.673. 
Loss: 0.704. 
Loss: 0.697. 
Loss: 0.705. 
Loss: 0.675. 
Loss: 0.717. 
Loss: 0.704. 


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:06<00:04,  1.10s/it]

Loss: 0.265. 
Loss: 0.695. 
Loss: 0.708. 
Loss: 0.682. 
Loss: 0.701. 
Loss: 0.705. 
Loss: 0.709. 


 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:07<00:03,  1.10s/it]

Loss: 0.545. 
Loss: 0.694. 
Loss: 0.705. 
Loss: 0.700. 
Loss: 0.693. 
Loss: 0.702. 
Loss: 0.693. 


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [00:08<00:02,  1.09s/it]

Loss: 0.731. 
Loss: 0.708. 
Loss: 0.700. 
Loss: 0.682. 
Loss: 0.705. 
Loss: 0.712. 
Loss: 0.693. 


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [00:09<00:01,  1.09s/it]

Loss: 0.771. 
Loss: 0.703. 
Loss: 0.696. 
Loss: 0.703. 
Loss: 0.692. 
Loss: 0.687. 
Loss: 0.735. 


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.09s/it]

Training is finished!
CPU times: total: 1min 26s
Wall time: 10.9 s





#### RMSProp

In [20]:
%%time

optimizer = torch.optim.RMSprop(net.parameters(), lr=0.1, alpha=0.99)

for epoch in tqdm(range(10)):  
    running_loss, running_items = 0.0, 0.0
    
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(torch.float), data[1].to(torch.float)
        labels = labels.reshape(8, 1)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        
        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            net.eval()

            print(f'Loss: {running_loss / running_items:.3f}. ')
            
            running_loss, running_items = 0.0, 0.0

            net.train()
        
print('Training is finished!')

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Loss: 0.349. 
Loss: 0.692. 
Loss: 0.302. 
Loss: 0.166. 
Loss: 0.171. 
Loss: 0.170. 


 10%|████████▎                                                                          | 1/10 [00:01<00:11,  1.31s/it]

Loss: 0.178. 
Loss: 0.187. 
Loss: 0.170. 
Loss: 0.168. 
Loss: 0.169. 
Loss: 0.159. 
Loss: 0.171. 


 20%|████████████████▌                                                                  | 2/10 [00:02<00:10,  1.35s/it]

Loss: 0.168. 
Loss: 0.281. 
Loss: 0.166. 
Loss: 0.175. 
Loss: 0.170. 
Loss: 0.170. 
Loss: 0.168. 


 30%|████████████████████████▉                                                          | 3/10 [00:04<00:09,  1.39s/it]

Loss: 0.173. 
Loss: 0.209. 
Loss: 0.176. 
Loss: 0.166. 
Loss: 0.167. 
Loss: 0.175. 
Loss: 0.153. 


 40%|█████████████████████████████████▏                                                 | 4/10 [00:05<00:08,  1.40s/it]

Loss: 0.174. 
Loss: 0.304. 
Loss: 0.171. 
Loss: 0.171. 
Loss: 0.174. 
Loss: 0.166. 
Loss: 0.162. 
Loss: 0.167. 


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:06<00:06,  1.38s/it]

Loss: 0.232. 
Loss: 0.174. 
Loss: 0.161. 
Loss: 0.159. 
Loss: 0.169. 
Loss: 0.181. 


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:08<00:05,  1.36s/it]

Loss: 0.165. 
Loss: 0.324. 
Loss: 0.167. 
Loss: 0.170. 
Loss: 0.166. 
Loss: 0.168. 
Loss: 0.167. 


 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:09<00:04,  1.34s/it]

Loss: 0.176. 
Loss: 0.117. 
Loss: 0.166. 
Loss: 0.172. 
Loss: 0.174. 
Loss: 0.166. 
Loss: 0.160. 


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [00:10<00:02,  1.33s/it]

Loss: 0.175. 
Loss: 0.083. 
Loss: 0.167. 
Loss: 0.173. 
Loss: 0.161. 
Loss: 0.168. 
Loss: 0.174. 


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [00:12<00:01,  1.33s/it]

Loss: 0.168. 
Loss: 0.175. 
Loss: 0.168. 
Loss: 0.160. 
Loss: 0.174. 
Loss: 0.174. 
Loss: 0.162. 
Loss: 0.175. 


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:13<00:00,  1.35s/it]

Training is finished!
CPU times: total: 1min 47s
Wall time: 13.5 s





#### Adam

In [21]:
%%time

optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

for epoch in tqdm(range(10)):  
    running_loss, running_items = 0.0, 0.0
    
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(torch.float), data[1].to(torch.float)
        labels = labels.reshape(8, 1)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        
        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            net.eval()

            print(f'Loss: {running_loss / running_items:.3f}. ')
            
            running_loss, running_items = 0.0, 0.0

            net.train()
        
print('Training is finished!')

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Loss: 0.291. 
Loss: 0.169. 
Loss: 0.173. 
Loss: 0.161. 
Loss: 0.165. 
Loss: 0.166. 


 10%|████████▎                                                                          | 1/10 [00:01<00:14,  1.63s/it]

Loss: 0.175. 
Loss: 0.128. 
Loss: 0.172. 
Loss: 0.164. 
Loss: 0.166. 
Loss: 0.165. 
Loss: 0.172. 


 20%|████████████████▌                                                                  | 2/10 [00:03<00:13,  1.65s/it]

Loss: 0.173. 
Loss: 0.112. 
Loss: 0.168. 
Loss: 0.172. 
Loss: 0.173. 
Loss: 0.160. 
Loss: 0.161. 


 30%|████████████████████████▉                                                          | 3/10 [00:04<00:11,  1.65s/it]

Loss: 0.166. 
Loss: 0.223. 
Loss: 0.174. 
Loss: 0.159. 
Loss: 0.178. 
Loss: 0.169. 
Loss: 0.164. 


 40%|█████████████████████████████████▏                                                 | 4/10 [00:06<00:10,  1.67s/it]

Loss: 0.170. 
Loss: 0.197. 
Loss: 0.163. 
Loss: 0.180. 
Loss: 0.173. 
Loss: 0.164. 
Loss: 0.170. 


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:08<00:08,  1.69s/it]

Loss: 0.165. 
Loss: 0.115. 
Loss: 0.173. 
Loss: 0.172. 
Loss: 0.164. 
Loss: 0.217. 
Loss: 0.175. 


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:09<00:06,  1.67s/it]

Loss: 0.161. 
Loss: 0.280. 
Loss: 0.175. 
Loss: 0.173. 
Loss: 0.167. 
Loss: 0.164. 
Loss: 0.170. 


 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:11<00:04,  1.65s/it]

Loss: 0.160. 
Loss: 0.250. 
Loss: 0.166. 
Loss: 0.163. 
Loss: 0.172. 
Loss: 0.170. 
Loss: 0.166. 


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [00:13<00:03,  1.64s/it]

Loss: 0.174. 
Loss: 0.078. 
Loss: 0.162. 
Loss: 0.170. 
Loss: 0.158. 
Loss: 0.169. 
Loss: 0.173. 


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [00:14<00:01,  1.65s/it]

Loss: 0.170. 
Loss: 0.061. 
Loss: 0.167. 
Loss: 0.167. 
Loss: 0.164. 
Loss: 0.166. 
Loss: 0.173. 


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.66s/it]

Loss: 0.177. 
Training is finished!
CPU times: total: 2min 12s
Wall time: 16.6 s





SGD отработал быстрее всех, Adam медленнее. Показатели Loss у SGD самые плохие, зато у Adam наилучшие. RMSProp по середине. Adam выглядит оптимальнее всех, так как разница во времени незначительная.