In [1]:
import numpy as np

from PIL import Image
from sklearn.datasets import fetch_california_housing

In [2]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader 
import matplotlib.pyplot as plt

In [3]:
from torchvision import transforms, datasets
import torch

In [4]:
import torch.nn.functional as F
import torch.nn as nn

In [5]:
#разделим на трейн и тест
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=13)

In [6]:
X_train.shape, X_test.shape

((15480, 8), (5160, 8))

In [7]:
class MyOwnHousing(Dataset):
    def __init__(self, X, y):
      self.X = X
      self.y = y
    
    def __len__(self):
      return len(self.X)
    
    def __getitem__(self, idx):
      X = self.X.iloc[idx]
      y = self.y.iloc[idx]
      return torch.tensor(X, dtype = torch.float), torch.tensor(y, dtype = torch.float)

In [8]:
class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim, activation="relu"):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = activation
        
    def forward(self, x):
        x = self.fc(x)
        if self.activation=="relu":
            return F.relu(x)
        if self.activation=="relu":
            return F.relu(x)
        raise RuntimeError
        

class FeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedForward, self).__init__()
        self.bn1 = nn.BatchNorm1d(input_dim)
        self.fc1 = Perceptron(input_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.dp = nn.Dropout(0.25)
        self.fc2 = Perceptron(hidden_dim, output_dim, "relu")
        
    def forward(self, x):
        x = self.bn1(x)
        x = self.fc1(x)
        x = self.bn2(x)
        x = self.dp(x)
        x = self.fc2(x)
        return x

In [9]:
train_dataset = MyOwnHousing(X_train, y_train)
test_dataset = MyOwnHousing(X_test, y_test)

In [10]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                          batch_size=128,
                          shuffle=True,
                          num_workers=1)

In [11]:
test_loader = torch.utils.data.DataLoader(test_dataset,
                          batch_size=64,
                          shuffle=False,
                          num_workers=1)

In [12]:
sample = next(iter(train_loader))
sample[0].shape, sample[1].shape

(torch.Size([128, 8]), torch.Size([128]))

### ADAM

In [13]:
net = FeedForward(8, 200, 1)

optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
criterion = nn.MSELoss()

In [14]:
from tqdm import tqdm

In [15]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        x, y = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()        
        if i % 100 == 0:    # печатаем каждые 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Training is finished!')

  return F.mse_loss(input, target, reduction=self.reduction)


[1,     1] loss: 0.054
[1,   101] loss: 1.861


  return F.mse_loss(input, target, reduction=self.reduction)
 10%|█         | 1/10 [00:02<00:25,  2.83s/it]

[2,     1] loss: 0.013
[2,   101] loss: 1.408


 20%|██        | 2/10 [00:05<00:19,  2.47s/it]

[3,     1] loss: 0.014
[3,   101] loss: 1.378


 30%|███       | 3/10 [00:07<00:16,  2.38s/it]

[4,     1] loss: 0.012
[4,   101] loss: 1.356


 40%|████      | 4/10 [00:09<00:14,  2.34s/it]

[5,     1] loss: 0.012
[5,   101] loss: 1.351


 50%|█████     | 5/10 [00:11<00:11,  2.29s/it]

[6,     1] loss: 0.014
[6,   101] loss: 1.358


 60%|██████    | 6/10 [00:13<00:09,  2.25s/it]

[7,     1] loss: 0.011
[7,   101] loss: 1.362


 70%|███████   | 7/10 [00:16<00:06,  2.21s/it]

[8,     1] loss: 0.011
[8,   101] loss: 1.352


 80%|████████  | 8/10 [00:18<00:04,  2.21s/it]

[9,     1] loss: 0.015
[9,   101] loss: 1.342


 90%|█████████ | 9/10 [00:20<00:02,  2.24s/it]

[10,     1] loss: 0.012
[10,   101] loss: 1.352


100%|██████████| 10/10 [00:22<00:00,  2.28s/it]

Training is finished!





### SGD

In [16]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

In [17]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        x, y = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        if i % 100 == 0:    # печатаем каждые 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.012
[1,   101] loss: 1.330


 10%|█         | 1/10 [00:02<00:19,  2.16s/it]

[2,     1] loss: 0.013
[2,   101] loss: 1.330


 20%|██        | 2/10 [00:04<00:17,  2.20s/it]

[3,     1] loss: 0.017
[3,   101] loss: 1.333


 30%|███       | 3/10 [00:06<00:15,  2.20s/it]

[4,     1] loss: 0.013
[4,   101] loss: 1.327


 40%|████      | 4/10 [00:08<00:12,  2.17s/it]

[5,     1] loss: 0.012
[5,   101] loss: 1.325


 50%|█████     | 5/10 [00:10<00:10,  2.15s/it]

[6,     1] loss: 0.010
[6,   101] loss: 1.327


 60%|██████    | 6/10 [00:12<00:08,  2.14s/it]

[7,     1] loss: 0.011
[7,   101] loss: 1.331


 70%|███████   | 7/10 [00:15<00:06,  2.14s/it]

[8,     1] loss: 0.012
[8,   101] loss: 1.317


 80%|████████  | 8/10 [00:17<00:04,  2.17s/it]

[9,     1] loss: 0.012
[9,   101] loss: 1.331


 90%|█████████ | 9/10 [00:19<00:02,  2.18s/it]

[10,     1] loss: 0.013
[10,   101] loss: 1.325


100%|██████████| 10/10 [00:21<00:00,  2.17s/it]

Training is finished!





### RMSProp

In [18]:
optimizer = torch.optim.RMSprop(net.parameters())

In [19]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        x, y = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        if i % 100 == 0:    # печатаем каждые 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.016
[1,   101] loss: 1.556


 10%|█         | 1/10 [00:03<00:29,  3.29s/it]

[2,     1] loss: 0.016
[2,   101] loss: 1.344


 20%|██        | 2/10 [00:05<00:21,  2.70s/it]

[3,     1] loss: 0.012
[3,   101] loss: 1.335


 30%|███       | 3/10 [00:07<00:17,  2.48s/it]

[4,     1] loss: 0.012
[4,   101] loss: 1.355


 40%|████      | 4/10 [00:10<00:14,  2.38s/it]

[5,     1] loss: 0.019
[5,   101] loss: 1.341


 50%|█████     | 5/10 [00:12<00:11,  2.35s/it]

[6,     1] loss: 0.014
[6,   101] loss: 1.328


 60%|██████    | 6/10 [00:14<00:09,  2.33s/it]

[7,     1] loss: 0.012
[7,   101] loss: 1.338


 70%|███████   | 7/10 [00:16<00:06,  2.29s/it]

[8,     1] loss: 0.012
[8,   101] loss: 1.345


 80%|████████  | 8/10 [00:18<00:04,  2.25s/it]

[9,     1] loss: 0.013
[9,   101] loss: 1.333


 90%|█████████ | 9/10 [00:21<00:02,  2.22s/it]

[10,     1] loss: 0.013
[10,   101] loss: 1.334


100%|██████████| 10/10 [00:23<00:00,  2.33s/it]

Training is finished!





RMSProp и SGD ведут себя примерно одинаково. При использовании Adam ошибка падает стабильнее.
