In [1]:
import torch
from torch.autograd import Variable
from torch import nn, optim
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

# 보스턴 주택 가격 회귀 구현 [kaggle 링크](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)
## make dataset
[pandas 기본 사용법 익히기](https://dandyrilla.github.io/2017-08-12/pandas-10min/)  
[dummy_na](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.get_dummies.html)  
[pytorch dense network for house pricing regression](https://www.kaggle.com/leostep/pytorch-dense-network-for-house-pricing-regression)

In [6]:
data_train = pd.read_csv('./data/house-prices/train.csv')
X_test = pd.read_csv('./data/house-prices/test.csv')
data = data_train.append(X_test, ignore_index=True, sort=False)
data = pd.get_dummies(data, dummy_na=True, drop_first=True)
data.drop('Id', axis=1, inplace=True)
data.isnull().values.any()

True

In [7]:
data.fillna(data.median(), inplace=True)
columns = data.columns
sale_price = data['SalePrice']
data.isnull().values.any()

False

In [8]:
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns = columns)
data['SalePrice'] = sale_price
data.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleType_nan,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_nan
0,0.235294,0.150685,0.03342,0.666667,0.5,0.949275,0.883333,0.1225,0.125089,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.202055,0.038795,0.555556,0.875,0.753623,0.433333,0.0,0.173281,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.235294,0.160959,0.046507,0.666667,0.5,0.934783,0.866667,0.10125,0.086109,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.294118,0.133562,0.038561,0.666667,0.5,0.311594,0.333333,0.0,0.038271,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.235294,0.215753,0.060576,0.777778,0.5,0.927536,0.833333,0.21875,0.116052,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [9]:
train = data.iloc[:1460]
test = data.iloc[1460:]
test.drop('SalePrice', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [13]:
X_train, X_val, y_train, y_val = train_test_split(train.drop('SalePrice', axis=1), train['SalePrice'], test_size=0.2, random_state=42)

In [14]:
train_batch = np.array_split(X_train, 50)
label_batch = np.array_split(y_train, 50)

In [15]:
for i in range(len(train_batch)):
    train_batch[i] = torch.from_numpy(train_batch[i].values).float()
for i in range(len(label_batch)):
    label_batch[i] = torch.from_numpy(label_batch[i].values).float().view(-1, 1)

X_val = torch.from_numpy(X_val.values).float()
y_val = torch.from_numpy(y_val.values).float().view(-1, 1)


## make Model

In [19]:
class Regressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(288, 144)
        self.fc2 = nn.Linear(144, 72)
        self.fc3 = nn.Linear(72, 18)
        self.fc4 = nn.Linear(18, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))

        return x

In [20]:
model = Regressor()
cost_func = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


## Training Model

In [21]:
model = Regressor()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 300

train_losses, test_losses = [], []
for e in range(epochs):
    model.train()
    train_loss = 0
    for i in range(len(train_batch)):
        optimizer.zero_grad()
        output = model(train_batch[i])
        loss = torch.sqrt(criterion(torch.log(output), torch.log(label_batch[i])))
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    else:
        test_loss = 0
        accuracy = 0
        
        with torch.no_grad():
            model.eval()
            predictions = model(X_val)
            test_loss += torch.sqrt(criterion(torch.log(predictions), torch.log(y_val)))
                
        train_losses.append(train_loss/len(train_batch))
        test_losses.append(test_loss)

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(train_loss/len(train_batch)),
              "Test Loss: {:.3f}.. ".format(test_loss))

Epoch: 1/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 2/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 3/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 4/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 5/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 6/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 7/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 8/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 9/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 10/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 11/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 12/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 13/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 14/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 15/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 16/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 17/300..  Training Loss: inf..  Test Loss: inf.. 
Epoch: 18/300..  Training Loss: inf..  T

KeyboardInterrupt: 


## test

In [None]:
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)