In [None]:
import torch
from torch.autograd import Variable
from torch import nn, optim
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

## 주택 가격 회귀 구현 (Kaggle House Prices, Ames Housing 데이터셋) [kaggle 링크](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)
### make dataset
[pandas 기본 사용법 익히기](https://dandyrilla.github.io/2017-08-12/pandas-10min/)  
[dummy_na](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.get_dummies.html)  
[pytorch dense network for house pricing regression](https://www.kaggle.com/leostep/pytorch-dense-network-for-house-pricing-regression)

In [None]:
# Load the two competition CSVs (paths are relative to the notebook's working directory):
# the training file goes to `data_train`, the test file to `X_test`.
data_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/house-prices/train.csv', './data/house-prices/test.csv')
)

In [None]:
# Stack the training rows and the test rows into one frame so that the one-hot
# encoding below produces the same columns for both.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
data = pd.concat([data_train, X_test], ignore_index=True, sort=False)
# One-hot encode the object/category columns; dummy_na=True adds an extra indicator
# column per encoded feature that marks missing values.
data = pd.get_dummies(data, dummy_na=True)
# Remove the 'Id' column so it is not used as a feature
# (reassignment instead of inplace=True).
data = data.drop('Id', axis=1)
# Cell output: True while any NaN is still present in the frame.
data.isnull().values.any()

In [None]:
# Replace every remaining NaN with the median of its own column.
column_medians = data.median()
data = data.fillna(column_medians)
# Keep the column labels and the SalePrice column around; the scaling cell uses both.
columns = data.columns
sale_price = data['SalePrice']
# Cell output: whether any NaN is left after the fill.
data.isnull().values.any()

In [None]:
# Features live on very different scales (e.g. a quantity of 1~10 vs. a price of
# 100~100000), so a scaler is used to bring every feature into one common range.
scaler = MinMaxScaler()  # rescales each column so its minimum becomes 0 and its maximum 1
scaled_values = scaler.fit_transform(data)
data = pd.DataFrame(scaled_values, columns=columns)
# Put the SalePrice values saved before scaling back in place of the scaled column.
data['SalePrice'] = sale_price

In [None]:
# The combined frame holds the training rows first, followed by the test rows, so the
# boundary is the number of training rows (derived here instead of a hard-coded 1460).
n_train = len(data_train)
train = data.iloc[:n_train]
# Drop the target column from the test rows. A non-inplace drop returns a new frame
# instead of mutating a slice of `data`, which avoids pandas' SettingWithCopyWarning.
test = data.iloc[n_train:].drop('SalePrice', axis=1)

In [None]:
# Hold out 20% of the labelled rows for validation; random_state=42 is passed so the
# split is the same on every run.
features = train.drop('SalePrice', axis=1)
target = train['SalePrice']
X_train, X_val, y_train, y_val = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [None]:
# Number of mini-batches the training rows are divided into.
n_batches = 100
# Split positional row indices rather than the pandas objects themselves:
# np.array_split on a DataFrame/Series goes through the pandas `swapaxes` method, which
# emits a FutureWarning in pandas >= 2.1. Splitting an index array yields the same chunk
# boundaries, and `.iloc` keeps each chunk a DataFrame / Series.
batch_rows = np.array_split(np.arange(len(X_train)), n_batches)
train_batch = [X_train.iloc[rows] for rows in batch_rows]
label_batch = [y_train.iloc[rows] for rows in batch_rows]

In [None]:
# Convert every pandas mini-batch into a float tensor, replacing the list contents in
# place. Labels are reshaped to a single column, i.e. shape (N, 1).
train_batch[:] = [torch.from_numpy(batch.values).float() for batch in train_batch]
label_batch[:] = [
    torch.from_numpy(labels.values).float().view(-1, 1) for labels in label_batch
]

# Same conversion for the validation split.
X_val, y_val = (
    torch.from_numpy(X_val.values).float(),
    torch.from_numpy(y_val.values).float().view(-1, 1),
)


## make Model

In [None]:
# A single linear layer, i.e. plain linear regression with one output value.
# The input width is read from the feature matrix instead of being hard-coded (331),
# so the layer keeps matching the data if the one-hot encoded column set changes.
n_features = X_val.shape[1]
model = nn.Linear(n_features, 1)
# Mean squared error; the training loop takes its square root, so the optimised
# quantity is the RMSE.
cost_func = nn.MSELoss()
# NOTE(review): SalePrice is restored to its unscaled values after MinMax scaling while
# lr is 1e-5 -- this looks likely to converge very slowly; confirm the learning rate
# (the commented-out setup in the training cell used 0.001) or scale the target.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)


## Training Model

In [None]:
# Train for a fixed number of epochs, recording one training-loss value and one
# validation-loss value per epoch. Both are RMSE values (sqrt of the MSE criterion).
epochs = 10000

train_losses, test_losses = [], []
for e in range(epochs):
    # ---- training pass over every mini-batch ----
    model.train()
    train_loss = 0.0
    for batch_x, batch_y in zip(train_batch, label_batch):
        optimizer.zero_grad()
        output = model(batch_x)

        # RMSE of this mini-batch
        loss = torch.sqrt(cost_func(output, batch_y))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # ---- evaluation on the held-out validation split ----
    # (The original used a for/else here; with no `break` in the loop the else branch
    # always ran, so it is written as plain sequential code.)
    model.eval()
    with torch.no_grad():
        predictions = model(X_val)
        # .item() stores a plain Python float; previously a 0-dim tensor was appended
        # to test_losses on every epoch.
        test_loss = torch.sqrt(cost_func(predictions, y_val)).item()

    # Mean of the per-batch RMSE values for this epoch
    train_losses.append(train_loss / len(train_batch))
    test_losses.append(test_loss)

    # Report progress every 50 epochs ("Test Loss" is the loss on the validation split).
    if e % 50 == 0:
        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(train_loss/len(train_batch)),
              "Test Loss: {:.3f}.. ".format(test_loss))


## test: training / validation loss 시각화

In [None]:
# Draw the per-epoch loss histories recorded by the training cell on one set of axes.
for history, curve_label in (
    (train_losses, 'Training loss'),
    (test_losses, 'Validation loss'),
):
    plt.plot(history, label=curve_label)
# Legend without a surrounding frame
plt.legend(frameon=False)