In [1]:
# import
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the dataset
dfile = 'data/bikesharing.csv'
data = np.genfromtxt(dfile, delimiter=',')

# np.genfromtxt does not fail on fields it cannot parse (for example a header
# row or empty cells); it stores NaN instead. Fail fast here so that NaNs never
# reach the scaler or the loss.
if data.ndim != 2 or data.shape[1] < 2:
    raise ValueError('expected a 2-D table with feature and target columns, got shape %s' % (data.shape,))
if np.isnan(data).any():
    raise ValueError('%s contains missing or non-numeric values' % dfile)

# Every column except the last is a feature; the last column is the target.
# Slicing with -1: (instead of -1) keeps y two-dimensional: (n_samples, 1).
X = data[:,:-1]
y = data[:,-1:]

# Number of feature columns.
dim_x = X.shape[1]

print('X,shape: %s, y.shape: %s'%(X.shape, y.shape))

X,shape: (17379, 14), y.shape: (17379, 1)


In [3]:
# Hold out everything except 5000 training rows; the split is seeded so it is
# the same on every run.
X_trn, X_tst, y_trn, y_tst = train_test_split(
    X,
    y,
    train_size=5000,
    random_state=42,
)

# Learn the scaling statistics from the training features only, then apply the
# same statistics to both splits. The targets are left unscaled.
scalerX = StandardScaler().fit(X_trn)
X_trn = scalerX.transform(X_trn)
X_tst = scalerX.transform(X_tst)

In [4]:
# Regression dataset: serves (features, target) pairs as float32 tensors
class RegDataset(Dataset):
    """Map-style dataset pairing each feature row with its target row.

    Args:
        X: indexable collection of feature rows (e.g. a 2-D NumPy array or a
            list of lists).
        Y: indexable collection of target rows, one per row of ``X``.

    Raises:
        ValueError: if ``X`` and ``Y`` do not hold the same number of rows.
    """

    def __init__(self, X, Y):
        # A length mismatch would otherwise only show up as an IndexError in
        # the middle of an epoch, or as silently ignored rows.
        if len(X) != len(Y):
            raise ValueError(
                "X and Y must have the same number of rows, got %d and %d"
                % (len(X), len(Y))
            )

        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(features, target)`` pair as float32 tensors."""
        # as_tensor with an explicit dtype gives the same result as
        # from_numpy(...).float() for NumPy rows, and also accepts plain lists.
        x = torch.as_tensor(self.X[idx], dtype=torch.float32)
        y = torch.as_tensor(self.Y[idx], dtype=torch.float32)

        return x, y
    
# Wrap each split so a DataLoader can draw (features, target) pairs from it.
train_set = RegDataset(X=X_trn, Y=y_trn)
test_set = RegDataset(X=X_tst, Y=y_tst)

In [5]:
# DataLoaders
# Training batches: reshuffled on every pass; a trailing incomplete batch is
# dropped.
train_dataloader = DataLoader(
    train_set,
    batch_size=50,
    shuffle=True,
    drop_last=True,
)
# Evaluation batches: fixed order, every sample kept.
test_dataloader = DataLoader(test_set, batch_size=50)

In [6]:
# Model definition
class NeuralNetwork(nn.Module):
    """Single-hidden-layer regressor: Linear -> Tanh -> Linear with one output.

    Args:
        in_features: number of input features per sample. Defaults to 14, the
            value that was previously hard-coded.
        hidden_features: width of the hidden layer. Defaults to 500, the value
            that was previously hard-coded.
    """

    def __init__(self, in_features=14, hidden_features=500):
        super().__init__()
        # The attribute name becomes part of the state_dict keys, so it is kept
        # unchanged to stay compatible with previously saved weights.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.Tanh(),
            nn.Linear(hidden_features, 1),
        )

    def forward(self, x):
        """Return the prediction for ``x``; the last dimension of the output is 1."""
        pred = self.linear_relu_stack(x)
        return pred

# Build the model instance that the optimizer, the training loop and the
# save/load cells below all operate on.
model = NeuralNetwork()

In [7]:
# Hyperparameter settings
# learning_rate is passed to the optimizer; epochs bounds the training loop.
learning_rate = 1e-3
# NOTE(review): batch_size is not read by the DataLoader cell above, which
# passes the literal 50 directly; keep the two values in sync by hand.
batch_size = 50
epochs = 10

In [8]:
# Loss function: mean squared error (nn.MSELoss with its default arguments)
loss_fn = nn.MSELoss()

In [9]:
# Optimizer: Adam over all parameters of `model`, with `learning_rate` as lr
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# Training / test function definitions
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch the loss is computed, back-propagated and followed by one
    optimizer step. The loss of every 10th batch is printed together with a
    running sample count.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.
        model: callable mapping a batch of inputs to predictions.
        loss_fn: callable ``(pred, target) -> scalar loss tensor``.
        optimizer: optimizer that updates the parameters of ``model``.
    """
    n_total = len(dataloader.dataset)
    for step, (inputs, targets) in enumerate(dataloader):
        # Forward pass: prediction and loss
        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagation and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Report only on every 10th batch
        if step % 10 != 0:
            continue
        loss_value = batch_loss.item()
        n_seen = step * len(inputs)
        print("loss: %.7f  [%d / %d]"%(loss_value, n_seen, n_total))

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    RMSE = np.sqrt(test_loss)
    print("Test Error: \n RMSE: %.8f \n"%RMSE)

In [11]:
# Training: alternate one training pass and one evaluation pass per epoch
for epoch_no in range(1, epochs + 1):
    print("Epoch %d \n-------------------------------"%epoch_no)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1 
-------------------------------
loss: 132125.4375000  [0 / 5000]
loss: 43517.8046875  [500 / 5000]
loss: 58613.8007812  [1000 / 5000]
loss: 76207.6796875  [1500 / 5000]
loss: 50406.9531250  [2000 / 5000]
loss: 99417.5234375  [2500 / 5000]
loss: 54372.5390625  [3000 / 5000]
loss: 49638.3945312  [3500 / 5000]
loss: 59209.1015625  [4000 / 5000]
loss: 78956.0703125  [4500 / 5000]
Test Error: 
 RMSE: 246.05397951 

Epoch 2 
-------------------------------
loss: 63056.9531250  [0 / 5000]
loss: 73838.7187500  [500 / 5000]
loss: 38915.6210938  [1000 / 5000]
loss: 68802.9218750  [1500 / 5000]
loss: 87640.2265625  [2000 / 5000]
loss: 50857.8007812  [2500 / 5000]
loss: 65981.2500000  [3000 / 5000]
loss: 51221.8359375  [3500 / 5000]
loss: 51218.4296875  [4000 / 5000]
loss: 72620.9687500  [4500 / 5000]
Test Error: 
 RMSE: 217.80974294 

Epoch 3 
-------------------------------
loss: 74399.7734375  [0 / 5000]
loss: 63011.6718750  [500 / 5000]
loss: 51256.0039062  [1000 / 5000]
loss: 22359.8

In [12]:
# Save the model: only the parameters (state_dict) are written, not the class
torch.save(model.state_dict(), 'model_weights.pth')

In [13]:
# Load the model: restore the saved parameters into the existing `model`
# instance (the architecture must match the one that produced the file).
model.load_state_dict(torch.load('model_weights.pth'))
# Switch to evaluation mode; eval() returns the module, so its repr is shown
# as the cell output.
model.eval()

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=14, out_features=500, bias=True)
    (1): Tanh()
    (2): Linear(in_features=500, out_features=1, bias=True)
  )
)