In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, so the available datasets are visible in the cell output.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
import torch
import sklearn
import random

# Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fix the random seeds (Python's `random` and PyTorch) so runs are repeatable.
random.seed(1)
torch.manual_seed(1)
if device == "cuda":
    # Also seed every visible CUDA device explicitly.
    torch.cuda.manual_seed_all(1)

# Load the competition data: training set, test set and the sample submission
# file whose layout the final predictions are written into.
train = pd.read_csv("/kaggle/input/2021-ai-midterm-p2/train.csv")
test = pd.read_csv("/kaggle/input/2021-ai-midterm-p2/test.csv")
submission = pd.read_csv("/kaggle/input/2021-ai-midterm-p2/submit_sample.csv")

# Quick sanity check: show the first rows of each frame, in load order.
for frame in (train, test, submission):
    print(frame.head())

# ---- Preprocessing ----

# Separate features from the target. 'ID' is dropped from both splits and
# 'MEDV' is the column being predicted, so it is removed from the features.
x_train = train.drop(columns=['ID', 'MEDV']).to_numpy(copy=True)
y_train = train['MEDV'].to_numpy(copy=True)
x_test = test.drop(columns=['ID']).to_numpy(copy=True)

# Standardise the features. The scaler is fitted on the training features only
# and the same fitted statistics are then applied to the test features.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# No label encoding is applied: the LabelEncoder step was left disabled and
# the target values are used as they are.

# Convert everything to float32 tensors living on the selected device.
x_test = torch.tensor(x_test, dtype=torch.float32, device=device)
x_train = torch.tensor(x_train, dtype=torch.float32, device=device)
y_train = torch.tensor(y_train, dtype=torch.float32, device=device)

# Report the resulting tensor shapes.
print(x_train.shape, y_train.shape)


# Building blocks of the network: 13 inputs -> 64 hidden units -> 1 output.
# Both Linear layers are constructed before any re-initialisation so that the
# random-number stream is consumed in the same order on every run.
layer1 = torch.nn.Linear(in_features=13, out_features=64, bias=True).to(device)
layer2 = torch.nn.Linear(in_features=64, out_features=1, bias=True).to(device)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout()  # library-default drop probability

# Weight initialisation: overwrite each layer's weight matrix in place with
# Xavier-normal values (biases keep the initialisation Linear gave them).
for linear in (layer1, layer2):
    torch.nn.init.xavier_normal_(linear.weight)

# Assemble the model: Linear -> ReLU -> Dropout -> Linear.
model = torch.nn.Sequential(layer1, relu, dropout, layer2).to(device)

# Training hyper-parameters.
epochs = 3000
lr = 0.01

# Objective: mean squared error between predictions and targets.
loss = torch.nn.MSELoss()

# Optimiser: Adam over all model parameters.
optim = torch.optim.Adam(model.parameters(), lr=lr)

# Training: put the model in training mode (dropout active) and run one
# gradient step per epoch on the whole training set.
model.train()

# The target is reshaped once to a column so it lines up with the model
# output, which has a trailing dimension of size 1.
y_column = y_train.unsqueeze(1)

# `epochs + 1` iterations so that the final epoch number is also logged.
for epoch in range(0, epochs + 1):
    optim.zero_grad()
    h = model(x_train)
    cost = loss(h, y_column)
    cost.backward()
    optim.step()

    # Log the training loss every 100 epochs.
    if not epoch % 100:
        print(epoch, cost.item())

In [12]:
# Inference: switch to evaluation mode (disables dropout) and run the test
# features through the model without tracking gradients.
model.eval()
with torch.no_grad():
    predict = model(x_test)

# The model output is a tensor with a trailing dimension of size 1 and may
# live on the GPU. Convert it explicitly to a flat CPU NumPy array before
# storing it in the DataFrame: a CUDA tensor cannot be converted to NumPy
# implicitly, and a column assignment expects one value per row.
submission['MEDV'] = predict.detach().cpu().numpy().reshape(-1)

print(submission)

# Write the submission file into the current working directory.
submission.to_csv("submission.csv", index=False)