In [32]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory (in os.walk order)
# and print its full path, one per line.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [33]:
import torch
import sklearn
import random
# Fix the seeds of Python's `random` module and of PyTorch so runs are repeatable.
random.seed(1)
torch.manual_seed(1)

# Device selection: use the GPU when CUDA is available, otherwise fall back to the CPU.
# The CUDA generators are explicitly seeded only on the GPU path.
if torch.cuda.is_available():
    device = "cuda"
    torch.cuda.manual_seed_all(1)
else:
    device = "cpu"
    
# Load the data: training set, test set and the sample submission file (read in that order).
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/2021-ai-midterm-p1/train.csv",
        "/kaggle/input/2021-ai-midterm-p1/test.csv",
        "/kaggle/input/2021-ai-midterm-p1/submit_sample.csv",
    )
)

# Inspect the data: print the first rows of each frame.
for frame in (train, test, submission):
    print(frame.head())

# Data preprocessing
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Features are every column except 'index' (and 'label' for the training frame);
# the target is the 'label' column.
x_train = train.drop(columns=['index', 'label']).to_numpy()
y_train = train['label'].to_numpy()
x_test = test.drop(columns=['index']).to_numpy()

# Standardise the features: the scaler is fitted on the training features only
# and the same fitted scaler is then applied to both train and test.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Encode the labels as consecutive integer class ids; `le` is kept so that
# predictions can be mapped back to the original labels later.
le = LabelEncoder()
y_train = le.fit_transform(y_train)

# Move the data into tensors on the selected device:
# float32 features, int64 class ids.
x_train = torch.tensor(x_train, dtype=torch.float32, device=device)
x_test = torch.tensor(x_test, dtype=torch.float32, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)

# Check the tensor shapes.
print(x_train.shape, y_train.shape)

# Layer setup: a three-layer fully connected classifier.
# The input width is taken from the training feature tensor (its second
# dimension) instead of being hard-coded, so the network keeps working if the
# number of feature columns in the CSV changes. The output width is the number
# of classes discovered by the label encoder.
n_features = x_train.shape[1]
n_classes = len(le.classes_)
layer1 = torch.nn.Linear(n_features, 128, bias=True).to(device)
layer2 = torch.nn.Linear(128, 64, bias=True).to(device)
layer3 = torch.nn.Linear(64, n_classes, bias=True).to(device)
relu = torch.nn.ReLU().to(device)
dropout = torch.nn.Dropout(0.5).to(device)

# Layer initialisation: Xavier-normal for every weight matrix
# (biases keep the default initialisation done by torch.nn.Linear).
torch.nn.init.xavier_normal_(layer1.weight)
torch.nn.init.xavier_normal_(layer2.weight)
torch.nn.init.xavier_normal_(layer3.weight)

# Model setup: Linear -> ReLU -> Dropout, twice, followed by the output layer.
# The same ReLU and Dropout module instances appear twice in the Sequential.
model = torch.nn.Sequential(layer1, relu, dropout,
                            layer2, relu, dropout,
                            layer3).to(device)

# Training hyperparameters: number of epochs, learning rate, mini-batch size.
epochs, lr, batch_size = 2500, 0.001, 500

# Loss function for multi-class classification.
loss = torch.nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the configured learning rate.
optim = torch.optim.Adam(model.parameters(), lr=lr)

# Mini-batch data loader over the training tensors.
# shuffle=True makes the loader reshuffle the samples at every epoch; without
# it each epoch would iterate over the exact same batches in the exact same
# order, which ties the gradient updates to the row order of the CSV.
from torch.utils.data import TensorDataset, DataLoader
dataset = TensorDataset(x_train, y_train)
data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# Training: switch the model to training mode (dropout active) and run
# mini-batch gradient descent for the configured number of epochs.
model.train()
for epoch in range(epochs):
    running_cost = 0
    for batch_x, batch_y in data_loader:
        optim.zero_grad()                      # clear gradients of the previous step
        batch_cost = loss(model(batch_x), batch_y)
        batch_cost.backward()
        optim.step()
        running_cost += batch_cost.item()
    if epoch % 100 != 0:
        continue
    # Every 100th epoch: report the epoch index and the mean of the per-batch losses.
    print(epoch, running_cost / len(data_loader))


In [34]:
# End of training: predict on the test set and save the submission file.
model.eval()  # evaluation mode: disables dropout
with torch.no_grad():  # no gradients are needed for inference
    logits = model(x_test)

# The predicted class is the index of the largest logit. Softmax is monotonic,
# so applying it before argmax is unnecessary.
predict = torch.argmax(logits, dim=1)

# scikit-learn works on NumPy arrays, and a tensor living on the GPU cannot be
# converted to NumPy implicitly, so move it to host memory and convert it
# explicitly before mapping class ids back to the original labels.
predict = le.inverse_transform(predict.cpu().numpy())

submission['label'] = predict
submission.to_csv("submission.csv", index=False)

In [35]:
#  try 1 : 0.02462046779692173