In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import sklearn
import random
# Select the compute device: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seed Python's and PyTorch's random number generators so runs are repeatable.
SEED = 1
random.seed(SEED)
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)

# Load the training set, the test set and the sample submission file.
train = pd.read_csv("/kaggle/input/2021-ai-midterm-p3/train.csv")
test = pd.read_csv("/kaggle/input/2021-ai-midterm-p3/test.csv")
submission = pd.read_csv("/kaggle/input/2021-ai-midterm-p3/submit_sample.csv")

# Preview the first rows of each table.
for frame in (train, test, submission):
    print(frame.head())

# Data preprocessing
from sklearn.preprocessing import StandardScaler, LabelEncoder,MinMaxScaler
from sklearn.impute import SimpleImputer


def _numeric_features(frame, drop_cols):
    """Return the feature columns of ``frame`` as a 2-D float array.

    Columns listed in ``drop_cols`` are removed. Cells equal to the string
    '?' (the missing-value marker this notebook imputes) become NaN, and
    everything else is cast to float, so the imputer and scaler always
    receive a purely numeric matrix regardless of how pandas parsed each
    column. Raises ValueError if a cell holds any other non-numeric text.
    """
    features = frame.drop(columns=drop_cols)
    return features.mask(features == '?').astype(float).to_numpy()


x_train = _numeric_features(train, ['index', 'target'])
y_train = np.array(train['target'])
x_test = _numeric_features(test, ['index'])

# Fill missing values with each column's most frequent value.
# The imputer is fitted on the training set only and reused for the test set.
# Imputing NaN on a float matrix (instead of the string '?') keeps this step
# working even when a split contains no '?' cells and is parsed as numeric.
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
x_train = imputer.fit_transform(x_train)
x_test = imputer.transform(x_test)

# Standardise the features; the scaler is fitted on the training set only.
# (MinMaxScaler is imported above as a drop-in alternative.)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Encode the target labels as integer class indices 0..K-1
# (the form torch.nn.CrossEntropyLoss expects).
le = LabelEncoder()
y_train = le.fit_transform(y_train)



from torch.utils.data import DataLoader,TensorDataset

# Convert the preprocessed arrays to tensors on the selected device:
# float32 features and int64 class indices.
x_train = torch.tensor(x_train, dtype=torch.float32).to(device)
x_test = torch.tensor(x_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)

# Pair each training row with its label.
train_dataset = TensorDataset(x_train, y_train)

# Sanity-check the tensor shapes.
print(x_train.size(), y_train.size())

# Layer sizes. The input width follows the preprocessed feature matrix instead
# of a hard-coded 13, so the network still matches the data if the number of
# feature columns changes; the output width is the number of encoded classes.
n_features = x_train.shape[1]
n_classes = len(le.classes_)

# Layers: n_features -> 26 -> 13 -> n_classes
layer1 = torch.nn.Linear(n_features, 26, bias=True).to(device)
layer2 = torch.nn.Linear(26, 13, bias=True).to(device)
layer3 = torch.nn.Linear(13, n_classes, bias=True).to(device)
# ReLU and Dropout hold no parameters, so one instance of each is reused
# after both hidden layers.
relu = torch.nn.ReLU().to(device)
dropout = torch.nn.Dropout(0.3).to(device)

# Xavier-normal initialisation of the weights, in layer order.
# Biases keep the initialisation torch.nn.Linear gave them.
for layer in (layer1, layer2, layer3):
    torch.nn.init.xavier_normal_(layer.weight)

# Model: two hidden blocks (Linear -> ReLU -> Dropout) and a linear output
# layer that produces raw class scores (logits).
model = torch.nn.Sequential(layer1, relu, dropout,
                            layer2, relu, dropout,
                            layer3).to(device)

# Training hyperparameters
lr = 0.01
epochs = 1000
batch_size = 100

# Cross-entropy loss for multi-class classification
loss = torch.nn.CrossEntropyLoss()

# Adam optimiser over all model parameters
optim = torch.optim.Adam(params=model.parameters(), lr=lr)

# Loader that yields the training set in chunks of batch_size rows.
# NOTE(review): the training loop in this cell feeds the whole x_train tensor
# to the model and never iterates over data_loader, so batch_size currently
# has no effect on training -- confirm whether mini-batching was intended.
data_loader = DataLoader(train_dataset, batch_size=batch_size)

# Training: one gradient step per epoch on the entire training set.
model.train()  # training mode, so the dropout layers are active
for epoch in range(epochs):
    # Forward pass and loss on the full training tensor
    logits = model(x_train)
    cost = loss(logits, y_train)

    # Clear the previous gradients, backpropagate, update the weights
    optim.zero_grad()
    cost.backward()
    optim.step()

    # Report the loss every 100 epochs
    if epoch % 100 == 0:
        print(epoch, cost.item())

In [3]:
# Training accuracy
model.eval()  # evaluation mode, so the dropout layers are disabled
with torch.no_grad():
    logits = model(x_train)

# The arg-max over the class dimension gives the predicted class index.
# Softmax is monotonic, so it is not needed to pick the top class.
# Move the result to the CPU and convert it to NumPy before handing it to
# scikit-learn: a CUDA tensor cannot be converted to an array implicitly.
pred_idx = torch.argmax(logits, dim=1).cpu().numpy()

# Predictions decoded back to the original label values
predict = le.inverse_transform(pred_idx)

# y_train holds the *encoded* class indices, so the comparison is done in the
# encoded space. Comparing decoded labels against encoded targets is only
# correct when the original labels happen to be exactly 0..K-1.
acc = pred_idx == y_train.cpu().detach().numpy()
print(acc.sum()/len(predict))

In [4]:
# Predict on the test set and save the submission file
model.eval()  # evaluation mode, so the dropout layers are disabled
with torch.no_grad():
    test_logits = model(x_test)

# Predicted class index per test row. Softmax is monotonic, so the arg-max of
# the raw logits is enough. Move to the CPU and convert to NumPy before
# calling scikit-learn: a CUDA tensor cannot be converted implicitly.
test_idx = torch.argmax(test_logits, dim=1).cpu().numpy()

# Decode the class indices back to the original label values
test_labels = le.inverse_transform(test_idx)

# The submission is binary: any label greater than 0 becomes 1, label 0 stays 0
predict = test_labels > 0
print(predict.astype(int))
submission['target'] = predict.astype(int)
submission.to_csv("submission.csv",index=False)