In [None]:
# library import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sklearn
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [None]:
# Load the training data and drop every row that contains a missing value.
train_data = pd.read_csv("train.csv")
train_data_drop = train_data.dropna()

# Split the NaN-free frame into positional column groups.
# The ranges are half-open; positional columns 0 and 27 belong to no group.
idx1to26_drop, idx28to40_drop, idx41to56_drop, idx57to68_drop = (
    train_data_drop.iloc[:, first:last]
    for first, last in ((1, 27), (28, 41), (41, 57), (57, 69))
)
target_drop = train_data_drop.iloc[:, 69]  # single position -> Series

main_data_drop = pd.concat([idx1to26_drop, idx28to40_drop], axis=1)

In [None]:
# Correlation of every column of the first group with the target: the last
# column of the correlation matrix (its final entry is the target with itself).
all_main1_data_drop = (
    pd.concat([idx1to26_drop, target_drop], axis=1)
    .corr()
    .iloc[:, -1]
)

In [None]:
# Display the first group's per-column correlations with the target.
all_main1_data_drop

Q1           0.289843
Q2           0.266649
Q3           0.202462
Q4           0.267638
Q5           0.237976
Q6           0.307921
Q7           0.204040
Q8           0.228912
Q9           0.226813
Q10          0.181467
Q11          0.215895
Q12          0.249189
Q13          0.212534
Q14          0.234190
Q15          0.248817
Q16          0.172716
Q17          0.201207
Q18          0.178439
Q19          0.156633
Q20          0.215139
Q21          0.158997
Q22          0.162194
Q23          0.200746
Q24          0.235224
Q25          0.177255
Q26          0.249805
nerdiness    1.000000
Name: nerdiness, dtype: float64

In [None]:
# Correlation of every column of the second group with the target: the last
# column of the correlation matrix (its final entry is the target with itself).
all_main2_data_drop = (
    pd.concat([idx28to40_drop, target_drop], axis=1)
    .corr()
    .iloc[:, -1]
)

In [None]:
# Print the second group's per-column correlations with the target.
print(all_main2_data_drop)

introelapse     0.006075
testelapse      0.005393
surveyelapse    0.012989
TIPI1          -0.099353
TIPI2           0.027334
TIPI3           0.033205
TIPI4           0.044497
TIPI5           0.035008
TIPI6           0.084720
TIPI7           0.035616
TIPI8           0.032185
TIPI9          -0.015910
TIPI10         -0.099571
nerdiness       1.000000
Name: nerdiness, dtype: float64


In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for evaluation (train_test_split's default test
# share is 25%). The fixed random_state makes the split, and every metric
# computed on it, reproducible across kernel restarts.
main_train_input, main_test_input, main_train_target, main_test_target = train_test_split(
    idx1to26_drop, target_drop, random_state=42
)

In [None]:
# CustomDataset
class CustomDataset(Dataset):
  """Map-style dataset over a table of numeric features and its target column.

  Features and targets are converted to float32 tensors once, at construction
  time, so ``__getitem__`` is a plain tensor index instead of a per-sample
  pandas lookup followed by a tensor conversion.

  Args:
    input: 2-D table of numeric features, one row per sample
      (e.g. a ``pandas.DataFrame``).
    target: 1-D sequence of numeric targets aligned row by row with ``input``
      (e.g. a ``pandas.Series``).

  Raises:
    ValueError: if ``input`` and ``target`` do not have the same number of rows.
  """

  def __init__(self, input, target):
    # The original objects stay reachable under their original attribute names.
    self.input = input
    self.target = target

    # np.array copies, so the tensors never alias the caller's data.
    self._samples = torch.from_numpy(np.array(input, dtype=np.float32))
    self._labels = torch.from_numpy(np.array(target, dtype=np.float32))

    if len(self._samples) != len(self._labels):
      raise ValueError(
          f"input has {len(self._samples)} rows but target has {len(self._labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self._labels)

  def __getitem__(self, idx):
    """Return ``(sample, label)``: a 1-D float32 feature tensor and a 0-d float32 label."""
    return self._samples[idx], self._labels[idx]

In [None]:
BATCH_SIZE = 128

train_dataset = CustomDataset(main_train_input, main_train_target)
test_dataset = CustomDataset(main_test_input, main_test_target)

# Reshuffle the training samples every epoch so the mini-batches differ between
# epochs; the evaluation loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
class NN(nn.Module):
  """Feed-forward network ``in_features -> 20 -> 16 -> 8 -> 4 -> 1`` ending in a sigmoid.

  Hidden layers use ReLU; dropout is applied once, after the first hidden
  layer. The output is a value in (0, 1) per sample.

  Args:
    in_features: number of input features per sample (default 26).
    dropout: drop probability of the dropout layer (default 0.3).
  """

  def __init__(self, in_features=26, dropout=0.3):
    # Every nn sub-module must be created inside __init__ so that it is
    # registered on the module.
    super().__init__()
    self.fc1 = nn.Linear(in_features=in_features, out_features=20)
    self.fc2 = nn.Linear(in_features=20, out_features=16)
    self.fc3 = nn.Linear(in_features=16, out_features=8)
    self.fc4 = nn.Linear(in_features=8, out_features=4)
    self.fc5 = nn.Linear(in_features=4, out_features=1)
    self.drop = nn.Dropout(dropout)

  def forward(self, input_data):
    """Map a ``(..., in_features)`` float tensor to sigmoid outputs of shape ``(..., 1)``."""
    out = F.relu(self.fc1(input_data))
    out = self.drop(out)  # only active in training mode
    out = F.relu(self.fc2(out))
    out = F.relu(self.fc3(out))
    out = F.relu(self.fc4(out))
    out = torch.sigmoid(self.fc5(out))
    return out

# Create the model object and move its parameters to the selected device.
model_main = NN()
model_main = model_main.to(device)
print(model_main)

NN(
  (fc1): Linear(in_features=26, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=8, bias=True)
  (fc4): Linear(in_features=8, out_features=4, bias=True)
  (fc5): Linear(in_features=4, out_features=1, bias=True)
  (drop): Dropout(p=0.3, inplace=False)
)


In [None]:
# loss = binary cross entropy on probabilities (the model's last layer is a sigmoid)
# optimizer = Adam

learning_rate = 0.001
# Misspelled original name, kept as an alias for any cell that still refers to it.
laerning_rate = learning_rate

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_main.parameters(), lr=learning_rate)

In [None]:
epochs = 1000
aggregated_losses = []  # sample-weighted mean training loss, one entry per epoch

# Make sure dropout is active even when an earlier cell left the model in eval mode.
model_main.train()
for epoch in range(epochs):
    loss_sum = 0.0
    samples_seen = 0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        # The model returns shape (batch, 1); flatten it to match the target's shape.
        output = model_main(x).view(-1)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion averages over the batch, so weight by batch size to get
        # an exact per-sample mean for the epoch (the last batch may be smaller).
        loss_sum += loss.item() * len(y)
        samples_seen += len(y)

    if samples_seen:
        aggregated_losses.append(loss_sum / samples_seen)

In [None]:
# Inspect the model outputs next to the targets for the first test batch.
was_training = model_main.training
model_main.eval()  # switch dropout off so the printed outputs are deterministic
with torch.no_grad():  # inference only, no autograd graph needed
  for x, y in test_loader:
    x = x.to(device)
    y = y.to(device)
    outputs = model_main(x).view(-1)
    print(outputs)
    print(y)
    break  # only the first batch is inspected
model_main.train(was_training)  # restore whatever mode the model was in

tensor([0.8771, 0.6767, 0.7028, 0.8529, 0.8038, 0.9532, 0.4466, 0.2973, 0.8315,
        0.9092, 0.2881, 0.8377, 0.6855, 0.8162, 0.8578, 0.3698, 0.7550, 0.4080,
        0.6395, 0.6539, 0.9110, 0.3218, 0.2656, 0.7036, 0.4442, 0.4612, 0.8128,
        0.3500, 0.3371, 0.6573, 0.5310, 0.7258, 0.5786, 0.7924, 0.7771, 0.2362,
        0.1537, 0.9216, 0.3835, 0.3078, 0.3317, 0.8021, 0.4255, 0.6690, 0.6281,
        0.8650, 0.7516, 0.7902, 0.9404, 0.0799, 0.0722, 0.8657, 0.2934, 0.7327,
        0.5973, 0.7591, 0.1560, 0.5933, 0.0579, 0.5754, 0.7952, 0.6812, 0.8555,
        0.4151, 0.3540, 0.8584, 0.5293, 0.8578, 0.2770, 0.6774, 0.3992, 0.8196,
        0.6188, 0.3444, 0.6859, 0.8706, 0.3106, 0.7782, 0.7121, 0.2822, 0.4100,
        0.8264, 0.4244, 0.3518, 0.2427, 0.8270, 0.9454, 0.4112, 0.7096, 0.4587,
        0.5622, 0.1514, 0.8690, 0.2363, 0.5401, 0.7753, 0.8377, 0.7684, 0.7073,
        0.5312, 0.7362, 0.5249, 0.2945, 0.3000, 0.2790, 0.7157, 0.9399, 0.8935,
        0.3164, 0.6800, 0.1173, 0.8238, 

In [None]:
# Scratch cell: print a hand-written integer tensor of zeros and ones.
print(torch.tensor([1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1]))

tensor([1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1])


In [None]:
# 모델 평가 코드
! pip install torchmetrics
import torchmetrics

metrics = torchmetrics.Accuracy()

predict = torch.round(model_main(torch.Tensor(main_test_input.to_numpy()).to(device)))
target = torch.tensor(main_test_target.to_numpy())

acc = torchmetrics.functional.accuracy(predict, target.to(device))
print(acc.item())

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
0.7126582264900208


In [None]:
# testfile
