<a href="https://colab.research.google.com/github/jiwoong2/deeplearning/blob/main/gunpoint_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from torch import nn, optim
from google.colab import drive
drive.mount('/content/drive')
import torch.nn.functional as F
from tqdm import tqdm
import time
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# File paths of the train/test splits under the mounted Drive folder
_DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/data'
train_file_path = _DATA_DIR + '/GunPointOldVersusYoung_TRAIN.txt'
test_file_path = _DATA_DIR + '/GunPointOldVersusYoung_TEST.txt'

# Dataset class definition
class GunPointDataset(Dataset):
    """Labelled samples loaded from a whitespace-delimited text file.

    Every row of the file holds the class label in the first column followed
    by the feature values. One is subtracted from the labels so that they
    become 0 and 1 (the file is expected to use 1 and 2).

    Args:
        file_path: Path of the text file; it is read with ``np.loadtxt``.
        scaler: Optional scikit-learn style scaler exposing ``fit_transform``
            and ``transform``. ``None`` leaves the features unscaled.
        fit: Controls whether ``scaler`` is fitted on this file.
            ``True`` always calls ``fit_transform``; ``False`` only calls
            ``transform``. The default ``None`` fits a scaler that has no
            fitted state yet and otherwise reuses the statistics it already
            learned, so a scaler fitted on the training file is not re-fitted
            on (and overwritten by) a later validation/test file.
    """

    def __init__(self, file_path, scaler=None, fit=None):
        # ndmin=2 keeps a file with a single row two-dimensional so the
        # column slicing below still works.
        data = np.loadtxt(file_path, ndmin=2)
        self.labels = data[:, 0].astype(int) - 1  # shift labels to 0 and 1
        self.features = data[:, 1:]

        if scaler is not None:
            if fit is None:
                fit = not self._is_fitted(scaler)
            if fit:
                self.features = scaler.fit_transform(self.features)
            else:
                self.features = scaler.transform(self.features)

    @staticmethod
    def _is_fitted(scaler):
        """Return True if ``scaler`` already carries fitted state.

        Uses the scikit-learn convention that learned attributes end with a
        single trailing underscore (e.g. ``mean_``, ``scale_``).
        """
        state = getattr(scaler, "__dict__", {})
        return any(
            name.endswith("_") and not name.startswith("__") for name in state
        )

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Return the sample at ``idx`` as a float32 tensor together with its label
        return torch.tensor(self.features[idx], dtype=torch.float), self.labels[idx]

# Without scaling the inputs, the outputs of the freshly initialised model
# span a very wide range.
scaler = StandardScaler()

# Create the dataset instances.
# The scaler is fitted while loading the training file; the test file is
# loaded unscaled and then transformed with those training statistics, so no
# test-set information leaks into the preprocessing.
train_dataset = GunPointDataset(train_file_path, scaler)
test_dataset = GunPointDataset(test_file_path)
test_dataset.features = scaler.transform(test_dataset.features)

# Split the training file into training and validation subsets.
# NOTE: the scaler above was fitted on the whole training file, i.e. before
# this split, so the validation subset contributed to the scaling statistics.
train_size = int(0.9 * len(train_dataset))  # 90% of the samples are used for training
val_size = len(train_dataset) - train_size

train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create the data loaders. Only the training loader shuffles: the evaluation
# metrics are sums over all samples and do not depend on the order.
batch_size = 4
train_DL = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_DL = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_DL = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Sanity check: print the first training batch only
features, labels = next(iter(train_DL))
print(features.shape, labels.shape)
print(f"Features: {features}, Labels: {labels}")

In [None]:
class MLP(nn.Module):
    """Two-layer perceptron for binary classification.

    The network is ``Linear -> ReLU -> Linear -> Sigmoid`` and produces one
    value between 0 and 1 per sample.

    Args:
        in_features: Number of input features per sample. Defaults to 150,
            the size the network was originally hard-coded for.
        hidden_features: Width of the hidden layer. Defaults to 10.
    """

    def __init__(self, in_features: int = 150, hidden_features: int = 10):
        super().__init__()

        # Even a hidden layer of only 10 nodes converges quickly.
        # The attribute name and layer order are kept so that existing
        # state_dict keys ("linear.0.*", "linear.2.*") stay valid.
        self.linear = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Map a ``(batch, in_features)`` input to a ``(batch, 1)`` output."""
        return self.linear(x)

In [None]:
# Number of samples in the dataset that train_DL iterates over
len(train_DL.dataset)

In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def loss_epoch(model, DL, optimizer = None):
    """Run one pass over ``DL`` and return loss/accuracy statistics.

    Args:
        model: Callable mapping a feature batch to one probability per
            sample (output shape ``(batch, 1)`` or ``(batch,)``).
        DL: Data loader yielding ``(features, labels)`` batches; ``DL.dataset``
            must support ``len``.
        optimizer: When given, a backward pass and an optimizer step are
            performed for every batch (training). When ``None`` the pass only
            measures loss and accuracy.

    Returns:
        Tuple ``(loss_e, accuracy_e, rcorrect)``: the binary cross-entropy
        averaged over all samples, the accuracy in percent, and the number of
        correctly classified samples.
    """
    N = len(DL.dataset)

    # Put the batches on the device the model lives on. Using the global
    # DEVICE unconditionally fails when CUDA is available but the model was
    # never moved there. DEVICE remains the fallback for models without
    # parameters.
    params = model.parameters() if hasattr(model, "parameters") else iter(())
    first_param = next(params, None)
    device = first_param.device if first_param is not None else DEVICE

    rloss = 0
    rcorrect = 0
    for x_batch, y_batch in tqdm(DL, leave=False):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Flatten to (batch,). A plain squeeze() would also drop the batch
        # dimension of a final batch holding a single sample, and the loss
        # would then reject the input/target shape mismatch.
        y_hat = model(x_batch).reshape(-1)

        loss = F.binary_cross_entropy(y_hat, y_batch.float())

        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # loss is the batch mean; weight it by the batch size so the epoch
        # value is a per-sample average even when the last batch is smaller
        rloss += loss.item() * x_batch.shape[0]

        pred = (y_hat > 0.5).float()
        rcorrect += torch.sum(pred == y_batch).item()

    loss_e = rloss / N
    accuracy_e = rcorrect / N * 100

    return loss_e, accuracy_e, rcorrect

def Test(model, test_DL):
    """Evaluate ``model`` on ``test_DL`` and report the result.

    Switches the model to eval mode, runs one pass without gradient tracking,
    prints the loss and accuracy, and returns the accuracy in percent rounded
    to one decimal place.
    """
    model.eval()
    with torch.no_grad():
        epoch_stats = loss_epoch(model, test_DL)
    test_loss, test_acc, rcorrect = epoch_stats

    print()
    print(f"Test loss: {round(test_loss,5)}")
    print(f"Test accuracy: {rcorrect}/{len(test_DL.dataset)} ({round(test_acc,1)} %)")

    rounded_acc = round(test_acc, 1)
    return rounded_acc

def Train(model, train_DL, val_DL, optimizer, EPOCH):
    """Train ``model`` for ``EPOCH`` epochs, validating after each one.

    Every epoch runs an optimisation pass over ``train_DL`` followed by a
    gradient-free pass over ``val_DL`` and prints both losses, both
    accuracies and the elapsed time.

    Returns:
        Dict with keys ``"train"`` and ``"val"``, each a list holding one
        loss value per epoch. Accuracies are collected too but not returned.
    """
    phases = ("train", "val")
    loss_history = {phase: [] for phase in phases}
    acc_history = {phase: [] for phase in phases}

    for _epoch in range(EPOCH):
        epoch_start = time.time()

        # Optimisation pass (train mode)
        model.train()
        train_loss, train_acc, _ = loss_epoch(model, train_DL, optimizer)
        loss_history["train"].append(train_loss)
        acc_history["train"].append(train_acc)

        # Validation pass (eval mode, no gradients)
        model.eval()
        with torch.no_grad():
            val_loss, val_acc, _ = loss_epoch(model, val_DL)
        loss_history["val"].append(val_loss)
        acc_history["val"].append(val_acc)

        # Per-epoch report
        print(f"train loss: {round(train_loss,5)}, "
              f"val loss: {round(val_loss,5)} \n"
              f"train acc: {round(train_acc,1)} %, "
              f"val acc: {round(val_acc,1)} %, time: {round(time.time()-epoch_start)} s")
        print("-"*20)

    return loss_history

In [None]:
# Print the number of samples in the dataset behind train_DL
print(len(train_DL.dataset))

In [None]:
# Learning rate for Adam. With an exponent of -1 (presumably LR = 1e-1) the
# model converges right away to a local minimum where it outputs only 0 or 1.
LR = 1e-3
model = MLP()
optimizer = optim.Adam(params=model.parameters(), lr=LR)

In [None]:
# Train for 5 epochs; Train returns the per-epoch train/val loss history
Train(model, train_DL, val_DL, optimizer, 5)

In [None]:
# Evaluate on the test loader; Test prints loss/accuracy and returns the rounded accuracy
Test(model, test_DL)

In [None]:
# Debug cell: walk through a single training batch by hand and print every
# intermediate value (model output, loss, thresholded predictions, labels,
# number of correct predictions).
# NOTE: this performs a real optimizer step, so it updates the model weights.
N = len(train_DL.dataset)  # only used by the commented-out epoch averages at the bottom
rloss = 0; rcorrect = 0; corrects_b=0
for x_batch, y_batch in tqdm(train_DL, leave=False):

    # Batches are not moved to DEVICE here; model and data are used where they are
    y_hat = model(x_batch)

    print(y_hat)

    loss = F.binary_cross_entropy(y_hat.squeeze(), y_batch.float())

    print(f"loss = {loss}")

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Weight the batch-mean loss by the batch size
    loss_b = loss.item() * x_batch.shape[0]
    rloss += loss_b

    # Threshold the outputs at 0.5 to get 0/1 predictions
    pred = (y_hat > 0.5).float()

    print(pred)
    print(y_batch)

    corrects_b = torch.sum(pred.squeeze() == y_batch).item()

    print(corrects_b)
    rcorrect += corrects_b

    # Only the first batch is inspected
    break

# loss_e = rloss / N
# accuracy_e = rcorrect/N*100