In [1]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import sys
# colab으로 실행시 파일 업로드
if "google.colab" in sys.modules:
    from google.colab import files
    files.upload()

In [2]:
# colab으로 실행시 
if "google.colab" in sys.modules:
    def data_load():
        return pd.read_csv("binary_dataset.csv")
# 로컬에서 실행시
else:
    def data_load(file_dir):
        return pd.read_csv(file_dir)
    
# train, test 나누고 feature target 분리하는 함수
def data_split(dataframe):
    # data를 shuffle 후 80:20 비율로 분할
    train_df = dataframe.sample(frac=0.8)
    test_df = dataframe.drop(train_df.index, axis=0).sample(frac=1)
    
    # feature, target 분리 후 numpy array로 변환
    X_train, y_train = train_df.iloc[:, :-1].to_numpy(), train_df.iloc[:, -1].to_numpy()
    X_test, y_test = test_df.iloc[:, :-1].to_numpy(), test_df.iloc[:, -1].to_numpy()
    
    return X_train, y_train, X_test, y_test

# minibatch 만들기
def minibatch(X, y, batch, batch_size):
    # 데이터셋이 batch*batch_size+batch_size보다 클 경우
    if len(y) >= batch*batch_size + batch_size:
        X_batch = X[batch*batch_size:batch*batch_size+batch_size]
        y_batch = y[batch*batch_size:batch*batch_size+batch_size]
        
    # 작을 경우
    else:
        X_batch = X[batch*batch_size:]
        y_batch = y[batch*batch_size:]
        
    return X_batch, y_batch

# model
class ANN():
    def __init__(self, in_features:int, out_features:int, bias:bool=True):
        self.in_features = in_features
        self.out_features = out_features
        # 모델 호출 시 파라미터 초기화
        self.weight = np.zeros((self.in_features, self.out_features))
        self.bias = np.zeros((self.out_features))
        self.reset_parameter()
    
    # weight, bias 생성
    def reset_parameter(self):
        self.weight = np.random.randn(self.in_features, self.out_features)
        self.bias = np.random.randn(self.out_features)
        return self.weight, self.bias
    
    # sigmoid function
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    # forward function
    def forward(self, X):
        x = np.dot(X, self.weight) + self.bias
        x = self.sigmoid(x)
        return x
    
# loss function
class BCELoss():
    def __init__(self):
        pass
    
    def forward(self, _y, y):
        c = 1e-7
        return -np.sum(y*np.log(_y+c)+(1-y)*np.log(1-_y+c))/len(y)
    
# accuracy function
def accuracy(X, y):
    # 0.5보다 크면 1로, 작으면 0으로 변환
    X = np.where(X >= 0.5, 1, 0)
    # target과 shape 맞추기
    X = np.reshape(X, y.shape)
    return (X == y).sum()/len(y)

# train function
def train(model, criterion, X, y, batch_size=1):
    losses = []
    accs = []
    batchs = int(np.ceil(len(y) / batch_size))
    for batch in range(batchs):
        X_batch, label = minibatch(X, y, batch, batch_size)
        pred = model.forward(X_batch)
        loss = criterion.forward(pred, label)
        losses.append(loss)
        acc = accuracy(pred, label)
        accs.append(acc)
        
    # loss와 acc 평균 반환
    avg_loss = sum(losses) / len(losses)
    avg_acc = sum(accs) / len(accs)
    return avg_loss, avg_acc

# test function
def evaluate(model, criterion, X, y):
    losses = []
    accs = []
    pred = model.forward(X)
    loss = criterion.forward(pred, y)
    losses.append(loss)
    acc = accuracy(pred, y)
    accs.append(acc)
        
    # loss와 acc 평균 반환
    avg_loss = sum(losses) / len(losses)
    avg_acc = sum(accs) / len(accs)
    return avg_loss, avg_acc

In [3]:
def main():
    # 데이터 불러오기
    if "google.colab" in sys.modules:
        df = data_load()
    else:
        df_dir = "./data/binary_dataset.csv"
        df = data_load(df_dir)
        
    # 데이터 shuffle 후 train/test 및 features/target 분리
    X_train, y_train, X_test, y_test = data_split(df)
    
    # 변수 설정
    batch_size = 4
    
    # 모델 정의
    model = ANN(in_features=X_train.shape[1], out_features=1, bias=True)
    
    # loss function 정의
    criterion = BCELoss()
    
    # train
    loss, acc = train(model, criterion, X_train, y_train, batch_size=batch_size)
    
    # test
    test_loss, test_acc = evaluate(model, criterion, X_test, y_test)
    
    # 결과 출력
    print(f"[Epoch 1] TrainData - Loss = {loss}, Accuracy = {acc}")
    print(f"[Epoch 1] TestData - Loss = {test_loss}, Accuracy = {test_acc}")
main()

[Epoch 1] TrainData - Loss = 16.118095350958335, Accuracy = 0.75
[Epoch 1] TestData - Loss = 32.23619110191665, Accuracy = 0.5
