In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

path = './drive/MyDrive/ML2024/'
filename = 'Banknote.csv'

data_pd = pd.read_csv(path + filename)
data = data_pd.values

**독립변수(4개의 input)와 종속변수(0,1) 설정**

In [8]:
x = data[:, :-1]
y = data[:, -1]

**data를 훈련데이터와 테스트데이터로 분할**

In [9]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0, shuffle=True, stratify=y)

**다변량 선형회귀모델 구현**

In [19]:
learning_rate = 0.3
learning_iteration = 1000

# 회귀식
def h(x, weights, bias):
  return np.dot(x, weights) + bias

# 학습과정
def train_linear(x_train, y_train, x_test, y_test):
  n_samples, n_features = x_train.shape
  weights = np.ones(n_features)
  bias = 1.0

  print("[Linear Regression]")

  for i in range(learning_iteration):
    # 예측값 구하기
    y_predicted_train = h(x_train, weights, bias)

    # 에러 구하기
    error = y_predicted_train - y_train

    # weight와 bias 업데이트
    weights -= learning_rate * (error[:, np.newaxis] * x_train).mean(axis=0)
    bias -= learning_rate * error.mean()

    if (i + 1) % 100 == 0:
      y_pred_train_class = predict_threshold(x_train, weights, bias)
      train_accuracy = np.mean(y_pred_train_class == y_train)
      print(f"epochs {i + 1} TRAIN ACC: {train_accuracy:.6f}")

  return weights, bias

# 데이터 예측값 반환
def predict_linear(x, weights, bias):
  return np.dot(x, weights) + bias

def predict_threshold(x, weights, bias):
  y_predicted = predict_linear(x, weights, bias)
  y_predicted_class = []

  # 예측값이 0.5보다 크면 1, 작으면 0을 반환
  for i in y_predicted:
    if i > 0.5:
      y_predicted_class.append(1)
    else:
      y_predicted_class.append(0)

  return y_predicted_class

# 모델 학습
weights, bias = train_linear(x_train, y_train, x_test, y_test)

y_pred_test = predict_threshold(x_test, weights, bias) # 테스트 데이터 대상 예측수행
test_accuracy = np.mean(y_pred_test == y_test) # 정확도 계산
print(f"TEST ACC: {test_accuracy:.6f}")


[Linear Regression]
epochs 100 TRAIN ACC: 0.373747
epochs 200 TRAIN ACC: 0.373747
epochs 300 TRAIN ACC: 0.555150
epochs 400 TRAIN ACC: 0.555150
epochs 500 TRAIN ACC: 0.555150
epochs 600 TRAIN ACC: 0.555150
epochs 700 TRAIN ACC: 0.555150
epochs 800 TRAIN ACC: 0.555150
epochs 900 TRAIN ACC: 0.555150
epochs 1000 TRAIN ACC: 0.555150
TEST ACC: 0.556364


  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)


**다변량 로지스틱 회귀모델 구현**

In [20]:
learning_rate = 0.3
learning_iteration = 1000

# 시그모이드 함수(활성화 함수) : 0과 1 사이의 확률값을 반환
def sigmoid(z):
  return 1 / (1 + np.exp(-z))

def h(x, weights, bias):
  # 활성화함수(시그모이드) 적용한 회귀식
  return sigmoid(np.dot(x, weights) + bias)

# 학습과정
def train_sigmoid(x_train, y_train, x_test, y_test):
  n_samples, n_features = x_train.shape
  weights = np.ones(n_features)
  bias = 1.0

  print("[Logistic Regression]")

  for i in range(learning_iteration):
    # 예측값 구하기
    y_predicted_train = h(x_train, weights, bias)

    # 에러 구하기
    error = y_predicted_train - y_train

    # weights와 bias 업데이트
    weights -= learning_rate * (error[:, np.newaxis] * x_train).mean(axis=0)
    bias -= learning_rate * error.mean()

    # 100번마다 훈련 데이터의 정확도 출력
    if (i + 1) % 100 == 0:
      y_pred_train_class = predict_threshold(x_train, weights, bias)
      train_accuracy = np.mean(y_pred_train_class == y_train)
      print(f"epochs {i + 1} TRAIN ACC: {train_accuracy:.6f}")

  return weights, bias

def predict_threshold(x, weights, bias):
  # 예측값이 0.5보다 크면 1, 작으면 0을 반환
  y_predicted = h(x, weights, bias)
  y_predicted_class = []

  for i in y_predicted:
    if i > 0.5:
      y_predicted_class.append(1)
    else:
      y_predicted_class.append(0)

  return y_predicted_class

# 모델 학습
weights, bias = train_sigmoid(x_train, y_train, x_test, y_test)

y_pred = predict_threshold(x_test, weights, bias) # 테스트 데이터 대상 예측수행
test_accuracy = np.mean(y_pred == y_test) # 정확도 계산
print(f"TEST ACC: {test_accuracy:.6f}")

[Logistic Regression]
epochs 100 TRAIN ACC: 0.986326
epochs 200 TRAIN ACC: 0.986326
epochs 300 TRAIN ACC: 0.986326
epochs 400 TRAIN ACC: 0.988149
epochs 500 TRAIN ACC: 0.988149
epochs 600 TRAIN ACC: 0.988149
epochs 700 TRAIN ACC: 0.988149
epochs 800 TRAIN ACC: 0.988149
epochs 900 TRAIN ACC: 0.988149
epochs 1000 TRAIN ACC: 0.988149
TEST ACC: 0.996364
