In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1. Data Load

In [None]:
# Load the banknote-authentication CSV into a DataFrame.
# NOTE(review): the absolute /content/ path looks environment-specific
# (presumably a hosted-notebook upload directory) - adjust when running elsewhere.
dataset = pd.read_csv('/content/data_banknote_authentication.csv')

# Preview the first three rows as the cell's rich output.
dataset.head(n=3)

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0


In [None]:
# Convert the DataFrame to a plain NumPy array so it can be sliced by position below.
banknote = dataset.to_numpy()

- variance, skewness, curtosis, entropy 4개의 특징(X)을 통해 위폐 여부를 판단하는 classification model을 생성함

# 2. X와 y 분리

In [None]:
# X: every row, all columns except the last one
#    (index 0 "variance" through index 3 "entropy").
# y: every row, last column only ("class").
X, y = banknote[:, :-1], banknote[:, -1]
print(X.shape, y.shape)

(1372, 4) (1372,)


# 3. Training data와 test data 분리

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as the test set; random_state pins the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(960, 4) (412, 4) (960,) (412,)


# 4. Scaling (data scale 조정)

- 이 notebook에서는 scaling 단계를 수행하지 않으며, 아래 model은 scaling되지 않은 원본 feature로 학습함

# 5. Logistic regression model 생성

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier with the library's default hyperparameters,
# fitted on the training split only.
lr = LogisticRegression()
lr.fit(X=X_train, y=y_train)

# 6. Prediction

In [None]:
# Predicted class labels (0 or 1) for every row of the held-out test set.
pred = lr.predict(X=X_test)

# 7. 성능 평가

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Compare the true labels (y_test) with the predictions (pred).
# The matrix is laid out as
# [[TN, FP]
#  [FN, TP]]
cm = confusion_matrix(y_test, pred)
print(cm)

# Flattening the 2x2 matrix row by row yields the counts in TN, FP, FN, TP order.
TN, FP, FN, TP = cm.ravel()
total = TP + TN + FP + FN

# accuracy   = (TP + TN) / total number of samples
acc = (TP + TN) / total * 100
# error rate = (FP + FN) / total number of samples
err = (FP + FN) / total * 100
# precision  = TP / (TP + FP)
precision = TP / (TP + FP) * 100
# recall     = TP / (TP + FN)
recall = TP / (TP + FN) * 100

# Report every metric as a percentage rounded to two decimals.
print('Accuracy  : ', round(acc, 2), '%')
print('Error rate: ', round(err, 2), '%')
print('Precision : ', round(precision, 2), '%')
print('Recall    : ', round(recall, 2), '%')

[[226   3]
 [  2 181]]
Accuracy  :  98.79 %
Error rate:  1.21 %
Precision :  98.37 %
Recall    :  98.91 %


In [None]:
# Cross-check the hand-computed metrics against sklearn's own implementations
# (these are fractions in [0, 1], not percentages); one value per line.
print(
    accuracy_score(y_test, pred),
    precision_score(y_test, pred),
    recall_score(y_test, pred),
    sep='\n',
)

0.9878640776699029
0.9836956521739131
0.9890710382513661


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 and support in a single text table.
print(classification_report(y_true=y_test, y_pred=pred))

              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99       229
         1.0       0.98      0.99      0.99       183

    accuracy                           0.99       412
   macro avg       0.99      0.99      0.99       412
weighted avg       0.99      0.99      0.99       412

