## OR 데이터 인식

# 사이킷런(scikit-learn)의 디자인 패턴

- 데이터 읽기
- 모델 객체 생성
- 모델 학습
- 예측
- 성능 평가

In [1]:
from sklearn.linear_model import Perceptron

In [2]:
# Build the training set for the OR gate: every combination of two binary inputs.
x=[[a,b] for a in (0,1) for b in (0,1)]
# The label is +1 when at least one input is 1, otherwise -1.
y=[1 if (a or b) else -1 for a,b in x]

In [4]:
# Train the perceptron with fit().
p=Perceptron()    # create a Perceptron estimator with no arguments and bind it to p
p.fit(x,y)        # fit the model to the training inputs x and their labels y

Perceptron()

In [5]:
# Print the parameters of the trained perceptron, i.e. the weights w0, w1, w2.
print("학습된 퍼셉트론의 매개변수: ",p.coef_,p.intercept_)
# coef_ holds w1 and w2; intercept_ holds the bias term w0.

학습된 퍼셉트론의 매개변수:  [[2. 2.]] [-1.]


In [6]:
# Treat the training set x as test data and print the model's predictions for it.
print("훈련집합에 대한 예측: ",p.predict(x))

훈련집합에 대한 예측:  [-1  1  1  1]


In [7]:
# score() compares the perceptron's predictions for x with the labels y and
# returns the fraction of correctly classified samples; scale it to a percentage.
print("정확률 측정: ",p.score(x,y)*100,"%")

정확률 측정:  100.0 %


## 필기 숫자 데이터 인식

In [1]:
from sklearn import datasets
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# Load the digits dataset and split it into a training set and a test set.
digit=datasets.load_digits()
# train_size=0.6 assigns 60% of the samples to the training set.
# NOTE(review): no random_state is passed, so the split presumably differs between runs - confirm whether reproducibility is needed.
x_train,x_test,y_train,y_test=train_test_split(digit.data,digit.target,train_size=0.6)

In [3]:
# Train a Perceptron with fit().
p=Perceptron(max_iter=100,eta0=0.001,verbose=0) # create the model object (scikit-learn's Perceptron class)
p.fit(x_train,y_train) # train the model on the digits training split

Perceptron(eta0=0.001, max_iter=100)

In [4]:
res=p.predict(x_test) # predict labels for the test set with the trained model

### 성능 측정

In [5]:
# Confusion matrix: row index = predicted label, column index = true label.
conf=np.zeros((10,10))
# Unbuffered in-place add, so every (prediction, label) pair is counted even when it repeats.
np.add.at(conf,(res,y_test),1)
print(conf)

[[66.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0. 61.  0.  0.  1.  0.  0.  0.  0.  1.]
 [ 0.  0. 75.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0. 63.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. 71.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  4.  0. 79.  0.  0.  0.  2.]
 [ 0.  1.  0.  0.  1.  0. 65.  1.  1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 62.  1.  0.]
 [ 0. 14.  2.  4.  2.  1.  1.  1. 70.  5.]
 [ 0.  1.  0.  1.  1.  0.  0.  1.  0. 58.]]


In [8]:
# Accuracy: correct predictions sit on the diagonal of the confusion matrix.
no_correct=sum(conf.diagonal())
# Divide by the number of predictions made on the test set.
accuracy=no_correct/len(res)
print(accuracy*100,"%")

93.18497913769124 %


# 다층 퍼셉트론 프로그래밍

## sklearn의 필기 숫자 데이터셋

In [2]:
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np

##### 데이터 셋을 읽고 훈련 집합과 테스트 집합으로 분할

In [3]:
# Load scikit-learn's handwritten-digits dataset.
digit=datasets.load_digits()

In [4]:
# Split features and targets; train_size=0.6 assigns 60% of the samples to the training set.
# NOTE(review): no random_state is passed, so the split presumably differs between runs - confirm whether reproducibility is needed.
x_train,x_test,y_train,y_test=train_test_split(digit.data,digit.target,train_size=0.6)

##### MLP 분류기 모델을 학습

In [5]:
# Multi-layer perceptron with a single hidden layer of 100 units, trained with
# the 'sgd' solver (learning rate 0.001, mini-batches of 32, at most 300 epochs).
# hidden_layer_sizes is written as the 1-tuple (100,): without the trailing
# comma, (100) would be the plain integer 100 rather than a tuple of layer sizes.
mlp=MLPClassifier(
    hidden_layer_sizes=(100,),
    learning_rate_init=0.001,
    batch_size=32,
    max_iter=300,
    solver='sgd',
    verbose=True,
)

##### 하이퍼 매개변수 살피기
- hidden_layer_sizes : 노드가 100개인 은닉층 하나를 둔다. (파이썬에서 `(100)`은 튜플이 아니라 정수 100이며, 원소가 하나인 튜플은 `(100,)`로 쓴다.)
  > (100,80) : 노드가 100개인 은닉층과 노드가 80개인 은닉층을 두어 은닉층이 2개인 다층 퍼셉트론
- 나머지 : 학습률을 0.001, 미니 배치 크기를 32, 최대 세대수를 300으로 설정하고 **스토캐스틱 경사 하강법**(SGD)을 사용한다.

In [6]:
# Train the MLP on the training split; the per-iteration loss lines are printed because verbose=True was set.
mlp.fit(x_train,y_train)

Iteration 1, loss = 1.71552641
Iteration 2, loss = 0.28271789
Iteration 3, loss = 0.19721607
Iteration 4, loss = 0.14870148
Iteration 5, loss = 0.12195080
Iteration 6, loss = 0.10326788
Iteration 7, loss = 0.08917248
Iteration 8, loss = 0.07782328
Iteration 9, loss = 0.06707542
Iteration 10, loss = 0.06306242
Iteration 11, loss = 0.05571084
Iteration 12, loss = 0.05047992
Iteration 13, loss = 0.04746593
Iteration 14, loss = 0.04202160
Iteration 15, loss = 0.04102589
Iteration 16, loss = 0.03661391
Iteration 17, loss = 0.03613750
Iteration 18, loss = 0.03075455
Iteration 19, loss = 0.02901092
Iteration 20, loss = 0.02766658
Iteration 21, loss = 0.02703221
Iteration 22, loss = 0.02490051
Iteration 23, loss = 0.02379030
Iteration 24, loss = 0.02268045
Iteration 25, loss = 0.02271619
Iteration 26, loss = 0.02156309
Iteration 27, loss = 0.01950871
Iteration 28, loss = 0.01895399
Iteration 29, loss = 0.01867952
Iteration 30, loss = 0.01753876
Iteration 31, loss = 0.01716896
Iteration 32, los

MLPClassifier(batch_size=32, hidden_layer_sizes=100, max_iter=300, solver='sgd',
              verbose=True)

##### 테스트 집합으로 예측

In [7]:
# Predict labels for the held-out test set with the trained MLP.
res=mlp.predict(x_test)

##### 혼동행렬

In [9]:
# 10x10 matrix of zeros that will hold the confusion-matrix counts.
conf=np.zeros((10,10))

In [10]:
# Count each (predicted, true) label pair: rows are predictions, columns are true labels.
# np.add.at accumulates unbuffered, so repeated index pairs are each counted.
np.add.at(conf,(res,y_test),1)
print(conf)

[[68.  0.  0.  0.  0.  1.  1.  0.  1.  0.]
 [ 0. 71.  2.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  0. 75.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0. 71.  0.  0.  0.  1.  0.  1.]
 [ 1.  1.  0.  0. 78.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. 60.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  1. 59.  0.  1.  0.]
 [ 0.  0.  0.  0.  0.  1.  0. 72.  0.  0.]
 [ 0.  2.  1.  1.  1.  0.  1.  0. 62.  0.]
 [ 0.  0.  0.  0.  0.  3.  0.  1.  1. 77.]]


##### 정확률 계산

In [11]:
# Correct predictions sit on the diagonal of the confusion matrix.
no_correct=sum(conf.diagonal())
# Accuracy is the share of correct predictions among all test predictions.
accuracy=no_correct/len(res)
print("테스트 집합에 대한 정확률은 ", accuracy*100,"%입니다.")

테스트 집합에 대한 정확률은  96.38386648122392 %입니다.


## MNIST 데이터셋을 다층 퍼셉트론으로

In [12]:
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import numpy as np

##### 데이터 셋을 읽고 훈련/테스트 데이터로 분할

In [13]:
# Fetch the 'mnist_784' dataset from OpenML. as_frame=False requests NumPy
# arrays; newer scikit-learn releases may otherwise return a pandas
# DataFrame/Series, which changes how the slicing and indexing below behave.
mnist=fetch_openml('mnist_784',as_frame=False)
# Scale the features by 1/255 (presumably mapping 0-255 pixel intensities to [0, 1]).
mnist.data=mnist.data/255.0
# The first 60,000 samples form the training set; the remainder is the test set.
x_train=mnist.data[:60000]
x_test=mnist.data[60000:]
# Convert the targets to int16 class labels.
y_train=np.int16(mnist.target[:60000])
y_test=np.int16(mnist.target[60000:])

##### MLP 분류기 모델 학습

In [14]:
# Multi-layer perceptron with a single hidden layer of 100 units, trained with
# the 'adam' solver (learning rate 0.001, mini-batches of 512, at most 300 epochs).
# hidden_layer_sizes is written as the 1-tuple (100,): without the trailing
# comma, (100) would be the plain integer 100 rather than a tuple of layer sizes.
mlp=MLPClassifier(
    hidden_layer_sizes=(100,),
    learning_rate_init=0.001,
    batch_size=512,
    max_iter=300,
    solver='adam',
    verbose=True,
)

In [15]:
# Train the MLP on the MNIST training split; the per-iteration loss lines are printed because verbose=True was set.
mlp.fit(x_train,y_train)

Iteration 1, loss = 0.61456712
Iteration 2, loss = 0.26291586
Iteration 3, loss = 0.21087402
Iteration 4, loss = 0.17928135
Iteration 5, loss = 0.15531062
Iteration 6, loss = 0.13650675
Iteration 7, loss = 0.12098139
Iteration 8, loss = 0.10847372
Iteration 9, loss = 0.09848476
Iteration 10, loss = 0.08964481
Iteration 11, loss = 0.08219066
Iteration 12, loss = 0.07550918
Iteration 13, loss = 0.06911999
Iteration 14, loss = 0.06360781
Iteration 15, loss = 0.05905341
Iteration 16, loss = 0.05464478
Iteration 17, loss = 0.05058346
Iteration 18, loss = 0.04678050
Iteration 19, loss = 0.04333599
Iteration 20, loss = 0.04070796
Iteration 21, loss = 0.03792621
Iteration 22, loss = 0.03499545
Iteration 23, loss = 0.03292134
Iteration 24, loss = 0.03050904
Iteration 25, loss = 0.02794301
Iteration 26, loss = 0.02661371
Iteration 27, loss = 0.02420656
Iteration 28, loss = 0.02317294
Iteration 29, loss = 0.02163818
Iteration 30, loss = 0.01968901
Iteration 31, loss = 0.01823704
Iteration 32, los

MLPClassifier(batch_size=512, hidden_layer_sizes=100, max_iter=300,
              verbose=True)

##### 예측

In [16]:
# Predict labels for the MNIST test split with the trained MLP.
res=mlp.predict(x_test)

##### 혼동 행렬

In [17]:
# Confusion matrix: row index = predicted label, column index = true label.
# Counters are int64: an int16 cell would silently wrap once it exceeds 32767.
conf=np.zeros((10,10),dtype=np.int64)
# Unbuffered in-place add, so every (prediction, label) pair is counted even when it repeats.
np.add.at(conf,(res,y_test),1)
print(conf)

[[ 971    0    4    0    1    3    4    1    4    2]
 [   0 1126    3    0    1    0    2    3    1    2]
 [   1    3 1005    6    5    0    2   12    3    0]
 [   0    1    6  988    1   10    1    7    8    5]
 [   1    0    0    1  963    2    2    1    3    8]
 [   2    1    0    3    0  867    7    0    4    4]
 [   2    1    2    0    2    5  937    0    1    0]
 [   1    1    5    4    2    1    1  996    3    2]
 [   2    2    7    3    1    3    2    3  943    2]
 [   0    0    0    5    6    1    0    5    4  984]]


##### 정확률 계산

In [19]:
# Correct predictions sit on the diagonal of the confusion matrix.
no_correct=sum(conf.diagonal())
# Accuracy is the share of correct predictions among all test predictions.
accuracy=no_correct/len(res)
print("정확률은",accuracy*100,"%입니다.")

정확률은 97.8 %입니다.
