<a href="https://colab.research.google.com/github/hyunicecream/ML-DL/blob/main/RandomForest(iris_Data).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
# Load the iris dataset.
iris = load_iris()

In [None]:
# Build the train and test sets (80% train / 20% test).
# random_state is fixed so that Restart & Run All reproduces the same split,
# and therefore the same metrics in the cells below.
trainX, testX, trainY, testY = train_test_split(
    iris['data'], iris['target'], test_size=0.2, random_state=42
)

In [None]:
# Random forest of 100 depth-limited trees.
# random_state is fixed because tree construction is randomized; without it
# the fitted forest (and every score printed below) changes on each run.
rf = RandomForestClassifier(max_depth=3, n_estimators=100, random_state=42)
rf.fit(trainX, trainY)

RandomForestClassifier(max_depth=3)

In [None]:
# Build the confusion matrix for the test data (row: actual, col: predicted)
# and review the precision / recall / f1-score / support report.
predY = rf.predict(testX)
cm = confusion_matrix(testY, predY)

print('\nConfusion matrix :')
print(cm)
print()
print(classification_report(testY, predY, target_names=iris.target_names))
print()

# Support-weighted average recall, computed from this run's confusion matrix
# instead of constants copied by hand from an earlier report.
# sum_i(recall_i * support_i / N) = sum_i(TP_i) / N, i.e. the diagonal of the
# confusion matrix divided by the total number of test samples.
recall = cm.diagonal().sum() / cm.sum()
print("Recall:", recall)


Confusion matrix :
[[12  0  0]
 [ 0  9  0]
 [ 0  0  9]]

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        12
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00         9

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Recall: 0.9349999999999999


In [None]:
# Check the test-data accuracy of individual sub-trees of the forest.
print('\nSubtree별 시험데이터 정확도 :')
# Slice instead of indexing with range(10): shows the first 10 trees, and does
# not raise IndexError if the forest was built with fewer than 10 estimators.
for i, subTree in enumerate(rf.estimators_[:10]):
  print('subtree (%d) = %.2f' % (i, subTree.score(testX, testY)))


Subtree별 시험데이터 정확도 :
subtree (0) = 0.93
subtree (1) = 1.00
subtree (2) = 0.93
subtree (3) = 1.00
subtree (4) = 1.00
subtree (5) = 0.93
subtree (6) = 0.93
subtree (7) = 0.87
subtree (8) = 0.93
subtree (9) = 0.93


In [None]:
# Reproduce the numbers of classification_report() by hand.
import numpy as np
# One row per test sample: column 0 = actual class, column 1 = predicted class.
label = np.column_stack((testY, predY))

# precision: among the samples predicted as class n, the fraction whose
# actual class is also n.
def precision(n, pairs=None):
  """Return the precision of class ``n``.

  Args:
    n: class label to evaluate.
    pairs: optional 2-column array whose rows are (actual, predicted).
      When omitted, the notebook-level ``label`` array is used.

  Returns:
    Correct predictions of ``n`` divided by all predictions of ``n``, or 0.0
    when nothing was predicted as ``n`` (instead of raising ZeroDivisionError).
  """
  if pairs is None:
    pairs = label
  predicted_as_n = pairs[pairs[:, 1] == n]
  if predicted_as_n.shape[0] == 0:
    return 0.0
  hits = predicted_as_n[predicted_as_n[:, 0] == predicted_as_n[:, 1]]
  return hits.shape[0] / predicted_as_n.shape[0]

# Print the precision for class labels 0, 1 and 2, rounded to two decimals.
print('class-0 precision : %.2f' % precision(0))
print('class-1 precision : %.2f' % precision(1))
print('class-2 precision : %.2f' % precision(2))

class-0 precision : 1.00
class-1 precision : 1.00
class-2 precision : 1.00


In [None]:
# recall: among the samples whose actual class is n, the fraction that were
# predicted as class n.
def recall(n, pairs=None):
  """Return the recall of class ``n``.

  Args:
    n: class label to evaluate.
    pairs: optional 2-column array whose rows are (actual, predicted).
      When omitted, the notebook-level ``label`` array is used.

  Returns:
    Correct predictions of ``n`` divided by all actual samples of ``n``, or
    0.0 when no sample actually belongs to ``n`` (instead of raising
    ZeroDivisionError).
  """
  if pairs is None:
    pairs = label
  actually_n = pairs[pairs[:, 0] == n]
  if actually_n.shape[0] == 0:
    return 0.0
  hits = actually_n[actually_n[:, 0] == actually_n[:, 1]]
  return hits.shape[0] / actually_n.shape[0]

# Print the recall for class labels 0, 1 and 2, rounded to two decimals.
print('class-0 recall : %.2f' % recall(0))
print('class-1 recall : %.2f' % recall(1))
print('class-2 recall : %.2f' % recall(2))

class-0 recall : 1.00
class-1 recall : 1.00
class-2 recall : 1.00


In [None]:
# F1-score (beta = 1): the harmonic mean of precision and recall.
def f1_score(n):
  """Return the F1-score of class ``n``: 2 * p * r / (p + r).

  Returns 0.0 when both precision and recall are 0, where the harmonic mean
  is undefined and the plain formula would raise ZeroDivisionError.
  """
  p = precision(n)
  r = recall(n)
  if p + r == 0:
    return 0.0
  return 2 * p * r / (p + r)

print('class-0 f1-score : %.2f' % f1_score(0))
print('class-1 f1-score : %.2f' % f1_score(1))
print('class-2 f1-score : %.2f' % f1_score(2))

# F-beta score (beta = 0.5): the weighted harmonic mean of precision and
# recall. beta < 1 weights precision more heavily than recall.
# Defined under its own name so it does not shadow f1_score() above.
def fbeta_score(n, b=0.5):
  """Return the F-beta score of class ``n``.

  F_beta = (1 + b**2) * p * r / (b**2 * p + r), which reduces to the F1-score
  for b = 1 and always lies in [0, 1].

  Args:
    n: class label to evaluate.
    b: beta, the weight of recall relative to precision.

  Returns:
    The F-beta score, or 0.0 when the denominator is 0 (precision and recall
    both 0).
  """
  p = precision(n)
  r = recall(n)
  denom = b**2 * p + r
  if denom == 0:
    return 0.0
  return (1 + b**2) * p * r / denom

print()
# Labels fixed: these are classes 0-2 (not 3-5) and the metric is F0.5, not F1.
print('class-0 f0.5-score : %.2f' % fbeta_score(0))
print('class-1 f0.5-score : %.2f' % fbeta_score(1))
print('class-2 f0.5-score : %.2f' % fbeta_score(2))

class-0 f1-score : 1.00
class-1 f1-score : 1.00
class-2 f1-score : 1.00

class-3 f1-score : 2.00
class-4 f1-score : 2.00
class-5 f1-score : 2.00
