In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

## RandomForest

In [2]:
# Importando base de dados
credito = pd.read_csv('../Dados/Credit.csv')

In [3]:
credito.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6,'critical/other existing credit',radio/tv,1169,'no known savings',>=7,4,'male single',none,...,'real estate',67,none,own,2,skilled,1,yes,yes,good
1,0<=X<200,48,'existing paid',radio/tv,5951,<100,1<=X<4,2,'female div/dep/mar',none,...,'real estate',22,none,own,1,skilled,1,none,yes,bad
2,'no checking',12,'critical/other existing credit',education,2096,<100,4<=X<7,2,'male single',none,...,'real estate',49,none,own,1,'unskilled resident',2,none,yes,good
3,<0,42,'existing paid',furniture/equipment,7882,<100,4<=X<7,2,'male single',guarantor,...,'life insurance',45,none,'for free',1,skilled,2,none,yes,good
4,<0,24,'delayed previously','new car',4870,<100,1<=X<4,3,'male single',none,...,'no known property',53,none,'for free',2,skilled,2,none,yes,bad


In [4]:
# Separando dados e classe (Feature, Target)
preditos = credito.iloc[:, 0:20].values
classe = credito.iloc[:, 20].values

# Convertendo valores categóricos em numéricos
label_enconder = LabelEncoder()
preditos[:, 0] = label_enconder.fit_transform(preditos[:, 0])
preditos[:, 2] = label_enconder.fit_transform(preditos[:, 2])
preditos[:, 3] = label_enconder.fit_transform(preditos[:, 3])
preditos[:, 5] = label_enconder.fit_transform(preditos[:, 5])
preditos[:, 6] = label_enconder.fit_transform(preditos[:, 6])
preditos[:, 8] = label_enconder.fit_transform(preditos[:, 8])
preditos[:, 9] = label_enconder.fit_transform(preditos[:, 9])
preditos[:, 11] = label_enconder.fit_transform(preditos[:, 11])
preditos[:, 13] = label_enconder.fit_transform(preditos[:, 13])
preditos[:, 14] = label_enconder.fit_transform(preditos[:, 14])
preditos[:, 16] = label_enconder.fit_transform(preditos[:, 16])
preditos[:, 18] = label_enconder.fit_transform(preditos[:, 18])
preditos[:, 19] = label_enconder.fit_transform(preditos[:, 19])

# Dividindo base entre treino e teste
X_training, X_test, y_training, y_test = train_test_split(preditos, classe, test_size = 0.3, random_state = 0)

In [6]:
# Criando modelo de treinamento e teste
floresta = RandomForestClassifier(n_estimators = 100)
floresta.fit(X_training, y_training)

RandomForestClassifier()

In [7]:
# Testando modelo
previsoes = floresta.predict(X_test)
previsoes

array(['bad', 'good', 'good', 'good', 'good', 'good', 'good', 'good',
       'good', 'bad', 'bad', 'good', 'good', 'good', 'bad', 'good',
       'good', 'good', 'bad', 'good', 'good', 'bad', 'good', 'good',
       'good', 'bad', 'good', 'good', 'good', 'bad', 'good', 'good',
       'good', 'good', 'good', 'bad', 'good', 'good', 'good', 'good',
       'good', 'bad', 'good', 'good', 'good', 'bad', 'good', 'bad',
       'good', 'good', 'good', 'bad', 'good', 'good', 'good', 'good',
       'bad', 'good', 'good', 'good', 'bad', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'bad', 'good', 'good', 'good',
       'bad', 'good', 'bad', 'good', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'good', 'good', 'good', 'bad',
       'good', 'good', 'good', 'good', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'good', 'good', 'good', 'good',
       'good', 'good', 'bad', 'bad', 'bad', 'good', 'good', 'good', 'bad',
       'good', 'good'

In [8]:
# Gerando matriz de confusão
confusao = confusion_matrix(y_test, previsoes)
confusao

array([[ 37,  49],
       [ 19, 195]])

In [9]:
# Verificando taxa de acerto
taxa_acerto = accuracy_score(y_test, previsoes)
taxa_acerto

0.7733333333333333

In [10]:
# Verificando taxa de erro
taxa_erro = 1 - taxa_acerto
taxa_erro

0.22666666666666668

In [12]:
# Verificando dados das árvores
floresta.estimators_

[DecisionTreeClassifier(max_features='auto', random_state=2116285466),
 DecisionTreeClassifier(max_features='auto', random_state=1308180464),
 DecisionTreeClassifier(max_features='auto', random_state=13381242),
 DecisionTreeClassifier(max_features='auto', random_state=1218757057),
 DecisionTreeClassifier(max_features='auto', random_state=946957466),
 DecisionTreeClassifier(max_features='auto', random_state=1665564144),
 DecisionTreeClassifier(max_features='auto', random_state=764659465),
 DecisionTreeClassifier(max_features='auto', random_state=2027109875),
 DecisionTreeClassifier(max_features='auto', random_state=1736444540),
 DecisionTreeClassifier(max_features='auto', random_state=1471187083),
 DecisionTreeClassifier(max_features='auto', random_state=1983695656),
 DecisionTreeClassifier(max_features='auto', random_state=334451137),
 DecisionTreeClassifier(max_features='auto', random_state=49359999),
 DecisionTreeClassifier(max_features='auto', random_state=115889310),
 DecisionTreeC

In [13]:
# Verificando dados das árvores
floresta.estimators_[1]

DecisionTreeClassifier(max_features='auto', random_state=1308180464)