**Construindo um modelo de classificação para o dataset de Iris**

# 1. Importar bibliotecas

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# 2. Carregar iris dataset

In [None]:
# Load the bundled Iris dataset. The returned object exposes the attributes
# used throughout this notebook: data, target, feature_names and target_names.
iris = datasets.load_iris()


# 3. Input features

O dataset possui 4 input features e 1 output variable (a classe)

## Input features

In [None]:
# List the names of the input features (the columns of the feature matrix).
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


## Output features

In [None]:
# Class names; the integer codes stored in iris.target index into this array.
print(iris.target_names)

['setosa' 'versicolor' 'virginica']


# 4. Visão geral dos dados

## Input features

In [None]:
# Display the raw feature matrix: one row per sample, one column per feature.
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

## Output variable (a classe)

In [None]:
# Display the integer class code (0, 1 or 2) of every sample.
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

## Determinando as variáveis de input e output

In [None]:
# Conventional aliases: X is the feature matrix, y the vector of class codes.
X, y = iris.data, iris.target

## Examinar a dimensão dos dados

In [None]:
# (number of samples, number of features)
X.shape

(150, 4)

In [None]:
# One label per sample, so the length must match the first dimension of X.
y.shape

(150,)

# 5. Construir modelo de classificação usando Random Forest

In [None]:
# Random forests are stochastic (bootstrap samples and random feature
# subsets); fixing the seed makes the feature importances and predictions
# reproducible on every Restart & Run All.
clf = RandomForestClassifier(random_state=42)

In [None]:
# Fit on the species names rather than the integer codes so that predict()
# returns readable labels such as 'setosa' when the notebook is executed
# top to bottom (with integer labels it would print [0] instead).
clf.fit(X, iris.target_names[y])

# 6. Importância da feature

In [None]:
# One importance value per input feature, in the column order of X
# (i.e. the order of iris.feature_names); the values sum to 1.
print(clf.feature_importances_)

[0.07969243 0.02105893 0.43928482 0.45996381]


# 7. Faça a predição

In [None]:
# First sample of the feature matrix; its values are reused as the example
# input for the predictions that follow.
X[0]

array([5.1, 3.5, 1.4, 0.2])

In [None]:
# predict() expects a 2-D input (samples x features), hence the nested list
# holding a single sample.
print(clf.predict([[5.1, 3.5, 1.4, 0.2]]))

['setosa']


In [None]:
# Slice (rather than index) so the first sample keeps its 2-D shape (1, 4).
print(clf.predict(X[:1]))

['setosa']


In [None]:
# Class probabilities for the first sample; the slice keeps the 2-D shape (1, 4).
print(clf.predict_proba(X[:1]))

[[1. 0. 0.]]


O resultado desse predict_proba indica que o modelo atribui probabilidade de 100% à primeira classe (setosa). Cada coluna corresponde a uma classe, na ordem de `clf.classes_`.

In [None]:
# Refit using species names as labels (X and y alias iris.data and
# iris.target), so predictions come back as strings instead of integer codes.
clf.fit(X, iris.target_names[y])

# 8. Data split (80/20)

In [None]:
# Hold out 20% of the samples for evaluation. The fixed random_state makes
# the shuffle, and therefore the split and every downstream metric,
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Training split: 80% of the 150 samples.
X_train.shape, y_train.shape

((120, 4), (120,))

In [None]:
# Test split: the remaining 20% of the samples.
X_test.shape, y_test.shape

((30, 4), (30,))

# 9. Reconstrua o Random Forest Model

In [None]:
# Refit the same estimator on the training split only (integer labels), so
# the test split stays unseen until evaluation.
clf.fit(X_train, y_train)

In [None]:
# Classify the hard-coded example sample (same values as X[0]).
print(clf.predict([[5.1, 3.5, 1.4, 0.2]]))

[0]


In [None]:
# Per-class probabilities for the same example sample, one column per class.
print(clf.predict_proba([[5.1, 3.5, 1.4, 0.2]]))

[[1. 0. 0.]]


## Faça a predição no test set

In [None]:
# Predicted labels for the held-out test samples.
print(clf.predict(X_test))

[1 2 0 1 1 0 1 0 2 1 1 2 0 0 2 1 1 1 0 1 0 1 0 1 1 2 2 1 2 2]


In [None]:
# Keep the estimator trained on the training split with integer labels.
# Refitting here on the full dataset with species-name labels would (a) leak
# the test rows into training and (b) make clf.score(X_test, y_test) compare
# string predictions against integer targets, which always yields 0.0.
clf.fit(X_train, y_train)

In [None]:
# True labels of the test split, for comparison with the predicted labels.
print(y_test)

[1 2 0 1 1 0 1 0 2 1 1 2 0 0 2 1 1 1 0 1 0 1 0 1 1 2 2 1 1 2]


# 10. Performance do modelo

In [None]:
# Mean accuracy on the test split: the fraction of test samples whose
# predicted label equals the corresponding entry of y_test. The estimator
# must have been fit on labels of the same type as y_test, otherwise no
# prediction can match.
print(clf.score(X_test, y_test))

0.0


  score = média(y_true == y_pred), isto é, a fração de predições corretas (acurácia)
