In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [16]:
iris = pd.read_csv("Iris.csv")
iris.drop(columns='Id', inplace = True)
iris

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [18]:
X = iris.iloc[:,:-1]
Y = iris.iloc[:,-1]
X

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [19]:
Y

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.2,random_state=42)

In [22]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

clf = RandomForestClassifier(random_state=42)
n_estimators = [int(x) for x in np.linspace(start = 50, stop = 500, num = 10)]
max_features = ['auto', 'sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]
params  = {'n_estimators': n_estimators,
            'max_features': max_features,
            'max_depth': max_depth,
            'min_samples_split': min_samples_split,
            'min_samples_leaf': min_samples_leaf,
            'bootstrap': bootstrap}
params

{'n_estimators': [50, 100, 150, 200, 250, 300, 350, 400, 450, 500],
 'max_features': ['auto', 'sqrt'],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_split': [2, 5, 10],
 'min_samples_leaf': [1, 2, 4],
 'bootstrap': [True, False]}

In [24]:
rf_search = GridSearchCV(clf, param_grid=params,verbose=2,cv=3, n_jobs=-1)

In [None]:
rf_search.fit(X_train,y_train)

In [28]:
print(rf_search.best_params_)

{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}


In [29]:
f = open('hasil.json', 'wb')
f.write(rf_search.best_params_)

TypeError: a bytes-like object is required, not 'dict'

In [35]:
import json
# Serializing json  
hasil = json.dumps(rf_search.best_score_, indent = 4) 
print(hasil)

0.9583333333333334


## Modeling

In [38]:
clfix = rf_search.best_estimator_
y_pred = clfix.predict(X_test)
y_pred

array(['Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-setosa'], dtype=object)

In [43]:
def evaluate(model,test_feature,test_label):
    prediction = model.predict(test_feature)
    accuracy = accuracy_score(prediction,test_label)
    precision = precision_score(prediction,test_label, average='micro')
    recall = recall_score(prediction,test_label, average='micro')
    f1 = f1_score(prediction,test_label, average='micro')

    print(f"Accuracy Score is : {accuracy}".format())
    print(f"Precision Score is : {precision}".format())
    print(f"Recall Score is : {recall}".format())
    print(f"f1 Score is : {f1}".format())

In [44]:
evaluate(clfix,X_test,y_test)

Accuracy Score is : 1.0
Precision Score is : 1.0
Recall Score is : 1.0
f1 Score is : 1.0


In [45]:
import warnings
warnings.filterwarnings('ignore')

In [46]:
import pickle
pickle.dump(clfix,open('model.pkl','wb'))