# Menyimpan Model Scikit-Learn Menggunakan Pipeline

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
dataset = pd.read_csv('drive/My Drive/Datasets/iris.csv')

dataset.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
Y = dataset['Species'] # Target prediksi
x = dataset.drop(['Id', 'Species'], axis=1) # Data Id tidak diperlukan

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, Y, test_size=0.15)

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

pipeline = Pipeline([('scaler', StandardScaler()), ('model', LogisticRegression())])
pipeline.fit(x_train, y_train)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('model',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [11]:
y_pred = pipeline.predict(x_test)

In [14]:
from sklearn.metrics import f1_score, precision_score, recall_score

print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(pipeline.score(x_test, y_test))) # .score() -> Accuracy
print(f1_score(y_test, y_pred, average='macro'))
print(precision_score(y_test, y_pred, average='macro'))
print(recall_score(y_test, y_pred, average='macro'))

Accuracy of logistic regression classifier on test set: 0.96
0.9440559440559441
0.9523809523809524
0.9444444444444445


## Save model hasil training

In [16]:
import pickle

In [17]:
filename = 'model.pkl' # Nama yang digunakan bebas
pickle.dump(pipeline, open(filename, 'wb')) # Membuat file model, ganti logreg dengan nama model yang digunakan

## Loading model hasil training

In [18]:
loaded_model = pickle.load(open(filename, 'rb'))

In [19]:
y_pred_loaded = loaded_model.predict(x_test)

In [20]:
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(loaded_model.score(x_test, y_test))) # .score() -> Accuracy
print(f1_score(y_test, y_pred_loaded, average='macro'))
print(precision_score(y_test, y_pred_loaded, average='macro'))
print(recall_score(y_test, y_pred_loaded, average='macro'))

Accuracy of logistic regression classifier on test set: 0.96
0.9440559440559441
0.9523809523809524
0.9444444444444445


In [21]:
x_test

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
54,6.5,2.8,4.6,1.5
102,7.1,3.0,5.9,2.1
138,6.0,3.0,4.8,1.8
43,5.0,3.5,1.6,0.6
147,6.5,3.0,5.2,2.0
101,5.8,2.7,5.1,1.9
30,4.8,3.1,1.6,0.2
80,5.5,2.4,3.8,1.1
44,5.1,3.8,1.9,0.4
8,4.4,2.9,1.4,0.2


In [23]:
# Membuat sebuah list dengan dimensi yang sama

test_data = [[5.1, 3.5, 1.4, 0.2]] # Data sebelum di pre-process

prediction_example = loaded_model.predict(test_data)

prediction_example[0]

'Iris-setosa'