# Machine Learning - Model Deployment with Joblib

## Import Libraries

In [114]:
import pandas as pd
import numpy as np
import sklearn as sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,cross_val_score,cross_val_predict,ShuffleSplit,GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,scale, RobustScaler
from sklearn.metrics import confusion_matrix,accuracy_score, roc_auc_score,roc_curve, classification_report,mean_squared_error,f1_score,recall_score,precision_score
import time
import joblib
from warnings import filterwarnings

filterwarnings("ignore")

## Data Preparation

In [115]:
def dataframe_description(df):
  print('The data matrix:\n',df['data'])
  print('The classification target:\n',df['target'])
  print('The names of the dataset columns:\n',df['feature_names'])
  if 'target_names' in df.keys():
    print('The names of target classes:\n',df['target_names'])
  print('The full description of the dataset:\n',df['DESCR'])

In [116]:
breast_cancer = sklearn.datasets.load_breast_cancer()
dataframe_description(breast_cancer)

The data matrix:
 [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
The classification target:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 

In [117]:
df = pd.DataFrame(breast_cancer['data'], columns = breast_cancer['feature_names'])
df['Outcome'] = breast_cancer.target
df.sample(10)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Outcome
435,13.98,19.62,91.12,599.5,0.106,0.1133,0.1126,0.06463,0.1669,0.06544,...,30.8,113.9,869.3,0.1613,0.3568,0.4069,0.1827,0.3179,0.1055,0
314,8.597,18.6,54.09,221.2,0.1074,0.05847,0.0,0.0,0.2163,0.07359,...,22.44,56.65,240.1,0.1347,0.07767,0.0,0.0,0.3142,0.08116,1
198,19.18,22.49,127.5,1148.0,0.08523,0.1428,0.1114,0.06772,0.1767,0.05529,...,32.06,166.4,1688.0,0.1322,0.5601,0.3865,0.1708,0.3193,0.09221,0
292,12.95,16.02,83.14,513.7,0.1005,0.07943,0.06155,0.0337,0.173,0.0647,...,19.93,88.81,585.4,0.1483,0.2068,0.2241,0.1056,0.338,0.09584,1
78,20.18,23.97,143.7,1245.0,0.1286,0.3454,0.3754,0.1604,0.2906,0.08142,...,31.72,170.3,1623.0,0.1639,0.6164,0.7681,0.2508,0.544,0.09964,0
337,18.77,21.43,122.9,1092.0,0.09116,0.1402,0.106,0.0609,0.1953,0.06083,...,34.37,161.1,1873.0,0.1498,0.4827,0.4634,0.2048,0.3679,0.0987,0
158,12.06,12.74,76.84,448.6,0.09311,0.05241,0.01972,0.01963,0.159,0.05907,...,18.41,84.08,532.8,0.1275,0.1232,0.08636,0.07025,0.2514,0.07898,1
468,17.6,23.33,119.0,980.5,0.09289,0.2004,0.2136,0.1002,0.1696,0.07369,...,28.87,143.6,1437.0,0.1207,0.4785,0.5165,0.1996,0.2301,0.1224,0
19,13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,...,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259,1
475,12.83,15.73,82.89,506.9,0.0904,0.08269,0.05835,0.03078,0.1705,0.05913,...,19.35,93.22,605.8,0.1326,0.261,0.3476,0.09783,0.3006,0.07802,1


In [118]:
X = df.drop("Outcome",axis=1)
y= df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=17)

print("X_train Shape: ",X_train.shape)
print("X_test Shape: ",X_test.shape)
print("y_train Shape: ",y_train.shape)
print("y_test Shape: ",y_test.shape)

X_train Shape:  (455, 30)
X_test Shape:  (114, 30)
y_train Shape:  (455,)
y_test Shape:  (114,)


In [119]:
# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Training

In [120]:
multilayer_ann = MLPClassifier()
multilayer_ann.fit(X_train_scaled,y_train)

In [121]:
multilayer_ann.get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [122]:
y_pred = multilayer_ann.predict(X_test_scaled)
cm = confusion_matrix(y_test,y_pred)
print(cm)

[[34  4]
 [ 1 75]]


In [123]:
print("Our Accuracy is: {}\n".format(accuracy_score(y_test,y_pred)))
print("Our Recall is: {}\n".format(recall_score(y_test,y_pred)))
print("Our Precision is: {}\n".format(precision_score(y_test,y_pred)))
print("Our F1 score is: {}\n".format(f1_score(y_test,y_pred)))
print(classification_report(y_test,y_pred))

Our Accuracy is: 0.956140350877193

Our Recall is: 0.9868421052631579

Our Precision is: 0.9493670886075949

Our F1 score is: 0.967741935483871

              precision    recall  f1-score   support

           0       0.97      0.89      0.93        38
           1       0.95      0.99      0.97        76

    accuracy                           0.96       114
   macro avg       0.96      0.94      0.95       114
weighted avg       0.96      0.96      0.96       114



In [124]:
accuracies= cross_val_score(estimator=multilayer_ann,
                            X=X_train_scaled,y=y_train,
                            cv=10)
print("Average Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standart Deviation of Accuracies: {:.2f} %".format(accuracies.std()*100))

Average Accuracy: 98.02 %
Standart Deviation of Accuracies: 1.54 %


In [125]:
results =pd.DataFrame(multilayer_ann.predict_proba(X_test_scaled)[:10],
             columns=["Possibility of 0","Possibility of 1"])

results["Class"]=[1 if i>0.5 else 0 for i in results["Possibility of 1"]]
results.head()

Unnamed: 0,Possibility of 0,Possibility of 1,Class
0,7.096961e-05,0.999929,1
1,5.654178e-05,0.999943,1
2,1.214672e-05,0.999988,1
3,0.0001190713,0.999881,1
4,1.310491e-08,1.0,1


## Model Tuning

In [None]:
ann_params ={"hidden_layer_sizes":[(100,100,10),
                                   (100,10,10),
                                   (100,100,100),
                                   (50,50,100),
                                   (3,3,5),
                                   (2,7)],
             "solver":["lbfgs","sgd", "adam"],
             "alpha": [0.0001,0.001,0.01,0.1],
             "activation": ["relu","logistic"]}

multilayer_ann = MLPClassifier()
multilayer_ann_cv = GridSearchCV(multilayer_ann,ann_params,cv=8,
                                 n_jobs=-1 , verbose=2)

start_time = time.time()

multilayer_ann_cv.fit(X_train_scaled,y_train)

elapsed_time = time.time() - start_time

print(f"Elapsed time for multilayer_ann_cv cross validation: "
      f"{elapsed_time:.3f} seconds")

In [127]:
print("The best score is: {}".format(multilayer_ann_cv.best_score_))
print("The best params are: {}".format(multilayer_ann_cv.best_params_))

The best score is: 0.9802239974937343
The best params are: {'activation': 'logistic', 'alpha': 0.01, 'hidden_layer_sizes': (100, 100, 10), 'solver': 'lbfgs'}


In [128]:
ann_tuned = MLPClassifier(hidden_layer_sizes= multilayer_ann_cv.best_params_['hidden_layer_sizes'],
                          activation= multilayer_ann_cv.best_params_['activation'],
                          alpha= multilayer_ann_cv.best_params_['alpha'],
                          solver= multilayer_ann_cv.best_params_['solver']).fit(X_train_scaled,y_train)
y_pred = ann_tuned.predict(X_test_scaled)
cm = confusion_matrix(y_test,y_pred)
print(cm)

[[34  4]
 [ 1 75]]


In [129]:
print("Our Accuracy is: {}\n".format(accuracy_score(y_test,y_pred)))
print("Our Recall is: {}\n".format(recall_score(y_test,y_pred)))
print("Our Precision is: {}\n".format(precision_score(y_test,y_pred)))
print("Our F1 score is: {}\n".format(f1_score(y_test,y_pred)))
print(classification_report(y_test,y_pred))

Our Accuracy is: 0.956140350877193

Our Recall is: 0.9868421052631579

Our Precision is: 0.9493670886075949

Our F1 score is: 0.967741935483871

              precision    recall  f1-score   support

           0       0.97      0.89      0.93        38
           1       0.95      0.99      0.97        76

    accuracy                           0.96       114
   macro avg       0.96      0.94      0.95       114
weighted avg       0.96      0.96      0.96       114



## Model Deployment

In [130]:
joblib.dump(ann_tuned,'ann_final_model.pkl')
joblib.dump(list(X.columns),'column_names.pkl')

['column_names.pkl']

In [131]:
column_names = joblib.load('column_names.pkl')
print(column_names)

['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry', 'mean fractal dimension', 'radius error', 'texture error', 'perimeter error', 'area error', 'smoothness error', 'compactness error', 'concavity error', 'concave points error', 'symmetry error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry', 'worst fractal dimension']


In [132]:
loaded_ann_model = joblib.load('ann_final_model.pkl')
loaded_ann_model.predict([X_test_scaled[0]])

array([1])