# Machine Learning Operations (MLOps)

## Model Deployment in scikit-learn

In [1]:
import pandas as pd
from sklearn import set_config
set_config(transform_output="pandas")
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

# libraries for saving ML models
import pickle
import joblib


In [3]:
# Load the Titanic dataset "Dataset_titanic.csv" from Moodle into a Pandas DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only runs
# where exactly this file exists -- consider a configurable data directory instead.
titanic_df = pd.read_csv("C:/Users/Vic/Desktop/Data Scienece/Datasets-20231016/Dataset_Titanic.csv")

# Save the features 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch' in a variable X
# and the labels 'Survived' in a variable y
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']
X = titanic_df[features]
y = titanic_df['Survived']

# Stratified 80/20 train-test split; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Instantiate a Simple Imputer with a Median Strategy
imp = SimpleImputer(strategy="median")

# Instantiate a One Hot Encoder that ignores unknown categories.
# sparse_output=False gives dense output, which the pandas transform output
# configured in the imports cell requires.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

# Use a Column Transformer to apply the two transformers (Imputer and One-hot-encoder) to the
# correct columns and pass through all other columns
ct = ColumnTransformer(
    [('ohe', ohe, ['Sex']),
     ('imputer', imp, ['Age'])],
    remainder='passthrough'
)
# The column transformer is intentionally NOT fitted on its own here: a separate
# ct.fit_transform(X_train) would discard its result, and pipe.fit below fits the
# preprocessor on the same training data anyway.

# Instantiate the random forest classifier (fixed seed for reproducible results)
rf = RandomForestClassifier(n_estimators=50, random_state=42)

# Create a Pipeline with two steps: preprocessing (Column Transformer) and classifier
pipe = Pipeline([
    ('preprocessor', ct),
    ('classifier', rf)]
)

# Train the Pipeline on the train set (fits the preprocessor, then the classifier)
pipe.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = pipe.predict(X_test)

In [4]:
# Show per-class precision/recall/F1 of the freshly trained pipeline on the test set
print(
    classification_report(y_true=y_test, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.82      0.85      0.84       110
           1       0.75      0.70      0.72        69

    accuracy                           0.79       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179



In [7]:
# Persist the fitted pipeline twice: once with pickle, once with joblib
pickle_path = 'C:/Users/Vic/Desktop/Data Scienece/Models/model1.pkl'
joblib_path = "C:/Users/Vic/Desktop/Data Scienece/Models/model1.joblib"

# pickle needs an open binary file handle
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(pipe, pickle_file)

# joblib takes the target path directly; it stays the last expression of the
# cell so the list it returns is still displayed as the cell output
joblib.dump(pipe, joblib_path)

['C:/Users/Vic/Desktop/Data Scienece/Models/model1.joblib']

In [8]:
# Restore the pipeline from the pickle file.
# NOTE: unpickling can execute arbitrary code -- only load model files you trust.
pickle_path = 'C:/Users/Vic/Desktop/Data Scienece/Models/model1.pkl'
with open(pickle_path, 'rb') as pickle_file:
    model_pkl = pickle.load(pickle_file)

# Restore the pipeline from the joblib file
joblib_path = 'C:/Users/Vic/Desktop/Data Scienece/Models/model1.joblib'
model_joblib = joblib.load(joblib_path)

In [9]:
# Predict the test-set labels with the pipeline restored via pickle
y_pred = model_pkl.predict(X_test)

# Print the classification report for the pickle-loaded model
print(
    classification_report(y_true=y_test, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.82      0.85      0.84       110
           1       0.75      0.70      0.72        69

    accuracy                           0.79       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179



In [10]:
# Predict the test-set labels with the pipeline restored via joblib
y_pred = model_joblib.predict(X_test)

# Print the classification report for the joblib-loaded model
print(
    classification_report(y_true=y_test, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.82      0.85      0.84       110
           1       0.75      0.70      0.72        69

    accuracy                           0.79       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179



# 