In [1]:
import pandas as pd
import numpy as np
import onnx
import onnxruntime as ort 

from configs.config import FEATURE_DIR, MODEL_DIR
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from datetime import datetime
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Load data from feature store

In [2]:
train_df = pd.read_hdf(str(FEATURE_DIR / 'feature_engineering_1.h5'), 'train')
test_df = pd.read_hdf(str(FEATURE_DIR / 'feature_engineering_1.h5'), 'test')
val_df = pd.read_hdf(str(FEATURE_DIR / 'feature_engineering_1.h5'), 'val')

In [3]:
X_train, y_train = train_df.drop(columns=["loan_status"]).iloc[:, :].values, train_df["loan_status"].iloc[:].values 

In [4]:
X_test, y_test = test_df.drop(columns=["loan_status"]).iloc[:, :].values, test_df["loan_status"].iloc[:].values 

# Build, Hyperparameters selection and train model

In [5]:
hyper_parameters = {
    "C": [1, 10],
    "kernel": ["rbf", "linear"],
    "gamma": ["scale", "auto"]
}

In [6]:
# Create a LinearSVC model
model = SVC()

grid_search = GridSearchCV(estimator=model, 
                           param_grid=hyper_parameters, 
                           cv=3, # Number of fold 
                           n_jobs=5,
                           verbose=True)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

Fitting 3 folds for each of 8 candidates, totalling 24 fits


# Save model

In [11]:
# Define the initial type of the model input
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]

# Convert the model to ONNX
onnx_model = convert_sklearn(best_model, initial_types=initial_type)

# Save the ONNX model to a file
onnx.save_model(onnx_model, str(MODEL_DIR / 'training_experiment_1.onnx'))

# Load model and make prediction 

In [12]:
# Initialize the ONNX Runtime session
session = ort.InferenceSession(str(MODEL_DIR / 'training_experiment_1.onnx'))

# Prepare the input data for prediction
input_name = session.get_inputs()[0].name
X_test_onnx = np.array(X_test, dtype=np.float32)

# Perform inference
y_pred = session.run(None, {input_name: X_test_onnx})[0]

# Evaluation

In [13]:
score = classification_report(y_test, y_pred)

In [14]:
print(score)

              precision    recall  f1-score   support

           0       0.92      0.93      0.93      3500
           1       0.75      0.71      0.73      1000

    accuracy                           0.88      4500
   macro avg       0.84      0.82      0.83      4500
weighted avg       0.88      0.88      0.88      4500

