# Logistic Regression

In [1]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

## Load Data

In [2]:
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
n_features = 100
n_observations = 100

X = np.random.randn(n_observations, n_features)
y = np.random.randint(0, 3, n_observations)

print("X shape", X.shape)
print("X example", X[0])
print("y shape", y.shape)
print("y example", y[0])

X shape (100, 100)
X example [-0.35205207 -0.69279609  1.81209312 -0.45738326 -1.80701217 -0.54311716
  0.49053358  1.7285293  -0.29986388  0.89915369  0.23615477 -0.49689827
 -0.22513294  0.62159903 -0.60690971  0.26674585  1.30063601 -0.97670051
  0.3339427   0.20017859  0.57271373 -2.88477509  0.5059092   0.69035764
  0.41987245 -1.20958609  0.26129794 -0.51918168  1.57742738  0.77256207
 -0.97861134  1.7342215   0.15511774 -1.19627687 -1.85716606 -0.35158422
 -0.21299438 -0.16047745  0.44910956  1.03460385 -0.88535268 -2.20528715
 -0.66135497 -0.09874927 -0.39686063  0.38928856  0.02124242  1.22616386
  0.40962288 -0.982113    0.40741168  1.88913033  0.27645403  1.37369061
 -2.2313305   1.41558957  0.09287023  0.05671697 -0.5914969  -0.74761669
 -1.42353898  1.11435735  0.67037357  1.20996652  1.91805951 -0.74833707
  0.24517938  2.52656297  1.65232133  0.93795446  0.17088269  0.26235573
  0.68030412  0.5076055   1.03259102 -0.02921635  0.72869724  0.47847228
 -0.06326772  0.793379

## Train Model

In [4]:
clf = LogisticRegression()
clf.fit(X, y)

LogisticRegression()

## Save Weights

In [5]:
from joblib import dump

dump(clf, "models/log_reg_model.joblib")

FileNotFoundError: [Errno 2] No such file or directory: 'models/log_reg_model.joblib'

In [None]:
# Check file size
from utils import execution_time

test_rows = 10000000

X_test = np.random.rand(test_rows, n_features)

inference_time = execution_time(clf.predict(X_test))
weights_size = Path("models/log_reg_model.joblib").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

: 

## Export model as ONNX

In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

initial_type = [('float_input', FloatTensorType([None, n_features]))]
onx = convert_sklearn(clf, initial_types=initial_type)
with open("models/log_reg_model.onnx", "wb") as f:
    f.write(onx.SerializeToString())

: 

In [None]:
import onnxruntime as rt

sess = rt.InferenceSession("models/log_reg_model.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

X_test_onnx = X_test.astype(np.float32)

pred_onx = sess.run([label_name], {input_name: X_test_onnx})[0]

: 

In [None]:
test_rows = 10000000

X_test = np.random.rand(test_rows, 4)

inference_time = execution_time(lambda: sess.run([label_name], {input_name: X_test_onnx}))
weights_size = Path("models/log_reg_model.onnx").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

: 