# Logistic Regression

In [23]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

## Load Data

In [24]:
iris = load_iris()
X, y = iris.data, iris.target

In [25]:
n_features = 100
n_observations = 100

X = np.random.randn(n_observations, n_features)
y = np.random.randint(0, 3, n_observations)

print("X shape", X.shape)
print("X example", X[0])
print("y shape", y.shape)
print("y example", y[0])

X shape (100, 100)
X example [ 0.74412993 -1.05462399 -1.14395605 -0.867277   -0.32781437  2.98354855
 -0.65535973 -1.02628784 -0.17413571 -0.85936728 -1.11886663  0.08209197
 -0.77269333  0.29027634  0.27869535 -1.06614616 -0.41135708  2.23390804
  0.58007668  0.7996253  -0.21301315  0.22148628 -2.23110119 -1.74484248
  0.61165814 -0.54803421  0.84912473  0.16417913 -0.1182921   0.79508234
 -0.35347602  2.11172162 -0.73374603  0.37512716 -2.24698566  1.09054386
  0.07711984  2.09803854 -0.01773146 -0.44279201  1.23148034  0.60439517
  1.06866797  0.35697025 -0.45395715  2.08659551 -0.42984696  0.57514958
  2.00445117 -0.68890385 -1.6407538  -2.25972271  1.57361877  0.18531059
  0.47168033  1.27524396 -0.37071767  0.32125645  0.34909791  1.36067328
  0.99613073 -1.10069499  0.46411016 -1.29066336  1.86571388  1.2718142
 -1.00514275 -0.41467908 -0.98619969 -1.53628655 -0.19590851  0.24877193
 -0.13015768  0.78244126 -0.14173945 -1.55440867  0.87510995  0.97154984
 -2.45914188  1.6775019

## Train Model

In [26]:
clf = LogisticRegression()
clf.fit(X, y)

LogisticRegression()

## Save Weights

In [27]:
from pathlib import Path
from joblib import dump

Path("models").mkdir(exist_ok=True)

dump(clf, "models/log_reg_model.joblib")

['models/log_reg_model.joblib']

In [28]:
import re
import time

def execution_time(process):
    """ Calculates the execution time of a function """
    start_time = time.time()
    (lambda: process)()
    end_time = time.time()
    return (end_time - start_time)

In [22]:
# Check file size

test_rows = 10000000

X_test = np.random.rand(test_rows, n_features)

inference_time = execution_time(clf.predict(X_test))
weights_size = Path("models/log_reg_model.joblib").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

Inference time: 0.00s
File size: 3.205 KB
Test rows: 10.0 million


## Export model as ONNX

In [29]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

initial_type = [('float_input', FloatTensorType([None, n_features]))]
onx = convert_sklearn(clf, initial_types=initial_type)
with open("models/log_reg_model.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [30]:
import onnxruntime as rt

sess = rt.InferenceSession("models/log_reg_model.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

X_test_onnx = X_test.astype(np.float32)

pred_onx = sess.run([label_name], {input_name: X_test_onnx})[0]

In [31]:
test_rows = 10000000

X_test = np.random.rand(test_rows, 4)

inference_time = execution_time(lambda: sess.run([label_name], {input_name: X_test_onnx}))
weights_size = Path("models/log_reg_model.onnx").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

Inference time: 0.00s
File size: 2.139 KB
Test rows: 10.0 million
