# Logistic Regression

In [39]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

## Load Data

In [40]:
# Load the bundled iris dataset and split it into features and labels.
# NOTE: the next cell rebinds X and y to synthetic data, so these iris
# arrays are not what the model below is trained on.
iris = load_iris()
X = iris.data
y = iris.target

In [82]:
# Build a synthetic 3-class dataset used for the size / latency benchmarks.
# NOTE: this rebinds X and y, replacing the iris arrays loaded in the
# previous cell.
n_features = 100
n_observations = 100

# Fixed seed so that Restart Kernel -> Run All regenerates the same dataset
# (and therefore the same fitted model) on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Standard-normal features; labels drawn uniformly from {0, 1, 2}
# (the upper bound of `integers` is exclusive).
X = rng.standard_normal((n_observations, n_features))
y = rng.integers(0, 3, n_observations)

print("X shape", X.shape)
print("X example", X[0])
print("y shape", y.shape)
print("y example", y[0])

X shape (100, 100)
X example [-0.95525507 -2.5393372   1.04320624  2.25727852 -1.32246001 -0.65136624
 -1.22814486  0.28531415  0.42977981 -1.46231784 -0.97223222 -0.50967561
  0.28734442 -0.19864146 -0.45020693 -1.35166643 -1.74771416  0.2511429
  0.90821607  0.09671154  0.05002272  1.70745186  0.09940079 -0.20171883
  0.81018488  1.12933264  1.21835718  0.86590791  0.43400353 -0.34388696
  0.55569143 -1.10387368 -0.80699393  0.41525958  0.4962095  -1.63839207
  0.65135464 -0.16946154 -0.21845057 -1.17545363 -0.36455252 -0.1891814
  0.17032391  0.72153018 -0.22610354  1.26059283  0.32955441  1.25376854
  0.90310513  1.05572704 -0.35800126 -0.29762561  2.05085754 -0.3766795
 -0.28281631 -0.89676154  0.27406499 -0.82695734 -1.69346651  2.11165371
 -0.01710709  0.95013389  2.01530051  1.00597632  0.97939386  0.00726233
 -0.27416538  1.36266566 -0.05868107  0.18660559  1.07128523  1.57668939
  0.71624157  0.65465647  0.80831849 -1.26807624  0.59680123 -1.29008354
  1.16917763  0.41091497 

## Train Model

In [83]:
# Fit a logistic-regression classifier with default hyper-parameters on the
# current X / y. `fit` returns the estimator itself, so it can be chained.
clf = LogisticRegression().fit(X, y)
# Bare expression so the notebook still shows the fitted estimator's repr.
clf

LogisticRegression()

## Save Weights

In [84]:
from joblib import dump

# Make sure the output directory exists: on a fresh checkout `models/` may be
# missing, in which case `dump` would fail with FileNotFoundError.
Path("models").mkdir(parents=True, exist_ok=True)

# Persist the fitted classifier; `dump` returns the list of files written,
# which the notebook displays as the cell output.
dump(clf, "models/log_reg_model.joblib")

['models/log_reg_model.joblib']

In [86]:
# Benchmark the scikit-learn model: prediction time on a large random batch
# and the on-disk size of the joblib artifact.
from utils import execution_time

test_rows = 10_000_000

# Uniform random inputs: only the prediction cost is measured here, the
# predictions themselves are discarded.
X_test = np.random.rand(test_rows, n_features)

# Hand `execution_time` a zero-argument callable, the same calling convention
# the ONNX benchmark cell uses. The previous form evaluated
# `clf.predict(X_test)` first and passed the finished prediction array.
# NOTE(review): assumes execution_time invokes its argument and returns the
# elapsed seconds -- confirm against utils.execution_time.
inference_time = execution_time(lambda: clf.predict(X_test))
weights_size = Path("models/log_reg_model.joblib").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

Inference time: 2.14s
File size: 3.205 KB
Test rows: 10.0 million


## Export Model as ONNX

In [89]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Declare one float input named "float_input" with a free (None) batch
# dimension and `n_features` columns, then convert the fitted classifier.
initial_type = [("float_input", FloatTensorType([None, n_features]))]
onx = convert_sklearn(clf, initial_types=initial_type)

# Write the serialized ONNX model next to the joblib artifact. The trailing
# semicolon keeps the notebook from echoing the byte count.
Path("models/log_reg_model.onnx").write_bytes(onx.SerializeToString());

In [90]:
import onnxruntime as rt

# Cast the benchmark matrix to float32 once, up front, so the conversion is
# not repeated for every run of the session.
X_test_onnx = X_test.astype(np.float32)

# Open the exported model and look up the names of its first input and
# first output (the label output requested below).
sess = rt.InferenceSession("models/log_reg_model.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# Only one output is requested, so the result list has a single entry.
pred_onx = sess.run([label_name], {input_name: X_test_onnx})[0]

In [91]:
# Benchmark the ONNX model: prediction time on the float32 batch prepared in
# the previous cell and the on-disk size of the .onnx artifact.
#
# The earlier version of this cell rebound X_test to a fresh 4-column random
# array that was never used: the timing below runs on X_test_onnx. That
# wasted memory and left X_test with a feature count that does not match the
# model, so it has been removed.

# Report the row count of the array that is actually timed.
test_rows = X_test_onnx.shape[0]

inference_time = execution_time(lambda: sess.run([label_name], {input_name: X_test_onnx}))
weights_size = Path("models/log_reg_model.onnx").stat().st_size

print(f"Inference time: {inference_time:.2f}s")
print(f"File size: {weights_size / 1000} KB")
print(f"Test rows: {(test_rows / 1000000)} million")

Inference time: 2.27s
File size: 2.137 KB
Test rows: 10.0 million
