In [4]:
!pip install skl2onnx
!pip install onnxruntime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

# Read dataset
df1 = pd.read_csv('sample_data/california_housing_train.csv')
df2 = pd.read_csv('sample_data/california_housing_test.csv')
housing = pd.concat([df1, df2])

# Choose target column
housing_data = housing.drop("median_house_value", axis=1)
housing_target = housing["median_house_value"].copy()

# Split train/test data
housing_data_train, \
housing_data_test, \
housing_target_train, \
housing_target_test = train_test_split(housing_data, housing_target, \
                                       test_size=0.2, random_state=42)

names = [
    "LinearRegression",
    "Ridge",
    "RidgeCV",
    "SGDRegressor",
    "GradientBoostingRegressor",
    "HistGradientBoostingRegressor",
    "RandomForestRegressor",
    "BaggingRegressor",
    "AdaBoostRegressor",
]
models = [
    LinearRegression(),
    Ridge(random_state=42),
    RidgeCV(),
    SGDRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    HistGradientBoostingRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    BaggingRegressor(random_state=42),
    AdaBoostRegressor(random_state=42),
]

def mae(y_true, predictions):
    y_true, predictions = np.array(y_true), np.array(predictions)
    return np.mean(np.abs(y_true - predictions))

for name, model in zip(names, models):
  # Learn model
  model.fit(housing_data_train, housing_target_train)

  # Save model
  initial_type = [('float_input', FloatTensorType([None, housing_data_test.shape[1]]))]
  onx = convert_sklearn(model, initial_types=initial_type)
  with open(name + ".onnx", "wb") as f:
      f.write(onx.SerializeToString())

  # Run with onnxruntime
  sess = rt.InferenceSession(name + ".onnx")
  input_name = sess.get_inputs()[0].name
  label_name = sess.get_outputs()[0].name
  pred_onx = sess.run([label_name], {input_name: housing_data_test.astype(np.float32).to_numpy()})[0]

  print("OUT " + label_name)

  # Test
  housing_target_predict = model.predict(housing_data_test)
  mse = mean_squared_error(housing_target_test, housing_target_predict)
  maer = mae(housing_target_test, housing_target_predict)
  r2 = r2_score(housing_target_test, housing_target_predict)
  print(name)
  print("Mean squared error: %.2f" % mse)
  print("Coefficient of determination: %.2f" % r2)
  print("Mean Absolute Error: %.2f" % maer)
  print("Expected: " + str(housing_target_test[0]) + " but actual " + str(pred_onx[0]))
  print()




OUT variable
LinearRegression
Mean squared error: 4869758532.52
Coefficient of determination: 0.65
Mean Absolute Error: 50564.36
Expected: 66900.0 but actual [3922.75]

OUT variable
Ridge
Mean squared error: 4869754270.19
Coefficient of determination: 0.65
Mean Absolute Error: 50564.18
Expected: 66900.0 but actual [3948.25]

OUT variable
RidgeCV
Mean squared error: 4869720398.54
Coefficient of determination: 0.65
Mean Absolute Error: 50562.59
Expected: 66900.0 but actual [4181.]

OUT variable
SGDRegressor
Mean squared error: 5933527002779660004990193238016.00
Coefficient of determination: -432139884479360270336.00
Mean Absolute Error: 1507632132301393.75
Expected: 66900.0 but actual [7.816472e+13]

OUT variable
GradientBoostingRegressor
Mean squared error: 3006495692.28
Coefficient of determination: 0.78
Mean Absolute Error: 37858.84
Expected: 66900.0 but actual [94773.99]

OUT variable
HistGradientBoostingRegressor
Mean squared error: 2309847012.90
Coefficient of determination: 0.83
M



OUT variable
BaggingRegressor
Mean squared error: 2620300684.14
Coefficient of determination: 0.81
Mean Absolute Error: 33328.86
Expected: 66900.0 but actual [107310.]

OUT variable
AdaBoostRegressor
Mean squared error: 8976896093.04
Coefficient of determination: 0.35
Mean Absolute Error: 82572.44
Expected: 66900.0 but actual [117985.125]

