In [2]:
# Testing ONNX input types 
import sys
import os

import pandas as pd
import numpy as np
import pytest
from pathlib import Path
from sklearn.datasets import load_iris
import onnxruntime as rt
from mlisne.dataset import IVEstimatorDataset
from mlisne.helpers import estimate_qps

In [3]:
# Directory that the exported ONNX models are written to and later loaded from
# (relative to the notebook's working directory).
model_out_path = "../examples/models"

## Generating scikit-learn logistic regression models

In [4]:
# Load the iris data set and pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target

In [5]:
# Cast the features to float32 (the first ONNX export below declares a float tensor input).
X = np.array(X, dtype=np.float32)

# Turn y into a "binary" recommendation: every class above 0 becomes 1.
# A new array is built instead of assigning through a mask, because `y` is the
# very array stored in `iris.target`; writing into it in place would silently
# alter the loaded data set as well.
y = np.where(y > 0, 1, y)

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Hold out part of the data. The seed is fixed so that the split -- and therefore
# the fitted classifier and every model exported from it -- is identical on each
# Restart & Run All instead of changing with every execution.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [12]:
# Fit a default logistic regression on the training split; `fit` returns the
# estimator itself, which is then shown as the cell result.
clr = LogisticRegression().fit(X_train, y_train)
clr

LogisticRegression()

In [13]:
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType, Int64TensorType


def _save_onnx(onnx_model, path):
    """Serialize ``onnx_model`` and write the resulting bytes to ``path``.

    Parameters
    ----------
    onnx_model : object exposing ``SerializeToString()``
        The converted model returned by ``convert_sklearn`` / ``to_onnx``.
    path : str
        Destination file; it is opened in binary write mode and overwritten.
    """
    with open(path, "wb") as model_file:
        model_file.write(onnx_model.SerializeToString())


# The same fitted classifier is exported three times, differing only in how the
# input tensor type is declared.

# 1) Input declared explicitly as a float tensor with 4 columns.
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
_save_onnx(onx, f"{model_out_path}/logreg_iris.onnx")

# 2) Input declared explicitly as a double tensor with 4 columns.
initial_type = [('double_input', DoubleTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
_save_onnx(onx, f"{model_out_path}/logreg_iris_double.onnx")

# 3) No explicit declaration: the training data is handed to `to_onnx` instead.
onx = to_onnx(clr, X=X_train)
_save_onnx(onx, f"{model_out_path}/logreg_iris_infertype.onnx")

In [6]:
# Open an inference session on the float-input model exported above.
logreg_onnx_file = f"{model_out_path}/logreg_iris.onnx"
sess = rt.InferenceSession(logreg_onnx_file)

In [7]:
# Name of the first input the model declares; needed as the feed key for sess.run.
first_input = sess.get_inputs()[0]
input_name = first_input.name
input_name

'float_input'

In [23]:
# Name of the model's second declared output (index 1). Despite the variable
# name, the recorded cell result shows this is 'output_probability'.
second_output = sess.get_outputs()[1]
label_name = second_output.name
label_name

'output_probability'

In [24]:
# Run the model on all of X, requesting only the output named by `label_name`.
# One output name is requested, so the wanted result is the first list element.
requested_outputs = sess.run([label_name], {input_name: X})
pred_onx = requested_outputs[0]
print(pred_onx)

[{0: 0.9755793213844299, 1: 0.02442067861557007}, {0: 0.966856837272644, 1: 0.03314316272735596}, {0: 0.979035496711731, 1: 0.02096453309059143}, {0: 0.9671860933303833, 1: 0.0328139066696167}, {0: 0.9784682989120483, 1: 0.02153170108795166}, {0: 0.954814076423645, 1: 0.04518589377403259}, {0: 0.9770119786262512, 1: 0.02298802137374878}, {0: 0.9689376950263977, 1: 0.031062304973602295}, {0: 0.97145015001297, 1: 0.02854984998703003}, {0: 0.9655818939208984, 1: 0.03441813588142395}, {0: 0.970598578453064, 1: 0.029401451349258423}, {0: 0.9651646614074707, 1: 0.03483530879020691}, {0: 0.9710370302200317, 1: 0.02896296977996826}, {0: 0.9877074956893921, 1: 0.01229250431060791}, {0: 0.9851080179214478, 1: 0.014891952276229858}, {0: 0.9768781661987305, 1: 0.02312186360359192}, {0: 0.9801970720291138, 1: 0.019802898168563843}, {0: 0.9732888340950012, 1: 0.02671116590499878}, {0: 0.9488072991371155, 1: 0.05119270086288452}, {0: 0.9741181135177612, 1: 0.02588188648223877}, {0: 0.9440691471099854

In [22]:
# Per-column range of X: row 0 holds each column's minimum, row 1 its maximum.
# Vectorized reductions replace calling Python's min()/max() on every column
# through apply_along_axis; shape and dtype of the result are the same.
np.stack([X.min(axis=0), X.max(axis=0)])

array([[4.3, 2. , 1. , 0.1],
       [7.9, 4.4, 6.9, 2.5]], dtype=float32)

## Generating PyTorch binary classification models

In [34]:
from typing import Tuple, Dict, Union, Sequence, Optional

# Check whether a NumPy array passes an isinstance test against typing.Sequence
# (the recorded result is False).
test_arr = np.array([1, 2, 3])
is_sequence = isinstance(test_arr, Sequence)
is_sequence

False

In [36]:
# Length of the first row of a 2-D array, i.e. its number of columns.
test = np.array([[1, 2, 3], [1, 2, 3]])
first_row = test[0]
len(first_row)

3

In [27]:
# Each prediction is a dict keyed by class (0 and 1 in the output above);
# keep only the value stored under key 1 for every row.
pred_test = [class_probabilities[1] for class_probabilities in pred_onx]
pred_test

[0.02442067861557007,
 0.03314316272735596,
 0.02096453309059143,
 0.0328139066696167,
 0.02153170108795166,
 0.04518589377403259,
 0.02298802137374878,
 0.031062304973602295,
 0.02854984998703003,
 0.03441813588142395,
 0.029401451349258423,
 0.03483530879020691,
 0.02896296977996826,
 0.01229250431060791,
 0.014891952276229858,
 0.02312186360359192,
 0.019802898168563843,
 0.02671116590499878,
 0.05119270086288452,
 0.02588188648223877,
 0.05593085289001465,
 0.030642032623291016,
 0.007720082998275757,
 0.06851387023925781,
 0.0639677345752716,
 0.052129507064819336,
 0.04550310969352722,
 0.03144177794456482,
 0.02768629789352417,
 0.03896534442901611,
 0.04408806562423706,
 0.04446026682853699,
 0.017809540033340454,
 0.016771167516708374,
 0.03761017322540283,
 0.019547998905181885,
 0.02384895086288452,
 0.01878756284713745,
 0.021422266960144043,
 0.032512664794921875,
 0.020723789930343628,
 0.04267913103103638,
 0.01825082302093506,
 0.05015730857849121,
 0.06387647986412048,

In [32]:
# Test IV Estimator
import sys
import os
import pandas as pd
import numpy as np
import pytest
from pathlib import Path
from sklearn.datasets import load_iris
import onnxruntime as rt
from pathlib import Path
from linearmodels.iv import IV2SLS
import statsmodels.api as sm

from mlisne.dataset import IVEstimatorDataset
from mlisne.helpers import estimate_qps
from mlisne.estimator import TreatmentIVEstimator


In [34]:
# Locations of the example data and models, relative to the notebook's working
# directory, in the same style as the "../examples/models" path used earlier in
# this notebook (assumes the notebook is run from that same directory).
# The previous values were absolute paths on one developer's D: drive, written
# in non-raw strings whose backslashes formed invalid escape sequences.
data_path = "../examples/data"
model_path = "../examples/models"

In [35]:
# Read the iris CSV; `iris` is bound to the same DataFrame as `iris_data`.
iris_data = pd.read_csv(f"{data_path}/iris_data.csv")
iris = iris_data
empty_estimator = TreatmentIVEstimator()

# Split the frame: the "QPS" column becomes its own array, and every remaining
# column goes into the array wrapped by the estimator data set.
qps_column = iris_data["QPS"]
qps = np.array(qps_column)
frame_without_qps = iris_data.drop(columns="QPS")
data = np.array(frame_without_qps)
dataset = IVEstimatorDataset(data)

# Fit the estimator on the data set together with the QPS values.
empty_estimator.fit(dataset, qps)



We will fit on 10000 values out of 10000 from the dataset for which the QPS estimation is nondegenerate.


In [36]:
# Show the fitted estimator's text summary (print applies str() itself).
print(empty_estimator)

+-----------+-----------+----------+---------+---------+----------+----------+
|           | Parameter | Std. Err |  T-Stat | P-Value | Lower CI | Upper CI |
+-----------+-----------+----------+---------+---------+----------+----------+
|   const   |   1.0098  |  0.0588  | 17.1649 |   0.0   |  0.8945  |  1.1251  |
| Treatment |   4.5396  |  0.185   | 24.5359 |   0.0   |  4.1769  |  4.9023  |
|    QPS    |   1.732   |  0.113   | 15.3246 |   0.0   |  1.5104  |  1.9535  |
+-----------+-----------+----------+---------+---------+----------+----------+
