In [205]:
# Testing ONNX input types 
import sys
import os

import pandas as pd
import numpy as np
import pytest
from pathlib import Path
from sklearn.datasets import load_iris
import onnxruntime as rt
from mlisne.dataset import IVEstimatorDataset
from mlisne.helpers import estimate_qps

In [206]:
model_out_path = "../examples/models"

## Generating Sklearn Logistic Regression Models

In [207]:
iris = load_iris()
X, y = iris.data, iris.target

In [208]:
X = np.array(X, dtype=np.float32)
y[y > 0] = 1 # change y into a "binary" recommendation 

In [209]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression 

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [210]:
clr = LogisticRegression()
clr.fit(X_train, y_train)

LogisticRegression()

In [214]:
clr.predict_proba(X_test)

array([[9.79952542e-01, 2.00474575e-02],
       [9.85864068e-01, 1.41359323e-02],
       [5.53610219e-05, 9.99944639e-01],
       [4.60621774e-03, 9.95393782e-01],
       [3.50490473e-03, 9.96495095e-01],
       [9.76274021e-01, 2.37259794e-02],
       [9.86244166e-01, 1.37558341e-02],
       [2.99901312e-02, 9.70009869e-01],
       [2.53523796e-02, 9.74647620e-01],
       [9.51934181e-04, 9.99048066e-01],
       [7.29251099e-03, 9.92707489e-01],
       [9.87931575e-01, 1.20684252e-02],
       [9.77088390e-01, 2.29116105e-02],
       [2.31593267e-02, 9.76840673e-01],
       [8.87567556e-03, 9.91124324e-01],
       [5.88629494e-04, 9.99411371e-01],
       [6.33131253e-02, 9.36686875e-01],
       [3.34391956e-04, 9.99665608e-01],
       [9.86028635e-01, 1.39713649e-02],
       [4.23865644e-03, 9.95761344e-01],
       [6.10376104e-03, 9.93896239e-01],
       [9.34769252e-02, 9.06523075e-01],
       [2.73249580e-01, 7.26750420e-01],
       [9.85331736e-01, 1.46682637e-02],
       [9.663642

In [211]:
import pickle 

with open(f"{model_out_path}/iris_logreg.pickle", "wb") as f:
    pickle.dump(clr, f)

In [47]:
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType, Int64TensorType

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
with open(f"{model_out_path}/logreg_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())
    
initial_type = [('double_input', DoubleTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
with open(f"{model_out_path}/logreg_iris_double.onnx", "wb") as f:
    f.write(onx.SerializeToString())
    
onx = to_onnx(clr, X = X_train)
with open(f"{model_out_path}/logreg_iris_infertype.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [175]:
sess = rt.InferenceSession(f"{model_out_path}/logreg_iris.onnx")

In [176]:
input_name = sess.get_inputs()[0].name
input_name

'float_input'

In [177]:
label_name = sess.get_outputs()[1].name
label_name

'output_probability'

In [183]:
pred_onx = sess.run([label_name], {input_name: X})[0]

In [22]:
np.apply_along_axis(lambda x: (min(x), max(x)), axis=0, arr=X)

array([[4.3, 2. , 1. , 0.1],
       [7.9, 4.4, 6.9, 2.5]], dtype=float32)

## Generating Pytorch binary classification models

Training code adapted from: https://stackabuse.com/introduction-to-pytorch-for-classification/

In [49]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [53]:
data_path = "../examples/data"
churn = pd.read_csv(f"{data_path}/Kaggle_Churn_Modelling.csv")
churn.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### We will first simulate treatment effect data

In [84]:
categorical_cols = ['Geography', 'Gender', 'HasCrCard', 'IsActiveMember']
numerical_cols = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']

In [60]:
# Convert categorical to discrete types 
for category in categorical_cols:
    churn[category] = churn[category].astype('category')

In [78]:
# Random sampling of 8 columns to use in treatment effect estimation 
treatment_cols = np.random.choice(churn.drop(["Surname", 'RowNumber'], axis=1).columns, 8, replace=False)
treatment_cols

array(['IsActiveMember', 'Gender', 'CreditScore', 'HasCrCard',
       'Geography', 'NumOfProducts', 'Tenure', 'CustomerId'], dtype=object)

In [79]:
# Random sampling of coefficient distributions
means = np.random.uniform(-8, 8, 8)
sd = np.random.uniform(0,5, 8)

In [80]:
# Generate Y0 
churn['Y0'] = 0
for i in range(len(treatment_cols)):
    coefs = np.random.normal(means[i], sd[i], len(churn))
    if churn[treatment_cols[i]].dtype.name == 'category':
        churn['Y0'] += coefs * churn[treatment_cols[i]].cat.codes.values
    else:
        churn['Y0'] += coefs * churn[treatment_cols[i]].to_numpy()

err = np.random.normal(0,3) 
churn['Y0'] += err 

CreditScore
NumOfProducts
Tenure
CustomerId


In [82]:
# Generate Y1 
treatment_effect = np.random.normal(5, 1, len(churn))
err_iv = np.random.normal(size=len(churn))
churn['Y1'] = churn['Y0'] + treatment_effect + err_iv

## Train Pytorch Model

In [240]:
# Get data as tensors
cat = []
for c in categorical_cols:
    cat.append(churn[c].cat.codes.values)
cat_data = np.stack(cat, 1)

cat_tensor = torch.tensor(cat_data).double()
num_tensor = torch.tensor(np.array(churn[numerical_cols]))
tot_tensor = torch.cat((cat_tensor, num_tensor), 1)
output = torch.tensor(churn['Exited'])

In [241]:
# Create categorical embeddings 
categorical_column_sizes = [len(churn[column].cat.categories) for column in categorical_cols]
categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]

We will create two different models: one with categorical embeddings, and one without 

In [116]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler, random_split

train_length = round(0.8 * len(cat_tensor))
test_length = len(cat_tensor) - train_length
cat_dataset = TensorDataset(cat_tensor, num_tensor, output) 
cat_dataset_train, cat_dataset_test = random_split(cat_dataset, [train_length, test_length])

dataset = TensorDataset(tot_tensor, output) 
dataset_train, dataset_test = random_split(dataset, [train_length, test_length])

In [117]:
train_sampler = RandomSampler(cat_dataset_train)
train_dataloader = DataLoader(cat_dataset_train, sampler=train_sampler, batch_size=train_length)

In [132]:
# Model with categorical embeddings
class CatModel(nn.Module):

    def __init__(self, embedding_size, num_numerical_cols, output_size, layers, p=0.4):
        super().__init__()
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        self.embedding_dropout = nn.Dropout(p)
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

        all_layers = []
        num_categorical_cols = sum((nf for ni, nf in embedding_size))
        input_size = num_categorical_cols + num_numerical_cols

        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i

        all_layers.append(nn.Linear(layers[-1], output_size))

        self.layers = nn.Sequential(*all_layers)
        
        self.m = nn.Softmax(dim=1)

    def forward(self, x_categorical, x_numerical):
        embeddings = []
        for i,e in enumerate(self.all_embeddings):
            embeddings.append(e(x_categorical[:,i]))
        x = torch.cat(embeddings, 1)
        x = self.embedding_dropout(x)

        x_numerical = self.batch_norm_num(x_numerical)
        x = torch.cat([x, x_numerical], 1)
        x = self.layers(x)
        x = self.m(x)
        
        return x[:,1]

In [136]:
cat_model = CatModel(categorical_embedding_sizes, num_tensor.shape[1], 2, [200,100,50], p=0.4)

In [137]:
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(cat_model.parameters(), lr=0.001)

In [141]:
epochs = 300
aggregated_losses = []
cat_model.train()

for i in range(epochs):
    i += 1
    for step, batch in enumerate(train_dataloader):
        cat_train, num_train, output = batch
        y_pred = cat_model(cat_train.long(), num_train.float())
        single_loss = loss_function(y_pred.float(), output.float())
        aggregated_losses.append(single_loss)
        single_loss.backward()
        optimizer.step()
    optimizer.zero_grad()
    
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.29775116
epoch:  26 loss: 0.18236019
epoch:  51 loss: 0.15809424
epoch:  76 loss: 0.14666839
epoch: 101 loss: 0.14009865
epoch: 126 loss: 0.13339068
epoch: 151 loss: 0.12312392
epoch: 176 loss: 0.11838789
epoch: 201 loss: 0.11551349
epoch: 226 loss: 0.11376292
epoch: 251 loss: 0.11447604
epoch: 276 loss: 0.11106870
epoch: 300 loss: 0.1125489250


In [225]:
# Model without categorical embeddings
class Model(nn.Module):

    def __init__(self, input_size, output_size, layers, p=0.4):
        super().__init__()

        all_layers=[]
        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i

        all_layers.append(nn.Linear(layers[-1], output_size))

        self.layers = nn.Sequential(*all_layers)
        
        self.m = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layers(x)
        x = self.m(x)
        
        return x[:,1]

In [226]:
model = Model(tot_tensor.shape[1], 2, [200,100,50], p=0.4)
train_sampler = RandomSampler(dataset_train)
train_dataloader = DataLoader(dataset_train, sampler=train_sampler, batch_size=train_length)

In [238]:
tot_tensor.shape[1]

10

In [237]:
model.eval()
model(tot_tensor[0:1,].float())

tensor([[616.8236]], grad_fn=<AddmmBackward>)

In [224]:
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [151]:
epochs = 300
aggregated_losses = []
model.train()

for i in range(epochs):
    i += 1
    for step, batch in enumerate(train_dataloader):
        train, output = batch
        y_pred = model(train.float())
        single_loss = loss_function(y_pred.float(), output.float())
        aggregated_losses.append(single_loss)
        single_loss.backward()
        optimizer.step()
    optimizer.zero_grad()
    
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.27642402
epoch:  26 loss: 0.23063646
epoch:  51 loss: 0.19596481
epoch:  76 loss: 0.17618151
epoch: 101 loss: 0.16872561
epoch: 126 loss: 0.16440813
epoch: 151 loss: 0.16339450
epoch: 176 loss: 0.16258520
epoch: 201 loss: 0.16184682
epoch: 226 loss: 0.16221909
epoch: 251 loss: 0.16204761
epoch: 276 loss: 0.16202207
epoch: 300 loss: 0.1619667709


In [220]:
# Save trained models 
torch.save(cat_model.state_dict(), f"{model_out_path}/churn_categorical.pt")
torch.save(model.state_dict(), f"{model_out_path}/churn.pt")

In [162]:
# Save data and keep track of treatment columns, true treatment effects, etc. 
cat_model.eval()
with torch.no_grad():
    cat_out = cat_model(cat_tensor.long(), num_tensor.float())

rec_draws = np.random.uniform(size=len(churn))
churn['Z_cat'] = (rec_draws <= cat_out.numpy()).astype(int)

In [163]:
model.eval()
with torch.no_grad():
    out = model(tot_tensor.float())

rec_draws = np.random.uniform(size=len(churn))
churn['Z'] = (rec_draws <= out.numpy()).astype(int)

In [164]:
# Follow the ML recommendation Z 75% of the time 
treat_probs = np.random.uniform(size=len(churn))
D = []
for i in range(len(treat_probs)):
    if treat_probs[i] >= 0.75:
        if churn["Z"][i] == 1:
            D.append(0)
        else:
            D.append(1)
    else:
        D.append(churn["Z"][i])

churn['D'] = D 
treat_probs = np.random.uniform(size=len(churn))
D = []
for i in range(len(treat_probs)):
    if treat_probs[i] >= 0.75:
        if churn["Z_cat"][i] == 1:
            D.append(0)
        else:
            D.append(1)
    else:
        D.append(churn["Z_cat"][i])
churn['D_cat'] = D

In [165]:
churn.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Y0,Y1,Z_cat,Z,D,D_cat
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,-20558960.0,-20558960.0,0,0,1,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,-64057860.0,-64057860.0,0,0,0,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,-28493080.0,-28493070.0,1,0,0,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,-68022210.0,-68022210.0,0,0,0,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,-58647290.0,-58647290.0,1,0,1,0


In [167]:
churn.to_csv(f"{data_path}/churn_data.csv", index=False)

In [189]:
test = np.array([[1,2,3],[1,2,3]])

In [219]:
np.array(churn.loc[0,])

array([1, 15634602, 'Hargrave', 619, 'France', 'Female', 42, 2, 0.0, 1, 1,
       1, 101348.88, 1, -20558960.356191415, -20558955.818390768, 0, 0, 1,
       1], dtype=object)

TypeError: unsupported operand type(s) for /: 'str' and 'str'