# Consuming a Python Model from a Python Client

In [1]:
import pickle

import pandas as pd
from sklearn.linear_model import LogisticRegression

df = pd.read_csv("titanic.csv")
df = df[["Survived", "Age", "Sex", "Pclass"]]
df = pd.get_dummies(df, columns=["Sex", "Pclass"])
df.dropna(inplace=True)

X = df.drop("Survived", axis=1)
y = df["Survived"]

In [2]:
model = LogisticRegression(random_state=0)
model.fit(X, y)

In [3]:
pickle.dump(model, open("titanic.pkl", "wb"))

In [4]:
import pickle

import pandas as pd

model = pickle.load(open("titanic.pkl", "rb"))
female = pd.DataFrame(
    {
        "Age": [30],
        "Sex_female": [1],
        "Sex_male": [0],
        "Pclass_1": [1],
        "Pclass_2": [0],
        "Pclass_3": [0],
    }
)

probability = model.predict_proba(female)[0][1]
print(f"Probability of survival: {probability:.1%}")

Probability of survival: 92.8%


## Training and saving a sentiment analysis pipeline

In [6]:
import pickle

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

df = pd.read_csv("reviews.csv", encoding="ISO-8859-1")
df = df.drop_duplicates()

X = df["Text"]
y = df["Sentiment"]

vectorizer = CountVectorizer(ngram_range=(1, 2), stop_words="english", min_df=20)
model = LogisticRegression(max_iter=1000, random_state=0)

pipe = make_pipeline(vectorizer, model)
pipe.fit(X, y)

In [7]:
pickle.dump(pipe, open("sentiment.pkl", "wb"))

## 

In [9]:
import pickle

pipe = pickle.load(open("sentiment.pkl", "rb"))
score = pipe.predict_proba(["Great food and excellent service!"])[0][1]
score

np.float64(0.8889811604545275)

## Using ONNX to bridge the language gap

ONNX (Open Neural Network Exchange)

In [11]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

initial_type = [("string_input", StringTensorType([None, 1]))]
onnx = convert_sklearn(pipe, initial_types=initial_type)


with open("sentiment.onnx", "wb") as f:
    f.write(onnx.SerializeToString())

In [22]:
import numpy as np
import onnxruntime as rt

session = rt.InferenceSession("sentiment.onnx")
input_name = session.get_inputs()[0].name  # string_input
label_name = session.get_outputs()[1].name  # output_probability

# NOTE: use index 0 for "predict", 1 for "predict_proba"

input = np.array("Greet food and excellent service!").reshape(1, -1)
score = session.run([label_name], {input_name: input})[0][0][1]
score

0.8176727294921875