# Operationalizing Machine Learning Models

## Consuming a Python Model from a Python Client

In [1]:
# To demonstrate, I'm building and training the Titanic model from earlier, but instead of making predictions I'll save the model to a pickle file.

In [2]:
import pickle
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Load the Titanic dataset and keep only the label column plus the three feature columns.
df = pd.read_csv("https://raw.githubusercontent.com/jeffprosise/Applied-Machine-Learning/main/Chapter%203/Data/titanic.csv")
df = df[["Survived","Age","Sex","Pclass"]]
# One-hot encode the categorical columns, then drop rows that contain missing values.
df = pd.get_dummies(df,columns=["Sex","Pclass"])
df = df.dropna()

# Split into features (everything except the label) and the label itself.
x = df.drop("Survived",axis=1)
y = df["Survived"]

model = LogisticRegression(random_state=0)
model.fit(x,y)

# Serialize the fitted model. The context manager closes the file explicitly
# instead of leaving the handle for the garbage collector to clean up.
with open("titanic.pkl","wb") as f:
    pickle.dump(model,f)

In [3]:
# To invoke the model, a Python client uses pickle.load to deserialize it and then makes a prediction.
# NOTE: pickle.load can execute arbitrary code, so only load pickle files from a trusted source.

# The context manager closes the file explicitly once the model has been read.
with open("titanic.pkl","rb") as f:
    model = pickle.load(f)

# A single-row input frame: a 30-year-old female passenger in first class.
female = pd.DataFrame({ 'Age': [30], 'Sex_female': [1], 'Sex_male': [0],
                        'Pclass_1': [1], 'Pclass_2': [0], 'Pclass_3': [0] })

# Take the second probability column of the first (only) row, reported below as survival.
probability = model.predict_proba(female)[0][1]
print(f'Probability of survival: {probability:.1%}')

Probability of survival: 92.8%


In [4]:
#The following code trains and saves a sentiment-analysis model. This time, a pipeline containing a 
#CountVectorizer and a LogisticRegression classifier is saved:


import pickle
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
 
# Load the reviews dataset and remove duplicate rows.
df = pd.read_csv("https://raw.githubusercontent.com/jeffprosise/Applied-Machine-Learning/main/Chapter%204/Data/reviews.csv", encoding="ISO-8859-1")
df = df.drop_duplicates()

# Raw review text is the input; the sentiment column is the label.
x = df['Text']
y = df['Sentiment']

vectorizer = CountVectorizer(ngram_range=(1, 2), stop_words='english',
                             min_df=20)

# Chain the vectorizer and the classifier so both are fitted and saved as one object.
model = LogisticRegression(max_iter=1000, random_state=0)
pipe = make_pipeline(vectorizer, model)
pipe.fit(x, y)

# Serialize the fitted pipeline. The context manager closes the file explicitly
# instead of leaving the handle for the garbage collector to clean up.
with open('sentiment.pkl', 'wb') as f:
    pickle.dump(pipe, f)

In [5]:
# Deserialize the pipeline and call predict_proba to score a line of text for sentiment with a few simple lines of code:

import pickle

# Load the saved pipeline; the context manager closes the file explicitly after reading.
# NOTE: pickle.load can execute arbitrary code, so only load pickle files from a trusted source.
with open('sentiment.pkl', 'rb') as f:
    pipe = pickle.load(f)

# Second probability column of the first (only) row; displayed as the cell's output.
pipe.predict_proba(['Great food and excellent service!'])[0][1]

0.8826850598493062

## Using ONNX to Bridge the Language Gap

In [6]:
!pip install Skl2onnx

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Skl2onnx
  Downloading skl2onnx-1.14.0-py2.py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.0/294.0 KB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnx>=1.2.1
  Downloading onnx-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m85.4 MB/s[0m eta [36m0:00:00[0m
Collecting onnxconverter-common>=1.7.0
  Downloading onnxconverter_common-1.13.0-py2.py3-none-any.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.8/83.8 KB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx, onnxconverter-common, Skl2onnx
Successfully installed Skl2onnx-1.14.0 onnx-1.13.1 onnxconverter-common-1.13.0


In [7]:
# Use skl2onnx's convert_sklearn function to convert the trained scikit-learn pipeline and save it to a .onnx file that can be used from other programming languages

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

# Declare the pipeline's input: a string tensor named "string_input" with shape [None, 1].
initial_type = [
    ("string_input", StringTensorType([None, 1])),
]

# Convert the fitted pipeline into an ONNX model object.
onnx = convert_sklearn(pipe, initial_types=initial_type)

# Serialize the ONNX model and write the bytes to disk.
with open("sentiment.onnx", "wb") as f:
    f.write(onnx.SerializeToString())