In [2]:
import pickle
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

In [3]:
# Train a sentiment classifier on the review data and save it as a pickle.
df = pd.read_csv('../../Datasets/reviews.csv', encoding="ISO-8859-1")
df = df.drop_duplicates()  # remove rows that are identical across all columns

x = df['Text']
y = df['Sentiment']

# Bag-of-words features: unigrams and bigrams, English stop words removed,
# terms that occur in fewer than 20 documents ignored.
vectorizer = CountVectorizer(ngram_range=(1, 2), stop_words='english', min_df=20)

# random_state is fixed so repeated runs produce the same fitted model.
model = LogisticRegression(max_iter=1000, random_state=0)
pipe = make_pipeline(vectorizer, model)
pipe.fit(x, y)

# The context manager guarantees the file is flushed and closed before any
# later cell reads 'sentiment.pkl' back.
with open('sentiment.pkl', 'wb') as pickle_file:
    pickle.dump(pipe, pickle_file)

In [4]:
# Reload the pickled pipeline and score a sample review.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# pickle files that you created yourself or otherwise trust.
with open('sentiment.pkl', 'rb') as pickle_file:
    pipe = pickle.load(pickle_file)

# predict_proba returns one row per input text; [0][1] picks the first row's
# probability for the class at index 1 of the classifier's classes
# (presumably the positive sentiment label - confirm against the dataset).
score = pipe.predict_proba(['Great food and excellent service!'])[0][1]
print(score)

0.8889867397468919


In [6]:
# Convert the fitted pipeline to ONNX and write it to 'sentiment.onnx'
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

# The converted model takes a single input named 'string_input': a string
# tensor with one column and an unspecified number of rows.
string_column = StringTensorType([None, 1])
initial_type = [('string_input', string_column)]
onnx = convert_sklearn(pipe, initial_types=initial_type)

# Serialize first, then write the bytes out in binary mode.
serialized_model = onnx.SerializeToString()
with open('sentiment.onnx', 'wb') as onnx_file:
    onnx_file.write(serialized_model)

In [10]:
# Consume the ONNX model from Python with onnxruntime
import numpy as np
import onnxruntime as rt

session = rt.InferenceSession('sentiment.onnx')
input_name = session.get_inputs()[0].name
# Model outputs: 0 - predict (label), 1 - predict_proba (class probabilities).
# label_name therefore holds the name of the probabilities output.
label_name = session.get_outputs()[1].name

# reshape(1, -1) turns the scalar string into a (1, 1) array: one row, one
# column. The variable is deliberately not called `input`, so the built-in
# input() stays usable in later cells.
input_text = np.array('Sometimes maybe good sometimes maybe shit').reshape(1, -1)

# run() returns one entry per requested output; [0] is the probabilities
# output, the next [0] is the first (only) input row, and [1] selects the
# entry for class 1 (presumably the positive sentiment - confirm).
score = session.run([label_name], {input_name: input_text})[0][0][1]
print(score)

0.32899209856987
