# Logistic Regression with Preprocessing

This example demonstrates how to call one deployed endpoint from another.

In this scenario, two projects can be iterated on and deployed independently—one for preprocessing and one for classification—  
and then composed modularly through their endpoints.

In [1]:
# Make sure the Verta client library is available, installing it on first run.
# Note that `verta` is not re-imported after installation in this cell; the
# later `from verta import Client` cell performs the import that is actually used.
try:
    import verta
except ImportError:
    # `!` is an IPython shell escape, so this line only works inside a notebook.
    !pip install verta

In [2]:
# Host of the Verta backend; passed to `Client(HOST)` further down.
HOST = "app.verta.ai"

In [3]:
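# Uncomment and fill in to provide Verta credentials from within the notebook:
# import os
# os.environ['VERTA_EMAIL'] = "<your Verta email>"
# os.environ['VERTA_DEV_KEY'] = "<your Verta developer key>"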

## Imports

In [4]:
from __future__ import print_function

import itertools
import os
import time

import pandas as pd

import sklearn
from sklearn import preprocessing
from sklearn import linear_model

In [5]:
# `wget` is used in the "Prepare data" cell to fetch the example datasets;
# install it on demand and then import it.
try:
    import wget
except ImportError:
    # `!` is an IPython shell escape, so this line only works inside a notebook.
    !pip install wget  # you may need pip3
    import wget

---

## Prepare data

In [6]:
# Fetch the census train/test CSVs into the working directory. A file that is
# already present locally is not downloaded again.
train_data_url = "http://s3.amazonaws.com/verta-starter/census-train.csv"
test_data_url = "http://s3.amazonaws.com/verta-starter/census-test.csv"

# Local filenames are derived from the URLs by wget.
train_data_filename = wget.detect_filename(train_data_url)
test_data_filename = wget.detect_filename(test_data_url)

for _url, _filename in (
    (train_data_url, train_data_filename),
    (test_data_url, test_data_filename),
):
    if not os.path.isfile(_filename):
        wget.download(_url)

In [7]:
# Read both CSVs. In each frame every column except the last is treated as a
# feature, and the last column is the label.
df_train = pd.read_csv(train_data_filename)
df_test = pd.read_csv(test_data_filename)

X_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Last expression of the cell, so the notebook renders a preview of the data.
df_train.head()

## Instantiate client

In [8]:
from verta import Client

# Notebook-level client used by every logging/deployment cell below and by
# Classifier.__init__.
# NOTE(review): presumably credentials come from the VERTA_EMAIL / VERTA_DEV_KEY
# environment variables shown in the commented-out cell above; confirm.
client = Client(HOST)

## Log preprocessor

First, we will log and deploy a data preprocessor. This will have its own endpoint that can be called (and updated) in isolation.

In [9]:
import pickle

class Preprocessor:
    """Model wrapper that serves a pickled preprocessor object.

    The wrapped object is loaded from the ``'preprocessor'`` artifact and must
    expose a ``transform`` method.
    """

    def __init__(self, artifacts):
        """Load the preprocessor from disk.

        Parameters
        ----------
        artifacts : dict
            Maps artifact keys to file paths; the ``'preprocessor'`` entry must
            point to a pickle file.
        """
        artifact_path = artifacts['preprocessor']
        # NOTE: unpickling runs arbitrary code; only load trusted artifacts.
        with open(artifact_path, 'rb') as artifact_file:
            self.preprocessor = pickle.load(artifact_file)

    def predict(self, x):
        """Return ``x`` after applying the wrapped preprocessor's ``transform``."""
        transformed = self.preprocessor.transform(x)
        return transformed

    def example(self):
        """Return a sample input: a list holding one row of raw feature values."""
        sample_row = [
            44, 0, 0, 40, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ]
        return [sample_row]

In [10]:
# Select (or create) the project and experiment that the new run belongs to.
client.get_or_create_project("Preprocessor")
client.get_or_create_experiment("Normalization")
run = client.get_or_create_experiment_run()

# Log the sklearn Normalizer under the 'preprocessor' key — the same key that
# Preprocessor.__init__ reads — and register the Preprocessor wrapper class as
# the model. Note that the Normalizer is logged without a prior fit() call.
sklearn_preprocessor = preprocessing.Normalizer()
run.log_artifact('preprocessor', sklearn_preprocessor)
run.log_model(Preprocessor, artifacts=['preprocessor'])
run.log_requirements(['sklearn'])

# Deploy the run to its own endpoint, which the classifier looks up by this path.
# NOTE(review): wait=True presumably blocks until the update completes; confirm.
endpoint = client.get_or_create_endpoint("ml-preprocessor")
endpoint.update(run, wait=True)
endpoint  # last expression of the cell, so the notebook displays it

## Log classifier

With the preprocessor running, we can call its endpoint from within our model. Inputs sent to this model endpoint will therefore also be passed to the preprocessor endpoint during the course of a prediction.

In [11]:
import pickle
# Fail early if the notebook-level `client` is undefined or falsy:
# Classifier.__init__ below resolves the preprocessor endpoint through it.
assert client  # the model will reuse the client from this notebook

class Classifier:
    """Model wrapper that preprocesses inputs remotely, then classifies them.

    Inputs are first sent to the model deployed at the ``"ml-preprocessor"``
    endpoint; its output is then passed to the locally unpickled classifier.
    """

    def __init__(self, artifacts):
        """Load the classifier and connect to the preprocessor endpoint.

        Parameters
        ----------
        artifacts : dict
            Maps artifact keys to file paths; the ``'classifier'`` entry must
            point to a pickle file.
        """
        classifier_path = artifacts['classifier']
        # NOTE: unpickling runs arbitrary code; only load trusted artifacts.
        with open(classifier_path, 'rb') as classifier_file:
            self.classifier = pickle.load(classifier_file)

        # `client` is the global Verta client, not an attribute of this class.
        preprocessor_endpoint = client.get_endpoint("ml-preprocessor")
        self.preprocessor = preprocessor_endpoint.get_deployed_model()

    def predict(self, x):
        """Return the classifier's predictions for ``x`` after preprocessing."""
        return self.classifier.predict(self.preprocessor.predict(x))

    def example(self):
        """Return a sample input: a list holding one row of float feature values."""
        sample_row = [
            0.7396263843801948, 0.0, 0.0, 0.6723876221638134, 0.0, 0.016809690554095334, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.016809690554095334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.016809690554095334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        ]
        return [sample_row]

In [12]:
# Select (or create) the project and experiment that the new run belongs to.
client.get_or_create_project("Classifier")
client.get_or_create_experiment("Logistic Regression")
run = client.get_or_create_experiment_run()

# Train locally on features transformed by the same Normalizer instance that was
# logged for the preprocessor endpoint, then log the fitted model under the
# 'classifier' key that Classifier.__init__ reads.
sklearn_classifier = linear_model.LogisticRegression(max_iter=10**5)
sklearn_classifier.fit(sklearn_preprocessor.transform(X_train), y_train)
run.log_artifact('classifier', sklearn_classifier)
run.log_model(Classifier, artifacts=['classifier'])
# NOTE(review): urllib3 is presumably needed for the endpoint call made inside
# the deployed Classifier; confirm.
run.log_requirements(['sklearn', 'urllib3'])

# Deploy the run to the classifier's own endpoint.
# NOTE(review): wait=True presumably blocks until the update completes; confirm.
endpoint = client.get_or_create_endpoint("ml-classifier")
endpoint.update(run, wait=True)
endpoint  # last expression of the cell, so the notebook displays it

## Run predictions

In [13]:
# Send test rows one at a time to the deployed classifier endpoint and print
# each response. itertools.cycle repeats the rows indefinitely, so with a
# non-empty test set this cell runs until the kernel is interrupted.
deployed_model = endpoint.get_deployed_model()

test_rows = itertools.cycle(X_test.values)
for row in test_rows:
    prediction = deployed_model.predict([row])
    print(prediction)
    time.sleep(0.5)  # pause half a second between requests

---