In [1]:
from optbinning import OptimalBinning

from os import listdir
from os.path import isfile, join
from datetime import datetime, timedelta
import time
import sys

import pandas as pd
import numpy as np
import random

from scipy import stats
from scipy.stats import chi2_contingency
from scipy.stats import chi2

from optbinning import OptimalBinning
from catboost import CatBoostClassifier, Pool, cv

import scikitplot as skplt
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

from bayes_opt import BayesianOptimization
import catboost
import pickle
import csv
import scipy as sc

## Create pyhon file

In [2]:
%%writefile model.py
import numpy as np
import csv
import os
import pandas as pd
import pickle
default_model_path = '/opt/ml'

model_cache = {}

def load_model(algorithm, model_path):
    if model_cache.get(algorithm) is None:
        model_filename = os.path.join(model_path, 'model.pkl')
        with open(model_filename, newline='') as file:
            model_cache[algorithm] = pickle.load(open(model_filename, 'rb'))
    
    return model_cache[algorithm]


def __read_csv_list(filename):
    with open(filename, newline='') as file:
        reader = csv.reader(file)
        return list(reader)[0]
        
    return []

predictors_cache = {}

def load_predictors(algorithm, model_path):
    if predictors_cache.get(algorithm) is None:
        predictors_filename = os.path.join(model_path, 'predictors.csv')
        predictors_cache[algorithm] = __read_csv_list(predictors_filename)
            
    return predictors_cache[algorithm]

to_cat_cache = {}

def load_to_cat(algorithm, model_path):
    if to_cat_cache.get(algorithm) is None:
        to_cat_filename = os.path.join(model_path, 'to_cat.csv')
        to_cat_cache[algorithm] = __read_csv_list(to_cat_filename)
            
    return to_cat_cache[algorithm]


def predict(data, model_path = default_model_path):
    algorithm = "algorithm_catboost"
    
    model = load_model(algorithm, model_path)
    predictors = load_predictors(algorithm, model_path)
    to_cat = load_to_cat(algorithm, model_path)
    
    if data.shape[0] == 0:
        return pd.DataFrame()

    for x in to_cat:
        data[x] = data[x].astype(str)

    y_pred_probs = model.predict_proba( data[predictors].values )
    probabilities = [item[1] for item in y_pred_probs]
    data['pd'] = probabilities
    
    
    return data

Writing model.py


__Test model__

In [9]:
import model
import os

model_path = os.path.dirname(os.getcwd())
#model_path = os.getcwd()
ret = None
with open(os.path.join(model_path,'local-model/X_test.csv'), newline='') as file:
    data = pd.read_csv(file, sep=',', low_memory=False, error_bad_lines=False)
    ret = model.predict(data, model_path)
ret

Unnamed: 0,x1,x3,y1,x4,x2,x5,y,flag_train,pd
0,1.527905,-0.732013,3.507635,-0.010777,1.134899,D,1,2,0.624604
1,-0.100697,0.359118,2.718173,0.507330,0.526648,B,1,2,0.592694
2,0.303793,-0.450724,2.095225,0.165429,1.590835,B,1,2,0.592271
3,-0.764048,0.538282,1.699226,-0.337947,1.398086,B,1,2,0.587154
4,1.064482,0.223539,2.646861,-0.204668,-0.050285,D,1,2,0.623080
...,...,...,...,...,...,...,...,...,...
194,0.855495,-1.220430,5.829651,0.270106,-1.789519,D,1,2,0.617781
195,0.783989,1.232337,5.536935,-0.046588,0.883816,D,1,2,0.618870
196,-1.800726,-1.168124,-1.656599,-0.302814,-1.160325,B,1,2,0.433109
197,1.002345,2.190936,-4.664816,-0.068465,-1.399620,D,0,2,0.432097


# Create web server app
__Bottle__ is the webservice api

__bjoern__ is the WSGI server

In [7]:
%%writefile app.py
import pandas as pd
import pickle
import csv
import sys
import os
import io
import bjoern
import bottle
from bottle import run, request, post, get

# adds the model.py path to the list
model_path = os.path.dirname(os.getcwd())
if 'MODEL_PATH' in os.environ:
    model_path = os.environ['MODEL_PATH']

sys.path.insert(0,model_path)

import model

@get('/ping')
def ping():
    return "Ok"

@post('/invocations')
def invoke():
    # load image from POST and convert it to json
    try:
        req = request.body

        data = pd.read_csv(req, sep=',', low_memory=False, error_bad_lines=False)
        predictions = model.predict(data, model_path)

        return predictions.to_csv(sep=',', index=False)
    except Exception as e:
        print(e)
        return bottle.HTTPResponse(status=500)
    

if __name__ == '__main__':
    
    if len(sys.argv) == 2 and ( not sys.argv[1] in [ "serve", "train"] ):
        raise Exception("Invalid argument: you must inform 'train' for fake training mode or 'serve' predicting mode") 

    train = len(sys.argv) == 2 and (sys.argv[1] == "train")
    
    if train:
        print( "Fake training completed" )
       
    else:
        print("Server started")
        if 'PORT' in os.environ: 
            port = int(os.environ['PORT'])
        else:
            port = 8080
        
        print(f"Port: {port}")
        print(f"Model path: {model_path}")
        bjoern.run(bottle.app(), "0.0.0.0", port)
        
        

Writing app.py


## Create Dockerfile

In [20]:
#Docker doesn't allow to copy files from parent folders.
!cp ../local-model/model.pkl ./
!cp ../local-model/predictors.csv ./
!cp ../local-model/to_cat.csv ./

In [21]:
%%writefile Dockerfile.heroku
FROM python:3.9.1

RUN apt-get update -y && apt-get install -y libev-dev
RUN pip install bottle
RUN pip install bjoern
RUN pip install pandas==1.2.2
RUN pip install numpy==1.20.1
RUN pip install catboost==0.24.4

RUN mkdir -p /opt/program
RUN mkdir -p /opt/ml

ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"
ENV MODEL_PATH='/opt/ml'

COPY app.py /opt/program
COPY model.py /opt/ml
COPY model.pkl /opt/ml
COPY predictors.csv /opt/ml
COPY to_cat.csv /opt/ml

WORKDIR /opt/program

ENTRYPOINT ["python", "app.py"]

Overwriting Dockerfile.heroku


## Build docker image

In [22]:
!docker build -f Dockerfile.heroku -t containerdocker:v0.0.1 .

Sending build context to Docker daemon  2.224MB
Step 1/20 : FROM python:3.9.1
 ---> 2a93c239d591
Step 2/20 : RUN apt-get update -y && apt-get install -y libev-dev
 ---> Using cache
 ---> 6596e41441ae
Step 3/20 : RUN pip install bottle
 ---> Using cache
 ---> 9c477085055a
Step 4/20 : RUN pip install bjoern
 ---> Using cache
 ---> 8458496a66ce
Step 5/20 : RUN pip install pandas==1.2.2
 ---> Using cache
 ---> f114805d12d7
Step 6/20 : RUN pip install numpy==1.20.1
 ---> Using cache
 ---> 7e192144e093
Step 7/20 : RUN pip install catboost==0.24.4
 ---> Using cache
 ---> 9d83467dc052
Step 8/20 : RUN mkdir -p /opt/program
 ---> Using cache
 ---> 6efac8035fb6
Step 9/20 : RUN mkdir -p /opt/ml
 ---> Using cache
 ---> df2caa0db5de
Step 10/20 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 2a4e8d9623a2
Step 11/20 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> cd203f5fb80e
Step 12/20 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 623ce389700e
Step 13/20 : ENV MODEL_PATH='/

In [28]:
!docker run -p 8081:8080 -d --rm --name test_img containerdocker:v0.0.1

042c641e416ec97af066d33af0d3aa42474c0ac6e450d833df4380a442bd364e


In [29]:
import urllib.request
contents = urllib.request.urlopen("http://localhost:8081/ping").read()
contents

b'Ok'

In [30]:
from io import StringIO
with open(os.path.join(model_path,'local-model/X_test.csv'), newline='') as file:
    #data = pd.read_csv(file, sep=',', low_memory=False, error_bad_lines=False)
    data = file.read().encode('utf-8')

    # create request
    req = urllib.request.Request("http://localhost:8081/invocations", data = data)
    

# execute request
resp = str(urllib.request.urlopen(req).read(),'utf-8')
resp = StringIO(resp) 
res = pd.read_csv(resp)
res

Unnamed: 0,x1,x3,y1,x4,x2,x5,y,flag_train,pd
0,1.527905,-0.732013,3.507635,-0.010777,1.134899,D,1,2,0.916793
1,-0.100697,0.359118,2.718173,0.507330,0.526648,B,1,2,0.853656
2,0.303793,-0.450724,2.095225,0.165429,1.590835,B,1,2,0.865216
3,-0.764048,0.538282,1.699226,-0.337947,1.398086,B,1,2,0.871932
4,1.064482,0.223539,2.646861,-0.204668,-0.050285,D,1,2,0.906335
...,...,...,...,...,...,...,...,...,...
194,0.855495,-1.220430,5.829651,0.270106,-1.789519,D,1,2,0.893840
195,0.783989,1.232337,5.536935,-0.046588,0.883816,D,1,2,0.939172
196,-1.800726,-1.168124,-1.656599,-0.302814,-1.160325,B,1,2,0.243705
197,1.002345,2.190936,-4.664816,-0.068465,-1.399620,D,0,2,0.207043


In [31]:
!docker stop test_img

test_img
