# IBM AI Enterprise Workflow Capstone

## Part 3: Model Production

## Table of Contents
* [Flask API](#first-bullet)
* [Unit Tests](#second-bullet)
* [Docker Container](#third-bullet)
* [Post-Production Analysis](#fourth-bullet)

## Flask API<a class="anchor" id="first-bullet"></a>

We build a Flask API with endpoints for train, predict, and logfile.

In [1]:
%%writefile app.py

from flask import Flask, jsonify, request, send_from_directory
import os
import argparse
import joblib
import socket
import json
import numpy as np
import pandas as pd

from model import *
from logger import *

# Flask application instance; the @app.route decorators below register the
# endpoints on it and the __main__ section starts it with app.run().
app = Flask(__name__)


@app.route('/')
def home():
    """Answer requests to the root URL with a fixed plain-text body."""
    landing_text = 'Home Page'
    return landing_text
    
    
@app.route('/train', methods = ['GET', 'POST'])
def train():
    """Retrain the models from the training data shipped with the app.

    The request must carry a non-empty JSON body. The body's contents are
    not read: the data directory is always ``./data/cs-train``.

    Returns:
        A JSON ``false`` response when the request has no usable JSON body,
        otherwise a JSON ``true`` response once ``model_train`` has returned.
    """
    # silent=True makes get_json() return None instead of aborting the request
    # when the body is missing, malformed, or not sent as JSON (for example a
    # plain GET), so those requests receive the JSON ``false`` reply below.
    if not request.get_json(silent = True):
        return jsonify(False)

    data_dir = os.path.join('.', 'data', 'cs-train')

    # The value returned by model_train() is not needed by this endpoint.
    model_train(data_dir)

    return jsonify(True)

    
@app.route('/predict', methods = ['GET','POST'])
def predict():
    """Return the model prediction for the country and date in the JSON body.

    Expected JSON body: an object with the keys ``country``, ``year``,
    ``month`` and ``day``; the values are passed to ``model_predict``
    unchanged.

    Returns:
        A JSON ``false`` response when the body is missing, is not a JSON
        object, or lacks one of the required keys. Otherwise a JSON object
        with the entries of the ``model_predict`` result, in which NumPy
        arrays and NumPy scalars are converted to plain Python values.
    """
    # silent=True yields None (instead of an HTTP error) for requests that
    # carry no parseable JSON, e.g. a plain GET.
    payload = request.get_json(silent = True)
    if not payload:
        return jsonify(False)

    # Reject incomplete queries the same way as empty ones rather than
    # failing with an unhandled KeyError/TypeError inside the view.
    required_keys = ('country', 'year', 'month', 'day')
    if not isinstance(payload, dict) or any(key not in payload for key in required_keys):
        return jsonify(False)

    _result = model_predict(country = payload['country'], year = payload['year'],
                            month = payload['month'], day = payload['day'])

    # tolist() turns arrays into lists and NumPy scalars into Python scalars,
    # both of which the JSON encoder can serialize.
    result = {
        key: item.tolist() if isinstance(item, (np.ndarray, np.generic)) else item
        for key, item in _result.items()
    }

    return jsonify(result)
    

@app.route('/logs/<filename>', methods = ['GET'])
def logs(filename):
    """Send a log file from the ``./logs`` directory as a download.

    Args:
        filename: Name of the requested file, taken from the URL.

    Returns:
        A JSON ``false`` response when ``filename`` does not end in ``.log``
        or no such file exists under ``./logs``; otherwise the file itself,
        sent as an attachment.
    """
    # Suffix test instead of a regex: the pattern '.log' is unanchored and its
    # dot matches any character, so it also accepted names like 'catalog.txt'.
    if not filename.endswith('.log'):
        return jsonify(False)

    log_dir = os.path.join('.', 'logs')
    file_path = os.path.join(log_dir, filename)

    # isfile() is also False when the logs directory itself is missing, so no
    # separate directory check is required.
    if not os.path.isfile(file_path):
        return jsonify(False)

    return send_from_directory(log_dir, filename, as_attachment = True)


if __name__ == '__main__':

    # Command line: the optional -d/--debug switch selects Flask debug mode.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', action = 'store_true', help = 'debug flask')
    options = parser.parse_args()

    if not options.debug:
        # Normal run: threaded server bound to every interface.
        app.run(host = '0.0.0.0', threaded = True , port = 8080)
    else:
        # Debug run: Flask's default host with the debugger enabled.
        app.run(debug = True, port = 8080)

Overwriting app.py


In [12]:
import requests

# Smoke test: POST a single query to the predict endpoint of the local API
# and print the raw response body.

port = 8080
query = {
    'country': 'all',
    'year': '2018',
    'month': '1',
    'day': '5',
}
predict_url = 'http://localhost:{}/predict'.format(port)
r = requests.post(predict_url, json = query)
print(r.text)

{
  "y_pred": [
    16504.730266666662
  ], 
  "y_proba": null
}



In [None]:
# Smoke test: POST a JSON body to the train endpoint of the local API.
# `requests` must already have been imported by an earlier cell.

port = 8080
query = {'data_dir': './data/cs-train'}
train_url = 'http://localhost:{}/train'.format(port)
r = requests.post(train_url, json = query)

## Unit Tests<a class="anchor" id="second-bullet"></a>

The unit tests are organized into a suite so that they can be run automatically. Tests were written for the following functionality:

- Model tests: train, load, predict
- API tests: train, predict, logfile

In [16]:
%%writefile ./unit_tests/__init__.py

import unittest
import getopt
import sys
import os


# Parse the command line; only the -v (verbose) flag is recognised.
try:
    optlist, args = getopt.getopt(sys.argv[1:], 'v')
except getopt.GetoptError as err:
    # Report the actual parsing problem, then a short usage hint.
    print(err)
    print(sys.argv[0] + ' -v')
    print('... the verbose flag (-v) may be used')
    # Non-zero status so callers can tell the invocation was rejected.
    sys.exit(2)

# Put this package's directory on the import path so the test modules that
# live next to this file can be imported by their bare names.
sys.path.append(os.path.realpath(os.path.dirname(__file__)))

RUNALL = False

# Verbose mode is enabled when -v appears among the parsed options.
VERBOSE = any(flag == '-v' for flag, _ in optlist)
        
        
# Star imports bring ModelTest and ApiTest (and everything else the test
# modules expose) into this package's namespace.
from model_tests import *
from api_tests import *

# One loader builds a suite per test case class.
_loader = unittest.TestLoader()
ModelTestSuite = _loader.loadTestsFromTestCase(ModelTest)
ApiTestSuite = _loader.loadTestsFromTestCase(ApiTest)

# Combined suite: model tests first, then API tests.
MainSuite = unittest.TestSuite([ModelTestSuite, ApiTestSuite])

Overwriting ./unit_tests/__init__.py


In [7]:
%%writefile ./unit_tests/model_tests.py

import unittest, random

from model import *


class ModelTest(unittest.TestCase):
    """Tests for the train, load and predict functions imported from ``model``."""

    def test_01_train(self):
        """Training leaves 11 files whose names contain 'test' in ./models."""
        # Imported here so the test does not depend on ``os`` happening to be
        # re-exported by ``from model import *``.
        import os

        data_dir = './data/cs-train'
        model_dir = './models'

        model_train(data_dir)
        # Plain substring test; same result as re.search with the literal 'test'.
        models = [f for f in os.listdir(model_dir) if 'test' in f]

        self.assertEqual(len(models), 11)

    def test_02_load(self):
        """Every loaded model exposes ``predict`` and ``fit``."""
        _, all_models = model_load()
        models_loaded = list(all_models.keys())

        # Without this, an empty result would make the loop below pass vacuously.
        self.assertTrue(models_loaded, 'model_load() returned no models')

        # Check all models rather than one random pick, so a failure is
        # reproducible and names the offending model.
        for name in models_loaded:
            with self.subTest(model = name):
                model = all_models[name]
                self.assertIn('predict', dir(model))
                self.assertIn('fit', dir(model))

    def test_03_predict(self):
        """The prediction for a fixed date is a float64 array."""
        # Imported here so the test does not depend on ``np`` happening to be
        # re-exported by ``from model import *``.
        import numpy as np

        country = 'all'
        year = '2018'
        month = '01'
        day = '05'

        result = model_predict(country, year, month, day)
        y_pred = result['y_pred']

        # assertEqual reports both dtypes on failure, unlike assertTrue(a == b).
        self.assertEqual(y_pred.dtype, np.float64)
        
        
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

Overwriting ./unit_tests/model_tests.py


In [8]:
%run unit_tests/model_tests.py

Model Results for Portugal: 

RMSE Values:
RF: 481.4510586410663 

ADA: 633.5492860870743 

GB: 680.0064118013312 

Best Model: 
 RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=5, max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
           oob_score=False, random_state=100, verbose=0, warm_start=False)
Model Results for Belgium: 

RMSE Values:
RF: 98.22937829428255 

ADA: 288.44677520183154 

GB: 296.7943232262412 

Best Model: 
 RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=4, max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
           oob_score=False, random_state=100, verbose=0, warm_start=False)
Model Results for United Kingdom: 

RMSE Values:
RF: 17626.2

.

Model Results for Netherlands: 

RMSE Values:
RF: 95.07514119966471 

ADA: 204.54647830609318 

GB: 167.70392361935362 

Best Model: 
 RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=5, max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
           oob_score=False, random_state=100, verbose=0, warm_start=False)


..

2018-01-05
y_pred: [ 16504.73026667], y_proba: None



----------------------------------------------------------------------
Ran 3 tests in 140.056s

OK


In [14]:
%%writefile ./unit_tests/api_tests.py

import sys
import os
import unittest
import requests
import re
import numpy as np
import pandas as pd
import json


port = 8080

# Probe the local API once at import time; the ApiTest cases are skipped when
# the probe fails. Only request errors count as "server not running" -- a bare
# except would also swallow KeyboardInterrupt and SystemExit.
try:
    requests.post('http://localhost:{}/predict'.format(port))
except requests.exceptions.RequestException:
    server_available = False
else:
    server_available = True
    

class ApiTest(unittest.TestCase):
    """Tests for the train, predict and logs endpoints of the local API.

    Every test is skipped when the import-time probe found no server.
    """

    @unittest.skipUnless(server_available, 'local server is not running')
    def test_01_train(self):
        """The train endpoint answers with JSON ``true``."""
        query = {'data_dir': './data/cs-train'}
        r = requests.post('http://localhost:{}/train'.format(port), json = query)

        # Raw string: '\W' in an ordinary literal is an invalid escape sequence.
        train_complete = re.sub(r'\W+', '', r.text)
        self.assertEqual(train_complete, 'true')

    @unittest.skipUnless(server_available, 'local server is not running')
    def test_02_predict(self):
        """The predict endpoint returns a float as the first ``y_pred`` value."""
        query = {'country': 'all', 'year': '2018', 'month': '1', 'day': '5'}
        r = requests.post('http://localhost:{}/predict'.format(port), json = query)
        response = json.loads(r.text)

        self.assertIsInstance(response['y_pred'][0], float)

    @unittest.skipUnless(server_available, 'local server is not running')
    def test_03_logs(self):
        """The logs endpoint delivers the requested log file as a download."""
        file_name = 'train-test.log'

        r = requests.get('http://localhost:{}/logs/{}'.format(port, file_name))

        # Check the response itself: saving r.content and testing that the
        # saved file exists would succeed even for an error or ``false`` reply.
        self.assertEqual(r.status_code, 200)
        self.assertIn('attachment', r.headers.get('Content-Disposition', ''))

        try:
            with open(file_name, 'wb') as f:
                f.write(r.content)

            self.assertTrue(os.path.exists(file_name))
        finally:
            # Remove the downloaded copy even when an assertion above fails.
            if os.path.exists(file_name):
                os.remove(file_name)
            

# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

Overwriting ./unit_tests/api_tests.py


In [15]:
%run unit_tests/api_tests.py

...
----------------------------------------------------------------------
Ran 3 tests in 144.129s

OK


In [17]:
%%writefile run_tests.py

import sys
import unittest

# The star import places the test case classes exposed by the unit_tests
# package into this script's namespace, which is the module unittest.main()
# collects tests from by default.
from unit_tests import *
unittest.main()

Overwriting run_tests.py


## Docker Container<a class="anchor" id="third-bullet"></a>

A text file (`requirements.txt`) lists the dependencies needed for this project. A Dockerfile is then written to bundle the API, model, and tests. Finally, the Docker image ```capstone-ai-app``` is built and the container is run from the terminal:

```
~$ docker build -t capstone-ai-app .
~$ docker run -p 4000:8080 capstone-ai-app
```

In [23]:
%%writefile requirements.txt

cython
numpy
flask
pandas
scikit-learn
# imported directly by app.py, so declared explicitly
joblib
matplotlib
seaborn
requests

Overwriting requirements.txt


In [22]:
%%writefile Dockerfile

FROM python:3.7.5-stretch

# System packages installed before the Python dependencies.
RUN apt-get update && apt-get install -y \
python3-dev \
build-essential

WORKDIR /app

# COPY rather than ADD: only a plain copy of the build context is needed,
# not ADD's archive extraction or URL fetching.
COPY . /app

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# app.py starts the server on port 8080, so that is the port to document.
EXPOSE 8080

ENV NAME=World

CMD ["python", "app.py"]

Overwriting Dockerfile


## Post-Production Analysis<a class="anchor" id="fourth-bullet"></a>