# Hello World API with Flask

In [3]:
# Path of the Flask script written by the next cell (the folder structure was created with cookiecutter)
import os
hello_world_script_file = os.path.join(os.pardir, 'src', 'models', 'hello_world_api2.py')

In [13]:
%%writefile $hello_world_script_file

from flask import Flask, request
app = Flask(__name__)

@app.route('/api', methods=['POST'])  # the endpoint takes a JSON input, processes it, and returns a greeting
def say_hello():
    """Greet the caller named in the POSTed JSON body.

    Expects a JSON object with a ``name`` key, e.g. ``{"name": "graeme"}``.
    ``force=True`` makes Flask parse the body as JSON even when the request
    does not carry a JSON Content-Type header.

    Returns:
        ``"hello <name>"`` on success, or an explanatory message with HTTP
        status 400 when the body is not a JSON object containing ``name``.
    """
    data = request.get_json(force=True)
    # Reject malformed payloads explicitly; indexing blindly would raise
    # KeyError/TypeError and surface to the client as an opaque HTTP 500.
    if not isinstance(data, dict) or 'name' not in data:
        return "request body must be a JSON object with a 'name' key", 400
    return "hello {0}".format(data['name'])

if __name__ == '__main__':
    # Script entry point: serve the Flask app on port 10001 (any available port would do).
    # debug=True is for troubleshooting in dev.
    app.run(
        port=10001,
        debug=True,
        use_reloader=False,
    )

Overwriting ../src/models/hello_world_api2.py


In [6]:
# We have started the process via the command line with python3 hello_world_api2.py
import json

In [7]:
import requests

In [14]:
# Call the API endpoint exposed by the locally running Flask process
url = 'http://127.0.0.1:10001/api'
# Build the payload: json.dumps serialises the dict into a JSON string
data = json.dumps({'name': 'graeme'})
# POST the payload and keep the response object in r
r = requests.post(url, data=data)

In [15]:
print(r.text)

hello graeme


In [16]:
# This is calling the API and returning correctly :D

# API for Machine Learning with Flask

In [18]:
# Path of the machine learning API script written by the next cell (the folder structure was created with cookiecutter)
import os
machine_learning_api_script_file = os.path.join(os.pardir, 'src', 'models', 'machine_learning_api.py')

In [40]:
%%writefile $machine_learning_api_script_file
# Now this is the code we used from all the previous steps we performed.


from flask import Flask, request
import pandas as pd
import numpy as np
import json
import pickle
import os

app = Flask(__name__)

# Locate the persisted model and scaler files (path is relative to the current working directory)
model_path = os.path.join(os.path.pardir, os.path.pardir, 'models')
model_filepath = os.path.join(model_path, 'lr_model.pkl')
scaler_filepath = os.path.join(model_path, 'lr_scaler.pkl')

# Load both artefacts in binary read mode. The context managers close the file
# handles straight away instead of leaving them open for the life of the process.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open(scaler_filepath, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
with open(model_filepath, 'rb') as model_file:
    model = pickle.load(model_file)

# Feature columns, listed in the order the ML model expects them
columns = [
    'Age', 'Fare', 'FamilySize', 'IsMother', 'IsMale',
    'Deck_A', 'Deck_B', 'Deck_C', 'Deck_D', 'Deck_E', 'Deck_F', 'Deck_G', 'Deck_Z',
    'Pclass_1', 'Pclass_2', 'Pclass_3',
    'Title_Lady', 'Title_Master', 'Title_Miss', 'Title_Mr', 'Title_Mrs',
    'Title_Officer', 'Title_Sir',
    'Fare_Bin_very_low', 'Fare_Bin_low', 'Fare_Bin_high', 'Fare_Bin_very_high',
    'Embarked_C', 'Embarked_Q', 'Embarked_S',
    'AgeState_Adult', 'AgeState_Child',
]

@app.route('/api', methods=['POST'])
def make_predicitions():
    """Score the passengers in the POSTed JSON payload with the loaded model.

    Executed whenever the API is called. The request body is expected to be a
    JSON-serialised DataFrame (as produced by ``DataFrame.to_json()``) that
    contains ``PassengerId``, ``Survived`` and every feature in ``columns``.

    Returns:
        A JSON string (``DataFrame.to_json()``) with ``PassengerId``,
        ``Predicted`` and ``Actual`` columns.
    """
    # Local import so the function does not rely on a new module-level import.
    from io import StringIO

    # Read the JSON object and convert it back to a JSON string.
    # force=True parses the body as JSON even without a JSON Content-Type header.
    payload = json.dumps(request.get_json(force=True))
    # Create a data frame from the JSON string. It is wrapped in StringIO because
    # passing a literal JSON string to read_json is deprecated in recent pandas.
    df = pd.read_json(StringIO(payload))
    # Extract the passenger ids (.values works across pandas versions; Series.ravel is deprecated)
    passenger_ids = df['PassengerId'].values
    # Capture the actual survived values -- we do not have all the actuals, those are on Kaggle,
    # but this is how the API process would work if we did have a store of the actual survival data
    actuals = df['Survived'].values
    # Extract the feature columns as a float matrix (DataFrame.as_matrix was removed in pandas 1.0)
    X = df[columns].values.astype('float')
    # Scale the features with the loaded scaler
    X_scaled = scaler.transform(X)
    # Make the predictions
    predictions = model.predict(X_scaled)
    # Build the response data frame
    df_response = pd.DataFrame({'PassengerId': passenger_ids, 'Predicted': predictions, 'Actual': actuals})
    # Return it as a JSON string
    return df_response.to_json()

if __name__ == '__main__':
    # Host the Flask app on port 10001; debug=True is for troubleshooting in dev.
    app.run(
        port=10001,
        debug=True,
        use_reloader=False,
    )
    

Overwriting ../src/models/machine_learning_api.py


In [25]:
# now we run the flask server from the command line
# $ python3 machine_learning_api.py


## Invoke API using the Requests feature

In [50]:
import os
import numpy as np
import pandas as pd
# Load the processed training data from ../data/processed/train.csv
processed_data_path = os.path.join(os.pardir, 'data', 'processed')
train_file_path = os.path.join(processed_data_path, 'train.csv')
train_df = pd.read_csv(train_file_path)

In [42]:
# The processed training data is used to check that the API is working:
# take the first 5 passengers whose Survived value is 1
survived_mask = train_df['Survived'] == 1
survived_passengers = train_df[survived_mask].head(5)
survived_passengers


Unnamed: 0,PassengerId,Survived,Age,Fare,FamilySize,IsMother,IsMale,Deck_A,Deck_B,Deck_C,...,Title_Sir,Fare_Bin_very_low,Fare_Bin_low,Fare_Bin_high,Fare_Bin_very_high,Embarked_C,Embarked_Q,Embarked_S,AgeState_Adult,AgeState_Child
1,2,1,38.0,71.2833,2,0,0,0,0,1,...,0,0,0,0,1,1,0,0,1,0
2,3,1,26.0,7.925,1,0,0,0,0,0,...,0,0,1,0,0,0,0,1,1,0
3,4,1,35.0,53.1,2,0,0,0,0,1,...,0,0,0,0,1,0,0,1,1,0
8,9,1,27.0,11.1333,3,1,0,0,0,0,...,0,0,1,0,0,0,0,1,1,0
9,10,1,14.0,30.0708,2,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,1


In [45]:
# We should get same response from API.  Let's create a helper
import requests
def make_api_request(data, url="http://127.0.0.1:10001/api", timeout=30):
    """POST ``data`` to the prediction API and return the decoded JSON response.

    Args:
        data: Request body to send (the notebook passes ``DataFrame.to_json()`` output).
        url: Address where the API is exposed; defaults to the local Flask server.
        timeout: Seconds to wait for the server, so that a hung or unreachable
            server cannot block the notebook indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within ``timeout`` seconds.
    """
    response = requests.post(url, data=data, timeout=timeout)
    # Fail with the HTTP status rather than a confusing JSON decode error on an error page
    response.raise_for_status()
    return response.json()

In [46]:
# This should return the same Survived values as shown above, as a check that the API is working
make_api_request(survived_passengers.to_json())

{'PassengerId': {'0': 2, '1': 3, '2': 4, '3': 9, '4': 10},
 'Predicted': {'0': 1, '1': 1, '2': 1, '3': 1, '4': 1},
 'Actual': {'0': 1, '1': 1, '2': 1, '3': 1, '4': 1}}

In [47]:
# As we can see, all Survived

In [48]:
# Now pass the entire training DataFrame to the API helper,
# then load the JSON result into a result DataFrame and look at the top 5 rows.
# Accuracy is then checked by comparing Actual to Predicted and taking the mean.
from io import StringIO

result = make_api_request(train_df.to_json())
# Wrap the JSON text in StringIO: passing a literal JSON string to read_json is deprecated in recent pandas
df_result = pd.read_json(StringIO(json.dumps(result)))
df_result.head()

Unnamed: 0,PassengerId,Predicted,Actual
0,1,0,0
1,2,1,1
10,108,0,1
100,190,0,0
101,20,1,1


In [52]:
# What is the overall accuracy? (fraction of rows where Predicted equals Actual)
(df_result['Actual'] == df_result['Predicted']).mean()

0.8383838383838383

In [53]:
# This is as expected from our previous modeling persistence demo
# So now we have a machine learning API
#
# How could we improve the API?
# We could extend it to accept the raw data, process it inside the API, and then pass the processed data to the model