# Initializing

In [9]:
#Active environment should be aws_env
# Runs `conda info` in a shell and keeps only the lines containing 'active env',
# so the cell output shows the active environment name and its location.
!conda info | grep 'active env'

     active environment : aws_env
    active env location : /home/hassan101/anaconda3/envs/aws_env


In [10]:
#Get AWS credentials from environment
# os.environ[...] (rather than .get) raises KeyError right away if a variable is not set.
import os
aws_akid = os.environ['AWS_KID']  # passed to boto3 below as aws_access_key_id
aws_sak = os.environ['AWS_AK']  # passed to boto3 below as aws_secret_access_key

#Importing libraries
import boto3
import pandas as pd
import io
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Getting processed data from S3 bucket

In [11]:
#Authentication
res_s3 = boto3.resource('s3', aws_access_key_id=aws_akid, aws_secret_access_key=aws_sak)
client_s3 = boto3.client('s3', aws_access_key_id=aws_akid, aws_secret_access_key=aws_sak)

# List all objects in bucket
bucket_name = 'reg-dataset-processed'

response = client_s3.list_objects_v2(Bucket=bucket_name)
# The 'Contents' key is missing from the listing when the bucket holds no objects,
# so fall back to an empty list instead of raising KeyError.
for obj in response.get('Contents', []):
    print(obj)

response = client_s3.get_object(Bucket=bucket_name, Key="lifexp-processed.csv")

status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

# Fail loudly on an unsuccessful download: merely printing the status would leave
# `df` undefined and surface later as a confusing NameError.
if status != 200:
    raise RuntimeError(f"Unsuccessful S3 get_object response. Status - {status}")

print(f"Successful S3 get_object response. Status - {status}")
df = pd.read_csv(response.get("Body"))

#Separate features and labels: the last column is the label, everything before it is a feature
X = df.iloc[:, :-1]
y = df.iloc[:, [-1]]  # the list indexer keeps y as a single-column DataFrame

#Perform OHE
X_ohe = pd.get_dummies(X)
print(X_ohe.shape)
X_ohe.head()

{'Key': 'lifexp-processed.csv', 'LastModified': datetime.datetime(2023, 2, 9, 9, 16, 24, tzinfo=tzutc()), 'ETag': '"9e138a0af6f65a9b4338da770613527c"', 'Size': 98812, 'StorageClass': 'STANDARD'}
Successful S3 get_object response. Status - 200
(2556, 8)


Unnamed: 0,HIV/AIDS,Income composition of resources,Adult mortality,Under-five deaths,Thinness 5-9 years,Polio,Status_Developed,Status_Developing
0,0.1,0.479,263,83,17.3,6,0,1
1,0.1,0.476,271,86,17.5,58,0,1
2,0.1,0.47,268,89,17.7,62,0,1
3,0.1,0.463,272,93,18.0,67,0,1
4,0.1,0.454,275,97,18.2,68,0,1


# DEPLOYMENT AND MAKING PREDICTIONS (single sample only)

# Prepping sample

Taking random sample

In [13]:
import random

# Pick one valid row position. random.randint(0, len(df)) can return len(df) itself,
# which is one past the last row and makes .iloc raise IndexError; randrange(len(df))
# draws from 0 .. len(df) - 1 only.
random_number = random.randrange(len(df))

sample = X_ohe.iloc[[random_number]]  # double brackets keep a one-row DataFrame
sample_label = y.iloc[random_number]['Total expenditure']

print('Actual label for data:', sample_label)
print(f'These are the features with data shape:{sample.shape}')

# Save the sample together with its index; the next cell reads it back with index_col=0
sample.to_csv('sample.csv', index=True)
sample.head()

Actual label for data: 9.39
These are the features with data shape:(1, 8)


Unnamed: 0,HIV/AIDS,Income composition of resources,Adult mortality,Under-five deaths,Thinness 5-9 years,Polio,Status_Developed,Status_Developing
611,0.1,0.719,19,1,3.3,99,0,1


Prepping sample as dictionary

In [14]:
#Predicting with script (passing without additional header)
import pickle
import json
import numpy as np

# NOTE: pickle.load can execute arbitrary code - only load artifact files you trust.
# The context managers close the file handles as soon as the objects are loaded.
with open('model_fromscratch.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('scaling.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

#Importing sample data and preparing data in form of JSON
sample = pd.read_csv('sample.csv', index_col=0)
sample_dic = sample.to_dict('list') #This will only use column-values pair for dic, index will be ignored. To use index as header for JSON later, use 'index' as argument
sample_dic

{'HIV/AIDS': [0.1],
 'Income composition of resources': [0.719],
 'Adult mortality': [19],
 'Under-five deaths': [1],
 'Thinness 5-9 years': [3.3],
 'Polio': [99],
 'Status_Developed': [0],
 'Status_Developing': [1]}

A quick check on predictions locally (we will convert to JSON anyway for local testing)

In [59]:
#Serialize the dictionary to a JSON string, then parse it straight back
data_as_json = json.dumps(sample_dic)
data_as_dic = json.loads(data_as_json)

#Lay the values out (in dictionary order) as a single row for the scaler
feature_values = [*data_as_dic.values()]
data_as_array = np.array(feature_values).reshape(1, -1)
print('Data shape:', data_as_array.shape)

#Scale the row, predict, and report the first (only) prediction
data_sc = scaler.transform(data_as_array)
output = model.predict(data_sc)
print('Predicted value:', output[0])

Data shape: (1, 8)
Predicted value: 7.342199999999992




# Predictions from Flask

Note that Flask's built-in server is not recommended for production: it is a development server and is not designed to handle many concurrent requests. Gunicorn, Docker or a cloud service are better alternatives.

For Flask, you do not need to convert the data to JSON yourself: pass the dictionary to `requests.post(..., json=...)` and it is serialized to JSON for you.

Write a script to execute in deployment environment

In [56]:
%%writefile app_api.py

from flask import Flask, request,jsonify
import pickle
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
import numpy as np

app = Flask('lifeexp')

# Load the pickled model and scaler once, when the module is imported.
# NOTE: pickle.load can execute arbitrary code - only ship artifact files you trust.
# The context managers close the file handles instead of leaving them open
# for the lifetime of the server process.
with open('model_fromscratch.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('scaling.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

@app.route('/predict_api', methods=['POST'])
def predict_api():
    """Return a prediction for one sample posted as a JSON object.

    The values of the JSON object are used in the order they appear, so the
    caller must send the features in the column order the scaler and model
    were given. The keys themselves are not inspected.

    Returns:
        The first prediction as a JSON number, or a 400 response carrying an
        ``error`` message when the body is not a non-empty JSON object.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # problem can be reported as a clear 400 instead of an unhandled error.
    data_as_dic = request.get_json(silent=True)
    if not isinstance(data_as_dic, dict) or not data_as_dic:
        return jsonify(error='Request body must be a non-empty JSON object of feature values'), 400

    data_as_array = np.array(list(data_as_dic.values())).reshape(1, -1)
    print('Data shape:', data_as_array.shape)
    data_sc = scaler.transform(data_as_array)
    output = model.predict(data_sc)
    print('Predicted value:', output[0])
    # Convert the NumPy scalar to a built-in float so jsonify can serialize it
    # regardless of the model's output dtype.
    return jsonify(float(output[0]))

if __name__ == '__main__':
    # The interactive debugger enabled by debug=True lets anyone who can reach the
    # server execute code, so it must not be on by default while listening on all
    # interfaces (0.0.0.0). Opt in explicitly with FLASK_DEBUG=1 for local development.
    import os
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1', host='0.0.0.0', port=9696)

Overwriting app_api.py


Execute the above file in deployment environment as follows:

```
python app_api.py
```


Then make predictions like below:

In [15]:
import requests
url= 'http://localhost:9696/predict_api'
# json= serializes sample_dic into the JSON request body; the timeout stops the
# cell from hanging indefinitely when the service is not running.
output=requests.post(url, json=sample_dic, timeout=10)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 7.958399999999994


# Local predictions with manual inputs

In [None]:
#Without additional header
import pickle
import json
import numpy as np

# NOTE: pickle.load can execute arbitrary code - only load artifact files you trust.
# The context managers close the file handles as soon as the objects are loaded.
with open('model_fromscratch.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('scaling.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

#Preparing data in form of JSON
dic = {
    "HIV/AIDS": 0.1,
    "Income composition of resources": 0.624,
    "Adult mortality": 213.0,
    "Under-five deaths": 237.0,
    "Thinness 5-9 years": 1.9,
    "Polio": 79.0,
    "Status_Developed": 0.0,
    "Status_Developing": 1.0
}
data_as_json=json.dumps(dic)
data_as_json

#Loading JSON data on the app, and transforming for predictions
data_as_dic = json.loads(data_as_json)
# Values are taken in dictionary order, so the keys above must stay in the
# column order expected by the scaler and model.
data_as_array = np.array(list(data_as_dic.values())).reshape(1,-1)
print('Data shape:', data_as_array.shape)
data_sc=scaler.transform(data_as_array)
output=model.predict(data_sc)
print('Predicted value:', output[0])

Data shape: (1, 8)
Predicted value: 2.5392000000000023




In [None]:
#With additional "data" header
import pickle
import json
import numpy as np

# NOTE: pickle.load can execute arbitrary code - only load artifact files you trust.
# The context managers close the file handles as soon as the objects are loaded.
with open('model_fromscratch.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('scaling.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

#Preparing data in form of JSON
dic = {
    "data": {
        "HIV/AIDS": 0.1,
        "Income composition of resources": 0.624,
        "Adult mortality": 213.0,
        "Under-five deaths": 237.0,
        "Thinness 5-9 years": 1.9,
        "Polio": 79.0,
        "Status_Developed": 0.0,
        "Status_Developing": 1.0
    }
}
# Serialize the whole payload, header included. Dumping only dic['data'] would
# strip the header before the round trip and never exercise the wrapped format.
data_as_json=json.dumps(dic)
data_as_json

#Loading JSON data on the app, and transforming for predictions
# Unwrap the "data" header after parsing to get the flat feature dictionary.
data_as_dic = json.loads(data_as_json)['data']
data_as_array = np.array(list(data_as_dic.values())).reshape(1,-1)
print('Data shape:', data_as_array.shape)
data_sc=scaler.transform(data_as_array)
output=model.predict(data_sc)
print('Predicted value:', output[0])

Data shape: (1, 8)
Predicted value: 2.5392000000000023




# Predictions on Postman

# Data to put in Postman API body as raw data
{
    "HIV/AIDS": 0.1,
    "Income composition of resources": 0.624,
    "Adult mortality": 213.0,
    "Under-five deaths": 237.0,
    "Thinness 5-9 years": 1.9,
    "Polio": 79.0,
    "Status_Developed": 0.0,
    "Status_Developing": 1.0
}

#Use this URL
http://localhost:9696/predict_api

In [62]:
# Alternatively, this will work too:

# Note: this replaces the sample_dic built from sample.csv; later request cells
# will send these manual values unless that earlier cell is run again.
sample_dic = {
    "HIV/AIDS": 0.1,
    "Income composition of resources": 0.624,
    "Adult mortality": 213.0,
    "Under-five deaths": 237.0,
    "Thinness 5-9 years": 1.9,
    "Polio": 79.0,
    "Status_Developed": 0.0,
    "Status_Developing": 1.0
}

import requests
url= 'http://localhost:9696/predict_api'
# The timeout stops the cell from hanging indefinitely when the service is not running.
output=requests.post(url, json=sample_dic, timeout=10)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 2.5848000000000018


# Predictions on web

In [63]:
%%writefile app_web.py

from flask import Flask, request, app, jsonify, url_for, render_template
import pickle
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
import numpy as np

app = Flask(__name__)

# Load the pickled model and scaler once, when the module is imported.
# NOTE: pickle.load can execute arbitrary code - only ship artifact files you trust.
# The context managers close the file handles instead of leaving them open
# for the lifetime of the server process.
with open('model_fromscratch.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('scaling.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

@app.route('/')
def home():
    """Serve the root URL by rendering the ``home.html`` template."""
    return render_template('home.html')

@app.route('/predict_web', methods=['POST'])
def predict():
    """Predict from the values submitted through the HTML form.

    Form values are converted to floats and used in the order the form sends
    them, so the form fields must follow the column order the scaler and model
    were given.

    Returns:
        The ``home.html`` template rendered with ``prediction_text`` set to the
        prediction, or to a short hint when a field is empty or not numeric.
    """
    try:
        data_as_float = [float(x) for x in request.form.values()]
    except ValueError:
        # An empty or non-numeric field would otherwise end in a 500 error page.
        data_as_float = []
    if not data_as_float:
        return render_template('home.html', prediction_text="Please enter a numeric value in every field.")

    data_as_array = np.array(data_as_float).reshape(1,-1)
    print('Data shape:', data_as_array.shape)
    data_sc=scaler.transform(data_as_array)
    output=model.predict(data_sc)
    print('Predicted value:', output[0])
    # NOTE(review): "expentency" looks like a typo for "expectancy"; the user-facing text is left unchanged here.
    return render_template('home.html', prediction_text= "The life expentency is {}".format(output[0]))

if __name__ == '__main__':
    # The interactive debugger enabled by debug=True lets anyone who can reach the
    # server execute code, so it must not be on by default while listening on all
    # interfaces (0.0.0.0). Opt in explicitly with FLASK_DEBUG=1 for local development.
    import os
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1', host='0.0.0.0', port=9696)

Writing app_web.py


# Predictions on gunicorn

Run this code in the deployment environment to start the service:

```
gunicorn --bind=0.0.0.0:9696 app_api:app
```

Next you can send POST request as usual to get the predictions

In [21]:
import requests
url= 'http://localhost:9696/predict_api'
# The timeout stops the cell from hanging indefinitely when gunicorn is not running.
output=requests.post(url, json=sample_dic, timeout=10)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 7.958399999999994


# Predictions on docker

Build an image named `lifeexp` from the Dockerfile, then run this command in a regular terminal (not in the Docker terminal):
```
docker run -it --rm -p 9696:9696 lifeexp
```
Do not run this from the Docker terminal inside VS Code (VSC), because VS Code will already be using the port. Remember to map the container port to the local port with `-p 9696:9696`. The `--rm` flag removes the container automatically once it exits.

In [69]:
#Once container is started, use the POST request to get predictions
import requests
url= 'http://localhost:9696/predict_api'
# The timeout stops the cell from hanging indefinitely when the container is not up.
output=requests.post(url, json=sample_dic, timeout=10)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 2.5848000000000018


# Predictions on AWS Elastic beanstalk

First, you need to install the elastic beanstalk library in working environment

In [1]:
# !pip install awsebcli

Check if installation is working fine:

In [2]:
# Print the installed EB CLI version to confirm the `eb` command is available
!eb --version

EB CLI 3.20.3 (Python 3.8.0)


Initialize docker-based project:

In [4]:
# Initialize the Elastic Beanstalk application `life-exp-service` on the docker
# platform (-p docker), using the AWS CLI profile `usr_hassan` for credentials
!eb init -p docker life-exp-service --profile usr_hassan

The `usr_hassan` profile holds the AWS credentials; it is defined in the `~/.aws/config` file under the `[profile usr_hassan]` section.

Check the Elastic Beanstalk `config.yml` file and make sure `default_region` is set to `ap-southeast-2`

In [8]:
# Show the Elastic Beanstalk project configuration so its settings (e.g. default_region) can be checked
!cat .elasticbeanstalk/config.yml

branch-defaults:
  main:
    environment: null
    group_suffix: null
global:
  application_name: life-exp-service
  branch: null
  default_ec2_keyname: null
  default_platform: Docker
  default_region: ap-southeast-2
  include_git_submodules: true
  instance_profile: null
  platform_name: null
  platform_version: null
  profile: usr_hassan
  repository: null
  sc: git
  workspace_type: Application


Test by executing locally (make sure Docker is operational)

```
eb local run --port 9696
```

Check if predictions are working locally:

In [20]:
import requests
url= 'http://localhost:9696/predict_api'
# The timeout stops the cell from hanging indefinitely when `eb local run` is not active.
output=requests.post(url, json=sample_dic, timeout=10)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 7.958399999999994


Now, create the service on cloud (this is not free):

```
eb create life-exp-service
```


Once finished, copy URL from this line:


2023-02-14 00:47:19    INFO    Application available at life-exp-service.eba-snms8sq2.ap-southeast-2.elasticbeanstalk.com.

Replace the host part of the URL (everything between `http://` and `/predict_api`) with the address copied above, then send the POST request to make predictions:

In [19]:
import requests
url= 'http://life-exp-service.eba-snms8sq2.ap-southeast-2.elasticbeanstalk.com/predict_api'
# A more generous timeout than for localhost, since this call crosses the network;
# it still stops the cell from hanging if the environment has been terminated.
output=requests.post(url, json=sample_dic, timeout=30)
output.raise_for_status()  # report HTTP errors instead of trying to parse an error page
print('Predicted label:', output.json())

Predicted label: 7.958399999999994


Terminate the service afterwards (for example with `eb terminate life-exp-service`) to avoid paying a hefty bill.