In [6]:
import os
import pandas as pd
from xgboost import XGBRegressor
from flask import Flask, request, Response
from api.preprocessing.Rossman import RossmanPreprocessing
import math
import numpy as np
import json
import requests

# API handler

In [None]:
# Restore the trained regressor from its serialized JSON file
MODEL_PATH = '/home/iury_unix/ml_projects/rossman_sales_prediction/model/xgb_rossman.json'
model = XGBRegressor()
model.load_model(MODEL_PATH)

# Create the Flask application object that the route below is registered on
app = Flask(__name__)

@app.route('/rossman/predict', methods = ['POST'])
def rossman_predict():
    """Score the posted JSON payload with the loaded model.

    The request body is read with ``request.get_json()`` and may be either a
    single JSON object (one example) or a JSON array of objects (several
    examples). The resulting frame is passed through the
    ``RossmanPreprocessing`` steps (cleaning, feature engineering, data
    preparation) before prediction.

    Returns:
        For a non-empty payload, whatever
        ``RossmanPreprocessing.get_prediction`` returns (presumably a JSON
        string of records -- TODO confirm against the preprocessing module).
        For a falsy payload (e.g. ``{}``, ``[]`` or ``null``), an empty JSON
        object with HTTP status 200.
    """
    test_json = request.get_json()

    # Nothing to score: answer with an empty JSON object.
    # The media type must be 'application/json' (no leading slash), otherwise
    # the Content-Type header sent to the client is not a valid media type.
    if not test_json:
        return Response('{}', status=200, mimetype='application/json')

    if isinstance(test_json, dict):
        # A single example: all values are scalars, so an explicit index is required
        test_raw = pd.DataFrame(test_json, index=[0])
    else:
        # Several examples: the first record's keys define the columns
        test_raw = pd.DataFrame(test_json, columns=test_json[0].keys())

    # Instantiate preprocessing class
    pipeline = RossmanPreprocessing()

    # Data Cleaning
    df1 = pipeline.data_cleaning(test_raw)

    # Feature Engineering
    df2 = pipeline.feature_engineering(df1)

    # Data Preparation
    df3 = pipeline.data_preparation(df2)

    # Prediction: the raw frame is passed along with the prepared one
    return pipeline.get_prediction(model, test_raw, df3)
    
# Start the Flask server when this code runs as the main program,
# listening on all network interfaces
if __name__ == '__main__':
    app.run(host='0.0.0.0')

# API Tester

In [12]:
# Load the raw test set and the per-store metadata
df_load = pd.read_csv('/home/iury_unix/ml_projects/rossman_sales_prediction/data/raw/test.csv')
df_store = pd.read_csv('/home/iury_unix/ml_projects/rossman_sales_prediction/data/raw/store.csv')

# Merge test and store (left join, so every test row is kept)
df_test = pd.merge(df_load, df_store, how='left', on='Store')

# Restrict the request to a handful of stores
df_test = df_test[df_test['Store'].isin([24, 22, 30, 12])]

# Drop rows whose Open flag is 0 (presumably closed stores) or missing.
# Both conditions are needed: NaN != 0 evaluates to True, so the first test
# alone would keep rows with a missing flag.
df_test = df_test[(df_test['Open'] != 0) & df_test['Open'].notnull()]

# The Id column is not part of the payload sent to the API
df_test = df_test.drop('Id', axis=1)

# Converting to json
data = json.dumps(df_test.to_dict(orient='records'))

# API call
# Use the loopback address as the destination: 0.0.0.0 is a bind address and
# is not accepted as a connect target on every platform.
url = 'http://127.0.0.1:5000/rossman/predict'
header = {'Content-type': 'application/json'}

# A timeout keeps the cell from hanging indefinitely if the server is unreachable or stalls
r = requests.post(url=url, data=data, headers=header, timeout=60)
print(f'Status Code: {r.status_code}')

Status Code: 200


In [13]:
# Decode the response body once and build a frame whose columns follow the first record's keys
response_records = r.json()
d1 = pd.DataFrame(response_records, columns=response_records[0].keys())

In [14]:
# Sum the predictions of each store into a two-column frame (store, predictions)
d2 = d1.groupby('store', as_index=False)['predictions'].sum()

# Print one summary line per store
for store, total in zip(d2['store'], d2['predictions']):
    print('Store Number {} will sell R$ {:,.2f} in the next 6 weeks.'.format(store, total))


Store Number 12 will sell R$ 244,327.48 in the next 6 weeks.
Store Number 22 will sell R$ 233,661.64 in the next 6 weeks.
Store Number 24 will sell R$ 348,137.94 in the next 6 weeks.
Store Number 30 will sell R$ 195,846.39 in the next 6 weeks.
