## <font Color="Blue">10.1 Rossmann Class</font>

In [None]:
# ------- Import Libraries ------------------
import pickle
import inflection
import pandas as pd
import numpy as np
import math
import datetime

class Rossmann( object ):
    """Inference-time preprocessing pipeline for the Rossmann sales model.

    The methods are meant to be called in this order:
    ``data_cleaning`` -> ``feature_engineering`` -> ``data_preparation`` ->
    ``get_prediction``.
    """

    # Default project root; kept so that ``Rossmann()`` behaves as before.
    DEFAULT_HOME_PATH = 'C:/Users/frmoriya/Documents/repos/ds_producao_hosmann/'

    # ----- Load Trained Model ------------------------------------------------
    def __init__( self, home_path=None ):
        """Load the pickled scalers/encoder stored under ``<home_path>parameters/``.

        Args:
            home_path: Project root, including the trailing separator.
                Defaults to ``DEFAULT_HOME_PATH``.
        """
        self.home_path = self.DEFAULT_HOME_PATH if home_path is None else home_path
        self.competition_distance_scaler   = self._load_parameter( 'competition_distance_scaler.pkl' )
        self.competition_time_month_scaler = self._load_parameter( 'competition_time_month_scaler.pkl' )
        self.promo_time_week_scaler        = self._load_parameter( 'promo_time_week_scaler.pkl' )
        self.year_scaler                   = self._load_parameter( 'year_scaler.pkl' )
        self.store_type_scaler             = self._load_parameter( 'store_type_scaler.pkl' )

    def _load_parameter( self, file_name ):
        """Unpickle one file from ``<home_path>parameters/`` and close it afterwards."""
        # NOTE: pickle can execute arbitrary code; only load trusted artefacts.
        with open( self.home_path + 'parameters/' + file_name, 'rb' ) as handle:
            return pickle.load( handle )

    @staticmethod
    def _is_promo_month( promo_interval, month_abbr ):
        """Return 1 when ``month_abbr`` is listed in ``promo_interval``, else 0.

        ``promo_interval`` is either 0 (no interval) or a comma-separated
        string of month names.  Tokens are truncated to three letters so a
        longer spelling such as 'Sept' still matches 'Sep'.
        """
        if promo_interval == 0:
            return 0
        listed_months = { token.strip()[:3] for token in promo_interval.split( ',' ) }
        return int( month_abbr in listed_months )

    def data_cleaning( self, df1 ):
        """Rename columns, parse dates and fill missing values.

        The frame is modified in place (and also returned): columns are
        renamed to snake_case and 'month_map' / 'is_promo' are added.

        Args:
            df1: Raw frame whose 16 columns are in the order of ``cols_old``.

        Returns:
            The same DataFrame object, cleaned.
        """
        # ---- 1.1 Rename Columns ----------------------------------------------
        cols_old = ['Store', 'DayOfWeek', 'Date', 'Open', 'Promo', 'StateHoliday',
                    'SchoolHoliday', 'StoreType', 'Assortment', 'CompetitionDistance', 'CompetitionOpenSinceMonth',
                    'CompetitionOpenSinceYear', 'Promo2', 'Promo2SinceWeek', 'Promo2SinceYear', 'PromoInterval']
        df1.columns = [ inflection.underscore( col ) for col in cols_old ]

        # ------ 1.3. Data Types ----------------------------------------------
        df1['date'] = pd.to_datetime( df1['date'] )

        # ------- 1.5. Fillout NA ---------------------------------------------
        # A missing distance is replaced by a large sentinel value.
        df1['competition_distance'] = df1['competition_distance'].fillna( 200000.0 )

        # Missing "since" fields fall back to the corresponding part of the row's own date.
        row_date = df1['date'].dt
        df1['competition_open_since_month'] = df1['competition_open_since_month'].fillna( row_date.month )
        df1['competition_open_since_year'] = df1['competition_open_since_year'].fillna( row_date.year )
        df1['promo2_since_week'] = df1['promo2_since_week'].fillna( row_date.isocalendar().week.astype( 'float64' ) )
        df1['promo2_since_year'] = df1['promo2_since_year'].fillna( row_date.year )

        # promo_interval: English abbreviations, so they can match the month
        # tokens of promo_interval.  'is_promo' is not among the columns
        # returned by data_preparation, so the model inputs are unaffected.
        month_map = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
                     7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

        # Assign instead of fillna(inplace=True) on a column selection, which
        # is not guaranteed to write back to the frame.
        df1['promo_interval'] = df1['promo_interval'].fillna( 0 )
        df1['month_map'] = df1['date'].dt.month.map( month_map )
        df1['is_promo'] = [ self._is_promo_month( interval, month )
                            for interval, month in zip( df1['promo_interval'], df1['month_map'] ) ]

        # ------ 1.6. Change Types -------------------------------------------
        int_columns = ['competition_open_since_month', 'competition_open_since_year',
                       'promo2_since_week', 'promo2_since_year']
        for column in int_columns:
            df1[column] = df1[column].astype( 'int64' )

        return df1

    def feature_engineering( self, df2 ):
        """Derive date/competition/promo features and drop closed-store rows.

        New columns are added to ``df2`` in place before the filtering step;
        the returned frame is a filtered copy without 'open',
        'promo_interval' and 'month_map'.

        Args:
            df2: Frame produced by ``data_cleaning``.

        Returns:
            A new DataFrame holding only the rows whose 'open' is not 0.
        """
        date = df2['date']

        # calendar parts
        df2['year'] = date.dt.year
        df2['month'] = date.dt.month
        df2['day'] = date.dt.day

        # week of year (Series.dt.weekofyear no longer exists in pandas >= 2.0)
        df2['week_of_year'] = date.dt.isocalendar().week.astype( 'int64' )

        # year week
        df2['year_week'] = date.dt.strftime( '%Y-%W' )

        # competition since: first day of the month the competitor opened
        competition_parts = pd.DataFrame( {'year': df2['competition_open_since_year'],
                                           'month': df2['competition_open_since_month'],
                                           'day': 1} )
        df2['competition_since'] = pd.to_datetime( competition_parts )
        # whole 30-day periods elapsed since the competitor opened
        df2['competition_time_month'] = ( ( date - df2['competition_since'] ) / 30 ).dt.days.astype( 'int64' )

        # promo since: Monday of the given year-week, shifted one week back
        promo_key = df2['promo2_since_year'].astype( str ) + '-' + df2['promo2_since_week'].astype( str )
        df2['promo_since'] = pd.to_datetime( promo_key.apply(
            lambda key: datetime.datetime.strptime( key + '-1', '%Y-%W-%w' ) - datetime.timedelta( days=7 ) ) )
        df2['promo_time_week'] = ( ( date - df2['promo_since'] ) / 7 ).dt.days.astype( 'int64' )

        # assortment: anything other than 'a' / 'b' is treated as 'extended'
        df2['assortment'] = df2['assortment'].map( {'a': 'basic', 'b': 'extra'} ).fillna( 'extended' )

        # state holiday: anything other than 'a' / 'b' / 'c' is a regular day
        holiday_names = {'a': 'public_holiday', 'b': 'easter_holiday', 'c': 'christmas'}
        df2['state_holiday'] = df2['state_holiday'].map( holiday_names ).fillna( 'regulary_day' )

        # ------ 3.0 STEP 3 - VARIABLES FILTERING -----------------------
        # 3.1. lines Filtering
        df2 = df2[df2['open'] != 0]

        # 3.2. Columns Selections
        return df2.drop( ['open', 'promo_interval', 'month_map'], axis=1 )

    def data_preparation( self, df5 ):
        """Rescale, encode and select the model input columns.

        The loaded scalers/encoder are applied with ``transform`` only.
        Re-fitting them here would make every prediction depend on the
        contents of the request batch.  The input frame is left untouched.

        Args:
            df5: Frame produced by ``feature_engineering``.

        Returns:
            A DataFrame restricted to the selected feature columns.
        """
        df5 = df5.copy()

        # ------- 5.2. Rescaling -------------------------------------
        scaled_columns = {
            'competition_distance': self.competition_distance_scaler,
            'competition_time_month': self.competition_time_month_scaler,
            'promo_time_week': self.promo_time_week_scaler,
            'year': self.year_scaler,
        }
        for column, scaler in scaled_columns.items():
            # ravel: the scaler receives (and may return) an (n, 1) array
            df5[column] = np.ravel( scaler.transform( df5[[column]].values ) )

        # ------ 5.5.1. Encoding -------------------------------------
        # State_holiday - one Hot Encoding
        df5 = pd.get_dummies( df5, prefix=['state_holiday'], columns=['state_holiday'] )

        # Store_type - Label encoding
        df5['store_type'] = self.store_type_scaler.transform( df5['store_type'] )

        # assortment - Ordinal Encoding
        assortment_dict = {'basic': 1, 'extra': 2, 'extended': 3}
        df5['assortment'] = df5['assortment'].map( assortment_dict )

        # ------ 5.3.2 Nature Transformation -------------------------
        # Cyclical sine/cosine encoding, using the same periods as before.
        periods = {'day_of_week': 7, 'month': 12, 'day': 30, 'week_of_year': 52}
        for column, period in periods.items():
            angle = df5[column] * ( 2. * np.pi / period )
            df5[column + '_sin'] = np.sin( angle )
            df5[column + '_cos'] = np.cos( angle )

        # Boruta columns selected
        cols_selected = [ 'store', 'promo', 'store_type', 'assortment', 'competition_distance', 'competition_open_since_month',
                          'competition_open_since_year', 'promo2', 'promo2_since_week', 'promo2_since_year', 'competition_time_month',
                          'promo_time_week', 'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos',
                          'week_of_year_sin', 'week_of_year_cos']

        return df5[ cols_selected ]

    def get_prediction( self, model, original_data, test_data ):
        """Predict and return ``original_data`` plus a 'prediction' column as JSON.

        ``np.expm1`` is applied to the model output before it is stored.
        When there are fewer predictions than rows in ``original_data``
        (``feature_engineering`` drops closed-store rows), predictions are
        aligned on the index of ``test_data`` and the remaining rows get a
        missing prediction.

        Args:
            model: Object exposing ``predict(test_data)``.
            original_data: Frame to annotate; modified in place.
            test_data: Prepared feature frame passed to the model.

        Returns:
            A JSON string (records orientation, ISO dates).

        Raises:
            ValueError: If the prediction count differs from ``original_data``
                and the predictions cannot be aligned through ``test_data.index``.
        """
        predictions = np.expm1( model.predict( test_data ) )

        if len( predictions ) == len( original_data ):
            # same length: positional assignment, as before
            original_data['prediction'] = predictions
        else:
            row_index = getattr( test_data, 'index', None )
            if row_index is None or len( row_index ) != len( predictions ):
                raise ValueError( 'Number of predictions does not match the original data' )
            original_data['prediction'] = pd.Series( predictions, index=row_index )

        return original_data.to_json( orient='records', date_format='iso' )
        

## <font Color="Blue">10.2 API Handler</font>

In [None]:
# ------ Import Libraries
import pickle
import pandas as pd
from flask             import Flask, request, Response
from rossmann.Rossmann import Rossmann

# loading model
# The file is opened in a with-block so the handle is closed once the model
# has been read. NOTE: pickle can execute arbitrary code; only load trusted files.
with open( 'C:/Users/frmoriya/Documents/repos/ds_producao_hosmann/model_result/model_rossmann.pkl', 'rb' ) as model_file:
    model = pickle.load( model_file )

# initialize API
app = Flask( __name__ )

@app.route( '/rossmann/predict', methods=['POST'] )
def rossmann_predict():
    """Handle POST /rossmann/predict.

    The request body is JSON: either a single record (dict) or a list of
    records.  The records go through the Rossmann pipeline and the response
    is the JSON produced by ``Rossmann.get_prediction``.  An empty body
    yields ``{}``.
    """
    test_json = request.get_json()

    # Nothing to predict: answer with an empty JSON object.
    if not test_json:
        return Response( '{}', status=200, mimetype='application/json' )

    if isinstance( test_json, dict ):
        # unique example: scalar values need an explicit one-row index
        test_raw = pd.DataFrame( test_json, index=[0] )
    else:
        # multiple examples: column order follows the first record
        test_raw = pd.DataFrame( test_json, columns=test_json[0].keys() )

    # instantiate Rossmann Class
    pipeline = Rossmann()

    # data cleaning -> feature engineering -> data preparation
    df1 = pipeline.data_cleaning( test_raw )
    df2 = pipeline.feature_engineering( df1 )
    df3 = pipeline.data_preparation( df2 )

    # prediction (JSON string)
    df_response = pipeline.get_prediction( model, test_raw, df3 )

    # Declare the payload as JSON, like the empty-body branch does.
    return Response( df_response, status=200, mimetype='application/json' )
    

if __name__ == '__main__':
    # Start the Flask server bound to 0.0.0.0; debug mode is not enabled.
    app.run( host='0.0.0.0' )
    

## <font Color="Blue">10.3 API Tester</font>

In [3]:
import pandas as pd
import requests
import json

In [6]:
# Store data (holds the characteristics of each store)
df_store_raw = pd.read_csv(
    'data/store.csv',
    low_memory=False,
)

In [16]:
# Read the test dataset into df10.
df10 = pd.read_csv(
    'C:/Users/frmoriya/Documents/repos/ds_producao_hosmann/data/test.csv',
)

In [23]:
# Attach the store attributes to every test row (left join on 'Store').
df_test = df10.merge( df_store_raw, how='left', on='Store' )

# Keep only the chosen stores, and only rows whose 'Open' value is neither
# zero nor missing; the 'Id' column is not sent to the API.
is_selected_store = df_test['Store'].isin( [20, 10, 100] )
is_open = ( df_test['Open'] != 0 ) & df_test['Open'].notnull()
df_test = df_test.loc[is_selected_store & is_open].drop( columns='Id' )



In [24]:
# Serialise the filtered rows as a JSON array with one object per row.
records = df_test.to_dict( orient='records' )
data = json.dumps( records )


In [25]:
# API CALL

# Alternative deployment:
#url = 'https://rossmann-predict-model.herokuapp.com/rossmann/predict'
# Single slash before the route, matching the '/rossmann/predict' endpoint.
url = 'https://rossman-web.herokuapp.com/rossmann/predict'
header = {'content-type':'application/json'}

# Without a timeout, requests waits indefinitely if the server never answers.
r = requests.post( url, data=data, headers=header, timeout=60 )
print( 'Status Code {}'.format( r.status_code) )

Status Code 200


In [26]:
# Decode the response body once; column order follows the first record.
response_records = r.json()
d1 = pd.DataFrame( response_records, columns=response_records[0].keys() )

In [27]:
# Total predicted sales per store.
d2 = d1[['store', 'prediction']].groupby( 'store' ).sum().reset_index()

# Iterate over the rows directly instead of indexing with range(len(...)).
for row in d2.itertuples( index=False ):
    print( 'Store Number:{}, will sell R$ {:,.2f} in the next 6 weeks.'.format(
            row.store,
            row.prediction ) )


Store Number:10, will sell R$ 213,932.25 in the next 6 weeks.
Store Number:20, will sell R$ 301,375.82 in the next 6 weeks.
Store Number:100, will sell R$ 298,694.26 in the next 6 weeks.
