# Flask App
- The purpose of this notebook is to perform a POC that deploys a model on a Flask server running locally on the machine.
- Once this works, the same script will be turned into app.py and deployed to an AWS EC2 server.

## Staley 16 Logistic Regression Model Flask Endpoint (POC)<a id="st16"></a>
Load the ST16 Logistic Regression Model and expose it as a Flask app endpoint.

- The cell below reads the logisticRegressionModel.pkl file, which is a trained ST16 LR model file. We use this model to make predictions.
- Once the cell below is run, the app will be serving on localhost, port 5000.
- After successful execution of the cell below, we can test it by pasting this URL into the browser: http://127.0.0.1:5000/predict/?rainFall=20.4&siteId=2
- Here siteId is used to identify a particular record from the ST16 data, and rainFall is used as the i15 parameter.
- We can change these 2 parameters to test the predict function.
- The config data (parameters) is read from the config.json file.

- Note: to stop the app running in the Jupyter notebook, press "Interrupt Kernel" in the Kernel menu.

In [1]:
# Random seed. Nothing in this cell reads it; the scratch-pad cell passes
# `seed` as random_state to train_test_split.
seed=27
from flask import Flask,request
import pandas as pd
import numpy as np
import pickle
import json

import geopandas as gpd
import sklearn
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score, RocCurveDisplay
from sklearn.preprocessing import StandardScaler


import torch
import torch.nn as nn
import torch.optim as optim
import pickle
# Flask application object; the /predict/ route below is registered on it
app = Flask(__name__)

# Load the settings (data and model file locations) from config.json
with open("config.json") as jsonfile:
    config = json.load(jsonfile)

# Read the site records into a pandas DataFrame
sdf = pd.read_csv(config["data"]["lr_data_file"])

# Add a positional ID column (0 .. n-1) used to look up a particular record
sdf['ID'] = np.arange(len(sdf))

# Restore the ST16 LR model from its pickle file
with open(config["model"]["lr_pickle_file"], mode='rb') as file:
    lr_model = pickle.load(file)

#URL Binding     
@app.route('/predict/', methods=['GET'])
def predict():
    """Predict debris flow for one site at a given rainfall intensity.

    Query parameters:
        rainFall: peak rainfall intensity, parsed as a float and used as i15.
        siteId: integer matched against the ``ID`` column of ``sdf``.

    Returns:
        The text 'Chance of having debris flow : <prediction>' on success,
        a 400 response when a parameter is missing or not numeric, or a
        404 response when no record has the requested siteId.
    """
    # read parameters from the URL
    parameters = request.args.to_dict()
    try:
        i15 = float(parameters['rainFall'])
        siteId = int(parameters['siteId'])
    except (KeyError, ValueError):
        # a missing or malformed parameter is a client error, not a server crash
        return 'rainFall (number) and siteId (integer) query parameters are required', 400

    # identify the site's record
    rec = sdf.loc[sdf['ID'] == siteId, ["PropHM23", "dNBR/1000", 'KF']]
    if rec.empty:
        # an empty selection cannot be scored by the model
        return 'No record found for siteId %s' % siteId, 404

    # Multiply each feature by the peak rainfall intensity. The inputs are
    # built in a new frame instead of adding columns to a slice of sdf,
    # which avoids pandas' SettingWithCopyWarning. Column names and order
    # are the ones the original code passed to the model.
    features = pd.DataFrame({
        "PropHM23_x_i15": rec["PropHM23"] * i15,
        "dNBR_x_i15": rec["dNBR/1000"] * i15,
        "KF_x_i15": rec["KF"] * i15,
    })

    # predict
    y_test_pred = lr_model.predict(features)
    # return the output
    return 'Chance of having debris flow : %s' %y_test_pred[0]
                 
# Start the development server only when this code is executed directly
# (a notebook cell or `python app.py`, where __name__ is '__main__').
# Importing the module, e.g. from a WSGI server, must not start a blocking
# server as a side effect.
if __name__ == '__main__':
    app.run()





 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [03/Jun/2023 18:04:26] "GET /predict/?rainFall=20.4&siteId=2 HTTP/1.1" 200 -


## Flask App to expose NN model ( two layer) as end point

- After running the cell below, the Flask app will be running at 127.0.0.1:5000

- Click the hyperlink http://127.0.0.1:5000/predictForAllSites/?rainFall=100 to get a response from the app. We can change the rainFall parameter to check the values.

- To stop the app running in the Jupyter notebook, press "Interrupt Kernel" in the Kernel menu.

In [3]:
import sys
sys.path.append("../../model/")
from flask import Flask,request
import json
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
import numpy as np
import ModelNN as Net
import torch
# Flask application object; the /predictForAllSites/ route below is registered on it
app = Flask(__name__)

# initialize values
seed=27
data_dir = '../../data/'
model_dir = '../../model/'
# nn_model = 'two_layer'
nn_model = 'TwoLayer_750_epochs_optimized_roc_auc_score'
peak_mmh = 'peak_i15_mmh'  # name of the rainfall-intensity feature column

# data_file = data_dir + 'data_v09_consolidated.parquet'
data_file = data_dir + 'data_v08_consolidated.parquet'

# read the parameters of the selected model from the config file
with open(model_dir + "model_parameters.json","r") as jsonfile:
    params= json.load(jsonfile)[nn_model]

# read model data from file
X_train_df = pd.read_parquet(data_file)
y_train_df = X_train_df['response']

# select only the required columns/features
nn_data = X_train_df[params['features']]
# position of the rainfall column; the endpoint overwrites this column
col_indx = nn_data.columns.get_loc(peak_mmh)

# scale data (nn_data becomes a NumPy array from here on)
ss = StandardScaler()
nn_data = ss.fit_transform(nn_data)

# now get the mean and std dev of the rain intensity parameter from the
# already fitted StandardScaler, so a raw rainFall value can be scaled the
# same way at request time
i15_feature_indx = ss.feature_names_in_.tolist().index(peak_mmh)
i15_mean= ss.mean_.tolist()[i15_feature_indx]
i15_std = np.sqrt(ss.var_.tolist()[i15_feature_indx])

# initialize model params
input_size = nn_data.shape[1]
hidden_size = params['hidden_size']
learning_rate = params['lr']
dropout_rate = params['dropout_rate']
output_size = 1

# initialize model architecture
model = Net.TwoLayer(input_size, hidden_size, output_size, dropout_rate)

# load weights
# The model is never moved to a GPU and the endpoint builds CPU tensors, so
# the checkpoint is always mapped onto the CPU. This gives the same loaded
# model whether or not CUDA is available, without a device-specific branch.
model.load_state_dict(torch.load(model_dir+params["weights"], map_location=torch.device('cpu')))

# switch to evaluation mode for inference
model.eval()

#URL Binding     
@app.route('/predictForAllSites/', methods=['GET'])
def predictForAllSites():
    """Score every row of ``nn_data`` for one storm rainfall intensity.

    Query parameters:
        rainFall: unscaled peak rainfall intensity, parsed as a float.

    Returns:
        A list with one model output per row of ``nn_data``, rounded to two
        decimals (the original comments describe these as debris-flow
        probabilities), or a 400 response when rainFall is missing or not
        numeric.
    """
    # read parameters from the URL
    parameters = request.args.to_dict()
    try:
        i15 = float(parameters['rainFall'])
    except (KeyError, ValueError):
        # a missing or malformed parameter is a client error, not a server crash
        return 'rainFall (number) query parameter is required', 400

    # scale the rainFall parameter with the statistics of the fitted scaler
    peak_i15_scaled = (i15 - i15_mean)/i15_std

    # Set the scaled rainfall for all sites: we are predicting the DF across
    # all sites for the given amount of storm rainfall. This works on a
    # per-request copy so the shared module-level array is never mutated
    # (requests may be served concurrently).
    site_data = nn_data.copy()
    site_data[:,col_indx] = peak_i15_scaled

    tensor_data = torch.from_numpy(site_data).float()

    # use the model to predict; no gradients are needed for inference
    with torch.no_grad():
        y_pred = model(tensor_data)

    # convert the y_pred tensor (one output column) to a flat list
    df_prob_list =  [np.round(i[0],2) for i in y_pred.numpy().tolist()]

    # return the list of outputs
    return df_prob_list

# Start the development server only when this code is executed directly
# (a notebook cell or `python app.py`, where __name__ is '__main__').
# Importing the module, e.g. from a WSGI server, must not start a blocking
# server as a side effect.
if __name__ == '__main__':
    app.run()

# """
# test the process by uncommenting below code
# check the train accuracy by loading model and predicting ( No Training)
# """

# #convert to tensors
# nn_data = torch.from_numpy(nn_data).float()

# # #predict 
# y_pred = model(nn_data)

# # check accuracy
# y_train =y_train_df
# train_pred_correct = sum(X_train_df['response'] == np.round(y_pred.detach().numpy().flatten()))
# train_accuracy = (train_pred_correct / y_train_df.shape[0])
# print(train_accuracy)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [03/Jun/2023 18:06:52] "GET /predictForAllSites/?rainFall=100 HTTP/1.1" 200 -
127.0.0.1 - - [03/Jun/2023 18:06:59] "GET /predictForAllSites/?rainFall=100 HTTP/1.1" 200 -
127.0.0.1 - - [03/Jun/2023 18:07:04] "GET /predictForAllSites/?rainFall=10 HTTP/1.1" 200 -


## Scratch pad (ignore the cells below)

In [4]:
if __name__ == '__main__':
    # Scratch evaluation: load saved weights and report train/test metrics
    # (no training happens here).
    # NOTE(review): `df` and `features_15` are not defined anywhere in the
    # visible notebook (the recorded run failed with "NameError: name
    # 'features_15' is not defined"), and `Net` must be a callable model
    # class here, whereas other cells bind `Net` to the ModelNN module.
    # Define these before running this cell.

    # Load data
    data = df.copy()

    X = data[features_15]
    y = data['response']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    # scale the data: fit on X_train only, then apply the same scaling to X_test
    cols = X_train.columns
    sc = StandardScaler()
    X_train = pd.DataFrame(sc.fit_transform(X_train), columns=cols)
    X_test = pd.DataFrame(sc.transform(X_test), columns=cols)

    # float tensors; labels are reshaped to a single column
    X_train = torch.tensor(X_train.values).float()
    y_train = torch.tensor(y_train.values).float().view(-1, 1)
    X_test = torch.tensor(X_test.values).float()
    y_test = torch.tensor(y_test.values).float().view(-1, 1)

    input_size = X_train.shape[1]
    hidden_size = 500
    output_size = 1
    dropout_rate = 0.2
    learning_rate = 0.001 # 0.001 is default value for Adam optimizer

    model = Net(input_size, hidden_size, output_size, dropout_rate)
    model.load_state_dict(torch.load('../../data/model.pth'))
    # Evaluation mode: a freshly built module starts in training mode, in
    # which dropout layers (the model is built with dropout_rate) stay active
    # and make the metrics below vary from run to run.
    model.eval()

    # now final outputs; no gradients are needed for scoring
    with torch.no_grad():
        # per the original notes the model output is already a probability,
        # so no sigmoid is applied
        y_train_pred = model(X_train)
        y_test_pred = model(X_test)

    # accuracy: fraction of rows whose rounded output equals the label
    train_pred_correct = sum(y_train.detach().numpy() == np.round(y_train_pred.detach().numpy()))
    train_accuracy = (train_pred_correct / y_train.shape[0])[0]
    print(f'Training accuracy: {train_accuracy}')

    test_pred_correct = sum(y_test.detach().numpy() == np.round(y_test_pred.detach().numpy()))
    test_accuracy = (test_pred_correct / y_test.shape[0])[0]
    print(f'Test accuracy: {test_accuracy}')

    # f1 on the rounded test outputs
    f1_output = f1_score(y_test, np.round(y_test_pred.detach().numpy()))
    print("\n")
    print(f'F1 Score (test): {f1_output}')

    # AUC on the raw (unrounded) test outputs
    auc_test = roc_auc_score(
        y_test.detach().numpy(),
        y_test_pred.detach().numpy()
    )
    print(f'AUC (test): {auc_test}')
    print('\n')

# Training loss: 0.023463357239961624
# Validation loss: 2.032917022705078


# Training accuracy: 0.9908151549942594
# Test accuracy: 0.8256880733944955


# F1 Score (test): 0.6666666666666666
# AUC (test): 0.8762721555174386

NameError: name 'features_15' is not defined

In [122]:
import sys
sys.path.append("../../model/")
from flask import Flask,request
import json
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
import numpy as np
import ModelNN as Net
import torch
app = Flask(__name__)

# initialize values
seed=27
data_dir = '../../data/'
model_dir = '../../model/'
nn_model = 'two_layer'
peak_mmh = 'peak_i15_mmh'  # name of the rainfall-intensity feature column

data_file = data_dir + 'data_v09_feature_consolidation.parquet'
data_file_pkl = data_dir+ 'train_test_data.pkl'

# read the parameters of the selected model from the config file
with open(model_dir + "model_parameters.json","r") as jsonfile:
    params= json.load(jsonfile)[nn_model]

# read the train/test split from the pickle file; the context manager
# closes the file handle, which the bare open() call left open
with open(data_file_pkl, "rb") as pkl_file:
    X_train_df, X_test_df, y_train_df, y_test_df = pickle.load(pkl_file)
nn_data = X_train_df[params['features']]
col_indx = nn_data.columns.get_loc(peak_mmh)

# scale data (nn_data becomes a NumPy array from here on)
ss = StandardScaler()
nn_data = ss.fit_transform(nn_data)

# now get the mean and std dev of the rain intensity parameter from the
# fitted StandardScaler
i15_feature_indx = ss.feature_names_in_.tolist().index(peak_mmh)
i15_mean= ss.mean_.tolist()[i15_feature_indx]
i15_std = np.sqrt(ss.var_.tolist()[i15_feature_indx])

# initialize model params
input_size = nn_data.shape[1]
hidden_size = params['hidden_size']
learning_rate = params['lr']
dropout_rate = params['dropout_rate']
output_size = 1

# initialize model architecture
model = Net.TwoLayer(input_size, hidden_size, output_size, dropout_rate)

# load weights, mapped onto the CPU so that a checkpoint saved on a GPU
# also loads on a machine without CUDA (the model itself stays on the CPU)
model.load_state_dict(torch.load(model_dir+params["weights"], map_location=torch.device('cpu')))
# switch to evaluation mode for inference
model.eval()
# #URL Binding     
# @app.route('/predictForAllSites/', methods=['GET'])
# def predictForAllSites():
    
#     # read parameters from the URL
#     parameters = request.args.to_dict()
#     i15 = float(parameters['rainFall'])
    
#     #scale the rainFall parameter
#     peak_i15_scaled = (i15 - i15_mean)/i15_std
    
#     #update the data with peak_i15_scaled value for all sites
#     #we are predicting the DF across all sites for given amount of storm rainfall
#     nn_data[:,col_indx] = peak_i15_scaled
#     tensor_data = torch.from_numpy(nn_data).float()
    
#     #use model to predict
#     y_pred = model(tensor_data)
    
#     #convert y_pred tensor to list 
#     df_prob_list =  [np.round(i[0],2) for i in y_pred.detach().numpy().tolist()]
    
#     #return the list of probabilities
#     return df_prob_list[15:30]

# app.run()

# """
# test the process by uncommenting below code
# check the train accuracy by loading model and predicting ( No Training)
# """

# Turn the scaled feature matrix into a float tensor (nn_data is rebound)
nn_data = torch.from_numpy(nn_data).float()

# Run the loaded model over every training row
y_pred = model(nn_data)

# Training accuracy: share of rows whose rounded output equals the label
y_train = torch.tensor(y_train_df.values).float().view(-1, 1)
train_pred_correct = sum(np.round(y_pred.detach().numpy()) == y_train.detach().numpy())
train_accuracy = (train_pred_correct / len(y_train))[0]
print(train_accuracy)


0.8018539976825029


In [None]:
# Import libraries (add new ones to requirements.txt)

# load data file 

# scale the data(all columns)

# get the mean and std. dev for rainfall param ( check if we get this value from scaler)

# scale the value passed in by UI which is rainfall parameter

# update the scaled data df , with new scaled rainfall ( same value for all recs) 


# read the model from NN-Model-Class.py

# fit the model from .pth file 

# predict the debrisflow for all sites in the scaled data file 


# does the UI , process one site at a time ?


# there are 600/1000 unique sites, we have 600 records in the test set? (assumption: one call )


# can the flask api return the pandas df  or np.array with site-id and response variable ?
# or does it have to be done another way?

# does the nn-model return 0 or 1?

"""column has one static name

1. Input rain (unscaled value); 2. DBF_Prob (0-1)

2. 600 rec pandas df

3. visualization is looking at column 2; seeded at (prob as-is)


"""


