### This is a helper notebook that breaks down, step by step, the data processing performed at the server end

### A dummy input, [3, 13702, 0.09, 48, 0.2, 5], is used for demonstration

In [11]:
import numpy as np 
import pandas as pd 
import json
from flask import request, jsonify
from flask import Flask, Blueprint
from flask import render_template

import os,pickle

import warnings
warnings.filterwarnings('ignore')

In [12]:
# Load the trained model from the pickle file (presumably a logistic
# regression classifier, going by the file name).
# NOTE: unpickling can execute arbitrary code; only load artifacts that come
# from a trusted source.
with open('logreg.pickle', 'rb') as f1:
    model = pickle.load(f1)

# Load the scaler from the pickle file
with open('scaler.pickle', 'rb') as f2:
    scaler_pkl = pickle.load(f2)

# Load encoder from the pickle file
with open('encoder.pickle', 'rb') as f3:
    encoder_pkl = pickle.load(f3)
    

In [13]:
def convert_to_df(arr):
    """Wrap one raw feature vector in a single-row, float-typed DataFrame.

    Parameters
    ----------
    arr : sequence
        Six values ordered as Administrative, ProductRelated_Duration,
        BounceRates, PageValues, SpecialDay, Month.

    Returns
    -------
    pandas.DataFrame
        One row with the six named columns, every column cast to float.
    """
    feature_names = [
        'Administrative',
        'ProductRelated_Duration',
        'BounceRates',
        'PageValues',
        'SpecialDay',
        'Month',
    ]
    # The list is nested so that it becomes a single row, not a column.
    single_row = pd.DataFrame(data=[arr], columns=feature_names)
    return single_row.astype(float)

def one_hot_encoding(df):
    """Replace the 'SpecialDay' and 'Month' columns with one-hot columns.

    Uses the module-level ``encoder_pkl`` (presumably a fitted scikit-learn
    ``OneHotEncoder`` -- it must expose ``transform`` and ``categories_``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'SpecialDay' and 'Month' columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the two categorical columns, followed by one column per
        encoder category named ``"<column>_<category>"``.
    """
    categorical_cols = ['SpecialDay', 'Month']

    encoded_data = encoder_pkl.transform(df[categorical_cols])
    # An encoder configured for sparse output returns a SciPy sparse matrix,
    # which the DataFrame constructor does not expand into columns; densify
    # it first. Plain ndarrays have no ``toarray`` and pass through untouched.
    if hasattr(encoded_data, 'toarray'):
        encoded_data = encoded_data.toarray()

    # Convert the encoded data to a DataFrame with meaningful column names.
    # Reuse the caller's index: pd.concat(axis=1) aligns rows on index labels,
    # so a fresh RangeIndex would misalign (and NaN-pad) any input frame whose
    # index is not 0..n-1.
    encoded_df = pd.DataFrame(
        encoded_data,
        columns=[
            f"{col}_{val}"
            for col, vals in zip(categorical_cols, encoder_pkl.categories_)
            for val in vals
        ],
        index=df.index,
    )

    # Concatenate the original DataFrame with the encoded DataFrame
    encoded_data2 = pd.concat([df, encoded_df], axis=1)

    # Drop the original columns that were encoded
    return encoded_data2.drop(categorical_cols, axis=1)

def scale_input_data(df):
    """Apply the module-level ``scaler_pkl`` to ``df`` and return the result.

    The scaler's ``transform`` output is returned as-is, without any
    post-processing.
    """
    return scaler_pkl.transform(df)

In [14]:
# Demo feature vector, ordered as convert_to_df expects: Administrative,
# ProductRelated_Duration, BounceRates, PageValues, SpecialDay, Month.
input_arr = [3, 13702, 0.09, 48, 0.2, 5]

In [15]:
# Step 1: wrap the raw list in a single-row, float-typed DataFrame.
# The bare name on the last line makes the notebook display the frame.
input_df = convert_to_df(input_arr)
input_df

Unnamed: 0,Administrative,ProductRelated_Duration,BounceRates,PageValues,SpecialDay,Month
0,3.0,13702.0,0.09,48.0,0.2,5.0


In [16]:
# Step 2: expand 'SpecialDay' and 'Month' into one-hot columns using the
# loaded encoder; the remaining numeric columns are carried over unchanged.
one_hot_encoded_df = one_hot_encoding(input_df)
one_hot_encoded_df

Unnamed: 0,Administrative,ProductRelated_Duration,BounceRates,PageValues,SpecialDay_0.0,SpecialDay_0.2,SpecialDay_0.4,SpecialDay_0.6,SpecialDay_0.8,SpecialDay_1.0,Month_2,Month_3,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12
0,3.0,13702.0,0.09,48.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Step 3: pass the encoded row through the loaded scaler. The result is a
# 2-D NumPy array (one row per sample), which is what the model consumes.
input_scaled=scale_input_data(one_hot_encoded_df)
input_scaled

array([[ 0.2119758 ,  6.84009297,  1.44946314,  2.23314808, -3.01379228,
         8.24912117, -0.14217161, -0.16435008, -0.1669344 , -0.10771897,
        -0.11823816, -0.43154223,  1.65517413, -0.15675666, -0.19333858,
        -0.19204507, -0.19366081, -0.21460434, -0.56990258, -0.40495914]])

In [18]:
# Step 4: predict the class label. predict() yields one label per input row,
# so index 0 is the label of the single demo sample.
y_pred = model.predict(input_scaled)
print(y_pred[0])

1


In [19]:
# Step 5: get the per-class probabilities for the same row and round them to
# three decimals to keep the response compact.
# NOTE(review): column order presumably follows model.classes_ -- confirm.
y_prob = model.predict_proba(input_scaled)
y_prob = np.round(y_prob, 3)
print(y_prob[0])


[0.316 0.684]


In [20]:
# Step 6: package the label and its probabilities for the response.
# .tolist() turns the NumPy arrays into plain Python lists, which json.dumps
# can serialize; the bare name on the last line displays the JSON string.
result_dict = dict(
    predicted_class=y_pred.tolist(),
    probabilities=y_prob.tolist(),
)
json_data = json.dumps(result_dict)
json_data

'{"predicted_class": [1], "probabilities": [[0.316, 0.684]]}'