<a href="https://colab.research.google.com/github/ashutosh3060/friday-burger-mojito/blob/master/eda_model_build.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Table of Contents:

0. Libraries
1. User-Defined Functions
2. Import Data
4. Exploratory Data Analysis
6. Data Preparation for Model Building
7. Model Build
8. Hyperparameter Tuning
9. Evaluation
10. Final Recommendation

## 0. Libraries

In [1]:
# warnings
import warnings
warnings.filterwarnings("ignore")

# Standard-library utilities, numerical computing and dataframe libraries
import time
from collections import Counter
import pickle
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint

# sklearn
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split, cross_val_score, KFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, roc_curve, auc, accuracy_score, confusion_matrix, classification_report, f1_score
# XGBoost
from xgboost import XGBClassifier, plot_importance

# imblearn for imbalanced data handling
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Display Settings
# sns.set() is an alias of set_theme(), which re-applies a style on every call
# (its own default when none is given). The style is therefore passed in the
# same call as font_scale; a separate sns.set_style('whitegrid') issued before
# sns.set(font_scale=...) would be overridden.
sns.set(style='whitegrid', font_scale=1.25)
# Widen pandas display limits so wide/long frames are not truncated in output.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)

## 1. User-Defined Functions

In [None]:
def bivar_contns(nonreturn_df, return_df, contns_col, bins=100, figsize=(12, 4)):
    '''
    Plots the continuous variable's histogram for the returning and non-returning customers

    Inputs
    ----------
    nonreturn_df : dataframe
        Dataframe containing Non-returning customers only
    return_df : dataframe
        Dataframe containing Returning customers only
    contns_col : string
        Continuous variable name
    bins : int, optional
        Number of histogram bins used in both panels (default 100)
    figsize : tuple of (width, height), optional
        Figure size in inches (default (12, 4))

    Output
    -------
    Histogram in subplots (2 plots)
        Side-by-side histograms of the continuous variable for Non-returning and Returning Customers.
        The figure is shown via plt.show(); nothing is returned.
    '''
    fig, axes = plt.subplots(1, 2)
    fig.set_size_inches(*figsize)

    # Left panel: non-returning customers; right panel: returning customers.
    panels = (
        (nonreturn_df, 'non-returning'),
        (return_df, 'returning'),
    )
    for ax, (segment_df, panel_title) in zip(axes, panels):
        segment_df.hist(contns_col, bins=bins, ax=ax)
        # Title is set after plotting so the segment label is what ends up on the panel.
        ax.set_title(panel_title)
        ax.set_xlabel(contns_col)

    plt.show()