# In [None]:


# Import libraries
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns

import warnings


# Global look of every matplotlib figure drawn by the dashboard.
plt.style.use("dark_background")

plt.rcParams.update({
    # White text, axis labels and tick labels
    **dict.fromkeys(
        ('text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color'),
        'white',
    ),
    # Bluish dark grey background for figures, axes and saved images
    **dict.fromkeys(
        ('figure.facecolor', 'axes.facecolor', 'savefig.facecolor'),
        '#0E1117',
    ),
    # Fonts
    'font.family': 'DejaVu Sans',
    'font.serif': 'Abramo',
    'font.monospace': 'Ubuntu Mono',
    # Font sizes (explicit sizes below take precedence over 'font.size')
    'font.size': 35,
    'axes.labelsize': 6,
    'axes.titlesize': 6,
    'xtick.labelsize': 6,
    'ytick.labelsize': 6,
    'legend.fontsize': 6,
    'figure.titlesize': 10,
})

# Silence Streamlit's warning about calling st.pyplot() without a figure.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Hide every Python warning emitted while the script runs.
warnings.filterwarnings("ignore")

# Load dataset
# NOTE(review): both paths are absolute and machine-specific; the app only
# starts on a machine where these exact files exist.
data = pd.read_csv(r'C:\Users\Ingrid\Desktop\DAFT Nov 21\Projects\Final project\Churn_Modelling (1).csv')

# dataset for the Machine Learning
data_n = pd.read_csv(r'C:\Users\Ingrid\Desktop\DAFT Nov 21\Projects\Final project\churn_encoded.csv')

# Drop the 3 columns considered to have no impact on the customer decision
# to leave the bank: 'RowNumber', 'CustomerId' and 'Surname'.
# (The former bare `data.head(10)` was removed: in a script its result is
# discarded, so it did nothing.)
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])


# Encoding
from sklearn.preprocessing import LabelEncoder

lbl_ebcode = LabelEncoder()

# The single encoder is re-fitted on each column in turn and the column is
# replaced by its integer labels; afterwards the encoder holds the classes
# of the last column only ('Gender').
for column in ('Geography', 'Gender'):
    data[column] = lbl_ebcode.fit_transform(data[column])


# We define X (every column except the target) and y (the 'Exited' target)
y = data['Exited']
X = data.drop(columns='Exited')


# We split the dataset (67 % train / 33 % test, stratified on the target)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=2, shuffle=True, stratify=y
)


from imblearn.under_sampling import ClusterCentroids

# Balance the training split by under-sampling.
# random_state is fixed (same seed as the split above) so that the resampled
# data is identical on every Streamlit rerun; without it the cluster
# centroids, and therefore every chart built from them, change each time a
# widget is touched.
undersampler = ClusterCentroids(random_state=2)

X_res, y_res = undersampler.fit_resample(X_train, y_train)

# We decide to balance all our dataset
undersampler = ClusterCentroids(random_state=2)
X_new, y_new = undersampler.fit_resample(X, y)
# NOTE: this adds the target column to X_new itself; data_new below is the
# very same object, not a copy.
X_new["Target"] = y_new

# We create a new dataframe with balanced data
data_new = X_new




# Title of the dashboard
# The emoji had been corrupted into "ðŸ“ˆ" (UTF-8 bytes decoded as cp1252).
# It is written as an escape (U+1F4C8, chart increasing) so that it survives
# any further re-encoding of this file.
st.title(" Churn Dashboard \U0001F4C8")


# Sidebar navigation: the selected entry decides which page is rendered by
# the `if add_selectbox1 == ...` sections below.
st.sidebar.title("Options :")
add_selectbox1 = st.sidebar.selectbox(
    "Choose your table:",
    (
        "Dataset",
        "Explore our data",
        "Target",
        "ML Supervised Models",
        "ML Unsupervised Models",
    ),
)

if add_selectbox1=="Dataset":
    # One checkbox per view; a ticked checkbox renders its view right below
    # itself. Rendering is deferred through lambdas so that only the ticked
    # views are computed.
    dataset_views = (
        ('Shape', lambda: st.write(data.shape)),
        ('Describe', lambda: st.write(data.describe())),
        ('Data types', lambda: st.write(data.dtypes)),
        ('Show/Hide data table',
         lambda: st.write('The first 5 rows :', data.head(5))),
    )
    for label, render in dataset_views:
        if st.checkbox(label):
            render()




if add_selectbox1 == 'Explore our data':
    st.subheader ("A look on our data")

    # Every chart is drawn on an explicitly created figure/axes, handed to
    # st.pyplot() and then closed: calling st.pyplot() without a figure relies
    # on pyplot's global state, and figures that are never closed pile up in
    # memory because the script is re-executed on every interaction.

    # Display 2 charts side by side
    with st.container():
        col1, col2 = st.columns([2, 2])

        with col1:
            # Chart 1 - bar chart of the 'Exited' class counts.
            # Plotting the distribution of the classes gives insight into
            # the imbalance, if present.
            fig1, ax = plt.subplots()
            data['Exited'].value_counts().plot.bar(
                color=['gainsboro', 'black'], ax=ax
            )
            ax.set_title('Exited', fontsize=18)
            ax.set_xlabel("Exited", fontsize=10)
            ax.set_ylabel("Number of customers", fontsize=10)
            st.pyplot(fig1, use_container_width=True)
            plt.close(fig1)

        with col2:
            # Chart 2 - density estimate over the two 'Exited' class counts.
            # NOTE(review): sns.distplot is deprecated in recent seaborn
            # releases; kept to preserve the rendered chart.
            fig2, ax = plt.subplots()
            sns.distplot(data.Exited.value_counts(), color='black', ax=ax)
            ax.set_title('Exited', fontsize=18)
            st.pyplot(fig2, use_container_width=True)
            plt.close(fig2)

        # Chart for the column 'Geography' (full width, below the two columns)
        fig_geo, ax = plt.subplots(figsize=(12, 8))
        data['Geography'].value_counts().plot.bar(
            color=['royalblue', 'y', 'r'], ax=ax
        )
        ax.set_title('Geography', fontsize=18)
        ax.set_xlabel("Countries", fontsize=10)
        ax.set_ylabel("Number of customers", fontsize=10)
        st.pyplot(fig_geo)
        plt.close(fig_geo)
        

    

    with st.container():

        col1, col2 = st.columns([2, 2])

        with col1:
            # Chart 3 - bar chart of the number of customers per 'Gender'.
            # Drawn on an explicit axes and closed after rendering so that
            # figures do not accumulate across script reruns.
            fig3, ax = plt.subplots()
            data['Gender'].value_counts().plot.bar(
                color=['mediumblue', 'pink'], ax=ax
            )
            ax.set_title('Gender', fontsize=18)
            ax.set_xlabel("Gender", fontsize=10)
            ax.set_ylabel("Number of customers", fontsize=10)
            st.pyplot(fig3, use_container_width=True)
            plt.close(fig3)

        with col2:
            # Chart 4 - number of customers per 'Gender', split by
            # 'NumOfProducts' (horizontal count plot).
            fig4, ax = plt.subplots()
            sns.countplot(y='Gender', hue='NumOfProducts', data=data, ax=ax)
            ax.set_title('Number of products per gender', fontsize=18)
            ax.set_xlabel('Number of customers', fontsize=10)
            ax.set_ylabel('Gender', fontsize=10)
            ax.legend(fontsize=10)
            st.pyplot(fig4, use_container_width=True)
            plt.close(fig4)
     
# 2 charts for Age to plot    
        
        
    # with st.container():
        
    #     col1, col2= st.columns([2,2])
        
    #     with col1:
        
    #     # Chart 5 - Age
        
    #     fig5, ax = plt.subplots() 
    #     sb.kdeplot(data['Age'], color = 'khaki', shade = True)
    #     data['Age'].plot(color = 'khaki', shade = True)
    #     plt.title('Age', fontsize = 18)
    #     plt.xlabel("Age", fontsize = 10)
    #     plt.ylabel("Density", fontsize = 10)
    #     st.pyplot(fig5,use_container_width=True)
        
        
        
    #     with col2:
            
    #     #Chart 6 - Distribution of Age
        
    #     fig6, ax = plt.subplots() 
    #     data['Age'].hist(bins=15, color = 'khaki')
    #     plt.title('Age', fontsize = 18)
    #     plt.xlabel("Age", fontsize = 10)
    #     plt.ylabel("Number of customers", fontsize = 10)
    #     st.pyplot(fig6,use_container_width=True)
 
    
 # 2 charts for CreditScore to plot   
    
          # plt.figure(figsize = (20,15))
          # plt.subplot(321)

          # sb.kdeplot(data['CreditScore'], color = 'firebrick', shade = True)
          # plt.title('Credit Score', fontsize = 18)
          # plt.xlabel("Credit Score",  fontsize = 10)
          # plt.ylabel("Density", fontsize = 10)


          # plt.subplot(322)
          # data['CreditScore'].hist(color = 'firebrick')
          # plt.title('Credit Score', fontsize = 18)
          # plt.xlabel("Credit Score", fontsize = 10)
          # plt.ylabel("Number of customers", fontsize = 10)
          # plt.xlabel("Credit Score");


        # Boxplot of every column of the balanced dataset, to notice the
        # outliers. Each figure below is passed explicitly to st.pyplot()
        # (no reliance on pyplot's global figure) and closed afterwards so
        # figures do not accumulate across script reruns.
        fig_box, ax = plt.subplots(figsize=(17, 8))
        sns.boxplot(data=data_new, ax=ax)
        st.pyplot(fig_box)
        plt.close(fig_box)

        # We delete the 2 outliers: twice in a row, drop every row whose
        # 'Balance' equals the current maximum.
        data_new = data_new[data_new.Balance != data_new['Balance'].max()]
        data_new = data_new[data_new.Balance != data_new['Balance'].max()]

        # Boxplot to show that there is no more outliers
        # f, ax = plt.subplots(figsize=(17, 8))
        # sns.boxplot(data=data_new)
        # st.pyplot()

        # Correlation matrix
        corr = data_new.corr()

        # Heatmap of the correlation matrix, annotated with the coefficients
        fig_corr, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr, annot=True, annot_kws={"size": 13}, cmap="BuPu", ax=ax)
        st.pyplot(fig_corr)
        plt.close(fig_corr)

        # Scatter matrix, coloured by 'Target'. pairplot creates its own
        # figure, exposed on the returned grid.
        pair_grid = sns.pairplot(data=data_new, hue='Target')
        st.pyplot(pair_grid.figure)
        plt.close(pair_grid.figure)
        
        
        
if add_selectbox1 == 'Target':
    # The dash had been corrupted into "â€”" (UTF-8 bytes decoded as cp1252);
    # it is written as an escape (U+2014, em dash) so that it survives any
    # further re-encoding of this file.
    st.subheader ("Exited \u2014 whether or not the customer left the bank. This is what we have to predict. (0=No,1=Yes)")

    # One figure with a 3x2 grid of which the first four cells are used:
    # for each feature, the number of customers per value, split by 'Exited'.
    fig_target = plt.figure(figsize=(20, 15))

    features = ('Gender', 'HasCrCard', 'IsActiveMember', 'Tenure')
    for position, feature in enumerate(features, start=1):
        ax = fig_target.add_subplot(3, 2, position)
        sns.countplot(y=feature, hue='Exited', data=data, color='black', ax=ax)
        ax.legend(fontsize=10)

    # Pass the figure explicitly (st.pyplot() without a figure depends on
    # pyplot's global state) and close it so that figures do not accumulate
    # across script reruns.
    st.pyplot(fig_target)
    plt.close(fig_target)



if add_selectbox1 == 'ML Supervised Models':
    st.subheader('RandomForestClassifier')

    from PIL import Image

    # Image.open keeps the file handle open until the image is closed; the
    # context manager releases it as soon as Streamlit has rendered the image.
    # NOTE(review): both pictures load 'sunrise.jpg', which looks like a
    # placeholder for the real confusion-matrix and ROC images; confirm.
    with Image.open('sunrise.jpg') as image:
        st.image(image, caption='Confusion Matrix')

    with Image.open('sunrise.jpg') as image:
        st.image(image, caption='#Correlation between true positive and false positive rate')

if add_selectbox1 == 'ML Unsupervised Models':
    st.subheader('X Model')


  
# # We want 3 columns

# Att_yes = IBM[(IBM['Attrition'] == 'Yes')]
# Att_no = IBM[(IBM['Attrition'] == 'No')]
# Total = Att_yes + Att_no

# kpi1, kpi2, kpi3 = st.columns(3)
# kpi1.metric(label = 'Number of customers' , value = len(Total))
# kpi2.metric(label = 'Active employees' , value = len(Att_yes))
# kpi3.metric(label = 'Non active employees' , value = len(Att_no))

    
  