In [62]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Load the four customer extracts; they are joined on 'CustomerId' below.
customer_attrition_status = pd.read_csv('Customer Data/Customer Attrition Status.csv')
customer_demographics = pd.read_csv('Customer Data/Customer Demographics.csv')
customer_investment_snapshot = pd.read_csv('Customer Data/Customer Investment Snapshot.csv')
customer_portfolio_snapshot = pd.read_csv('Customer Data/Customer Portfolio Snapshot.csv')

# Join the attrition table with the selected columns of each other extract.
# pd.merge defaults to an inner join, so only customers present in every
# table survive.
dataframe1 = pd.merge(
    customer_attrition_status,
    customer_demographics[['CustomerId', 'Surname', 'Geography', 'Gender', 'Age']],
    on='CustomerId',
)
dataframe2 = pd.merge(
    dataframe1,
    customer_investment_snapshot[
        ['CustomerId', 'Tenure', 'CreditScore', 'EstimatedSalary', 'Balance']
    ],
    on='CustomerId',
)
dataframe3 = pd.merge(
    dataframe2,
    customer_portfolio_snapshot[
        ['CustomerId', 'NumOfProducts', 'HasChckng', 'IsActiveMember']
    ],
    on='CustomerId',
)
dataframe4 = dataframe3.drop_duplicates()

# Target dtype for every column kept in the analysis frame. The key order
# is also the column order of dataframe5/dataframe6.
convertdict = {'RowNumber': int, 'CustomerId': int, 'Surname': 'string',
               'CreditScore': int, 'Geography': 'string',
               'Gender': 'string', 'Age': int, 'Tenure': int, 'Balance': float,
               'NumOfProducts': int, 'HasChckng': int, 'IsActiveMember': int,
               'EstimatedSalary': float, 'Exited': int}

# .copy() makes dataframe5 an independent frame, so the fillna assignments
# below write to it directly instead of to a slice of dataframe4 (which
# raises SettingWithCopyWarning and is not guaranteed to take effect).
dataframe5 = dataframe4[list(convertdict)].copy()

# Fill missing values with the column mean before casting: NaN cannot be
# cast to int. The float mean written into CreditScore is truncated by the
# int cast below.
dataframe5['Balance'] = dataframe5['Balance'].fillna(dataframe5['Balance'].mean())
dataframe5['CreditScore'] = dataframe5['CreditScore'].fillna(dataframe5['CreditScore'].mean())

dataframe6 = dataframe5.astype(convertdict)
dataframe6.set_index('RowNumber', inplace=True)

### how many users are from different zones ####
dataframe7 = (
    dataframe6.groupby(['Geography'])
    .CustomerId.count()
    .reset_index(name="count")
)
# Pull only the first wedge out of the pie. The tuple is sized from the data
# so the plot keeps working when the number of geographies is not three.
explode = tuple(0.1 if position == 0 else 0 for position in range(len(dataframe7)))
fig1, ax1 = plt.subplots()
ax1.pie(dataframe7['count'], explode=explode, labels=dataframe7['Geography'],
        autopct='%1.1f%%', shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()
# pandas labels pie wedges with the frame's index, so index by Geography to
# show the geography names instead of the positional labels 0, 1, 2, ...
dataframe7.set_index('Geography').plot.pie(y="count", subplots=True, figsize=(11, 6))

### how many users exited from different geography ###
# One row per (Geography, Exited, Gender) combination with its customer count.
dataframe8 = (
    dataframe6[['Geography', 'Exited', 'Gender']]
    .groupby(['Geography', 'Exited', 'Gender'])
    .size()
    .reset_index(name="count")
)
sns.set_theme(style="whitegrid")
# A seaborn bar plot aggregates rows that share the same x/hue with the mean
# by default. Plotting dataframe8 directly would therefore show the average
# of the per-gender (or per-geography) counts, with error bars, rather than
# the total. Sum the counts down to the plotted granularity first so each
# bar is backed by exactly one row.
exited_by_geography = (
    dataframe8.groupby(['Geography', 'Exited'], as_index=False)['count'].sum()
)
sns.catplot(x="Geography", y="count", hue="Exited", kind="bar", data=exited_by_geography)
exited_by_gender = (
    dataframe8.groupby(['Gender', 'Exited'], as_index=False)['count'].sum()
)
sns.catplot(x="Gender", y="count", hue="Exited", kind="bar", data=exited_by_gender)



### customer counts per geography, coloured by Exited, one panel per gender ###
# Scatter is relplot's default kind; it is spelled out here for clarity.
sns.relplot(
    x='Geography',
    y='count',
    hue='Exited',
    col="Gender",
    kind="scatter",
    data=dataframe8,
)



### number of products held, gender wise and geography wise ###
# Sum NumOfProducts within each (Geography, Gender) group. The previous
# .size() only counted rows (customers) and ignored the product values.
# The result column keeps the name "count" so the plot call is unchanged.
dataframe9 = (
    dataframe6.groupby(['Geography', 'Gender'])
    .NumOfProducts.sum()
    .reset_index(name="count")
)
sns.relplot(data=dataframe9, x='Geography', y='count', hue='Gender', col="Gender")

### How many active and non-active members are still in the service (have not exited) ###
# The heading asks for customers who have NOT exited, so keep Exited == 0
# (the original filter selected Exited == 1).
# NOTE(review): assumes Exited is encoded 1 = exited, 0 = retained; confirm
# against the attrition extract.
dataframe10 = (
    dataframe6[dataframe6.Exited == 0]
    .groupby(['IsActiveMember'])
    .CustomerId.count()
    .reset_index(name="count")
)
sns.barplot(data=dataframe10, x='IsActiveMember', y='count', hue="IsActiveMember")
### export the cleaned dataframe to a csv file ###
# The index (set to 'RowNumber' earlier) is written as the first column.
dataframe6.to_csv(path_or_buf='file1.csv', index=True)

### Generate Heatmap ###
# Correlate the numeric columns only. dataframe6 also holds string columns
# (Surname, Geography, Gender), and DataFrame.corr() raises on non-numeric
# data in pandas >= 2.0 instead of silently dropping it. select_dtypes
# behaves the same on old and new pandas versions.
numeric_columns = dataframe6.select_dtypes(include='number')
sns.heatmap(numeric_columns.corr())




Unnamed: 0,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasChckng,IsActiveMember,EstimatedSalary,Exited
CustomerId,1.0,0.005658,0.008627,-0.015261,-0.006942,0.016972,-0.014025,0.001665,0.015271,-0.006248
CreditScore,0.005658,1.0,-0.004117,-0.001961,-0.003107,0.011966,-0.006043,0.025732,-0.002199,-0.027792
Age,0.008627,-0.004117,1.0,-0.009025,-0.010111,-0.028665,-0.013279,0.082351,-0.009181,0.280904
Tenure,-0.015261,-0.001961,-0.009025,1.0,0.004487,0.017197,0.023078,-0.02918,0.008055,-0.012178
Balance,-0.006942,-0.003107,-0.010111,0.004487,1.0,-0.000814,0.005403,-0.016314,-0.00148,0.014595
NumOfProducts,0.016972,0.011966,-0.028665,0.017197,-0.000814,1.0,0.003183,0.009612,0.014204,-0.04782
HasChckng,-0.014025,-0.006043,-0.013279,0.023078,0.005403,0.003183,1.0,-0.011866,-0.009933,-0.007138
IsActiveMember,0.001665,0.025732,0.082351,-0.02918,-0.016314,0.009612,-0.011866,1.0,-0.011421,-0.156128
EstimatedSalary,0.015271,-0.002199,-0.009181,0.008055,-0.00148,0.014204,-0.009933,-0.011421,1.0,0.012097
Exited,-0.006248,-0.027792,0.280904,-0.012178,0.014595,-0.04782,-0.007138,-0.156128,0.012097,1.0
