#### Import Libaries

In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import plotly.express as px

#### Load the data

In [7]:
df = pd.read_csv("Churn_Modelling.csv")

#### Geographic Distribution of Churn

Null Hypothesis (H0): Churn rates are the same across different geographies.<br>
Alternative Hypothesis (H1): Churn rates differ across geographies.

In [53]:
contingency_table = pd.crosstab(df['Geography'], df['Exited'])
print(contingency_table)

print("\n")
chi2, p, dof, ex = stats.chi2_contingency(contingency_table)
p = float("{:.50f}".format(p))
print(f"Chi-Square Test: chi2 = {chi2}, p-value = {p}")

print("\n")
if p < 0.05:
    print("Reject the null hypothesis: Churn rates differ across geographies.")
else:
    print("Fail to reject the null hypothesis: Churn rates are the same across geographies.")

Exited        0    1
Geography           
France     4204  810
Germany    1695  815
Spain      2064  413


Chi-Square Test: chi2 = 302.26368202943, p-value = 0.0


Reject the null hypothesis: Churn rates differ across geographies.


<br>
<br>

#### Age and Customer Churn

Null Hypothesis (H0): There is no difference in age between churned and non-churned customers.<br>
Alternative Hypothesis (H1): There is a difference in age between churned and non-churned customers.

In [34]:
churned = df[df['Exited'] == 1]['Age']
non_churned = df[df['Exited'] == 0]['Age']

t_stat, p_val = stats.ttest_ind(churned, non_churned)

print(f"T-Test: t-statistic = {t_stat}, p-value = {p_val}\n")
if p_val < 0.05:
    print("Reject the null hypothesis: There is a difference in age between churned and non-churned customers.")
else:
    print("Fail to reject the null hypothesis: No significant difference in age between churned and non-churned customers.")

T-Test: t-statistic = nan, p-value = nan

Fail to reject the null hypothesis: No significant difference in age between churned and non-churned customers.


<br>
<br>

#### Credit Score and Churn Rate

Null Hypothesis (H0): There is no correlation between credit score and churn. <br>
Alternative Hypothesis (H1): There is a correlation between credit score and churn.

In [35]:
corr, p_val = stats.pearsonr(df['CreditScore'], df['Exited'])

print(f"Pearson Correlation: correlation = {corr}, p-value = {p_val}\n")
if p_val < 0.05:
    print("Reject the null hypothesis: There is a correlation between credit score and churn.")
else:
    print("Fail to reject the null hypothesis: No significant correlation between credit score and churn.")


Pearson Correlation: correlation = -0.026908506171037905, p-value = 0.007118042877179715

Reject the null hypothesis: There is a correlation between credit score and churn.


<br>
<br>

#### Balance and Customer Churn

Null Hypothesis (H0): There is no difference in balance between churned and non-churned customers.<br>
Alternative Hypothesis (H1): There is a difference in balance between churned and non-churned customers.

In [36]:
churned = df[df['Exited'] == 1]['Balance']
non_churned = df[df['Exited'] == 0]['Balance']

# T-test
t_stat, p_val = stats.ttest_ind(churned, non_churned)

print(f"T-Test: t-statistic = {t_stat}, p-value = {p_val}\n")
if p_val < 0.05:
    print("Reject the null hypothesis: There is a difference in balance between churned and non-churned customers.")
else:
    print("Fail to reject the null hypothesis: No significant difference in balance between churned and non-churned customers.")


T-Test: t-statistic = 11.929566470979756, p-value = 1.3803689194402165e-32

Reject the null hypothesis: There is a difference in balance between churned and non-churned customers.


<br>
<br>

In [37]:
contingency_table = pd.crosstab(df['NumOfProducts'], df['Exited'])
chi2, p, dof, ex = stats.chi2_contingency(contingency_table)


print(f"Chi-Square Test: chi2 = {chi2}, p-value = {p}\n")
if p < 0.05:
    print("Reject the null hypothesis: Churn rates differ across different numbers of products held.")
else:
    print("Fail to reject the null hypothesis: Churn rates are the same across different numbers of products held.")


Chi-Square Test: chi2 = 1501.731218374043, p-value = 0.0

Reject the null hypothesis: Churn rates differ across different numbers of products held.


<br>
<br>

#### Impact of Credit Card Ownership on Churn

Null Hypothesis (H0): Churn rates are the same regardless of credit card ownership.<br>
Alternative Hypothesis (H1): Churn rates differ based on credit card ownership.

In [38]:
contingency_table = pd.crosstab(df['HasCrCard'], df['Exited'])

chi2, p, dof, ex = stats.chi2_contingency(contingency_table)

print(f"Chi-Square Test: chi2 = {chi2}, p-value = {p}\n")
if p < 0.05:
    print("Reject the null hypothesis: Churn rates differ based on credit card ownership.")
else:
    print("Fail to reject the null hypothesis: Churn rates are the same regardless of credit card ownership.")


Chi-Square Test: chi2 = 0.4538116222050993, p-value = 0.5005304116181003

Fail to reject the null hypothesis: Churn rates are the same regardless of credit card ownership.


<br>
<br>

#### Active Membership and Churn

Null Hypothesis (H0): Churn rates are the same regardless of active membership.<br>
Alternative Hypothesis (H1): Churn rates differ based on active membership.

In [39]:
contingency_table = pd.crosstab(df['IsActiveMember'], df['Exited'])
chi2, p, dof, ex = stats.chi2_contingency(contingency_table)

print(f"Chi-Square Test: chi2 = {chi2}, p-value = {p}\n")
if p < 0.05:
    print("Reject the null hypothesis: Churn rates differ based on active membership.")
else:
    print("Fail to reject the null hypothesis: Churn rates are the same regardless of active membership.")


Chi-Square Test: chi2 = 243.20556280578884, p-value = 7.866266336234955e-55

Reject the null hypothesis: Churn rates differ based on active membership.


<br>
<br>

#### Tenure and Customer Churn

Null Hypothesis (H0): There is no difference in tenure between churned and non-churned customers.<br>
Alternative Hypothesis (H1): There is a difference in tenure between churned and non-churned customers.

In [40]:
churned = df[df['Exited'] == 1]['Tenure']
non_churned = df[df['Exited'] == 0]['Tenure']

# T-test
t_stat, p_val = stats.ttest_ind(churned, non_churned)

print(f"T-Test: t-statistic = {t_stat}, p-value = {p_val}\n")
if p_val < 0.05:
    print("Reject the null hypothesis: There is a difference in tenure between churned and non-churned customers.")
else:
    print("Fail to reject the null hypothesis: No significant difference in tenure between churned and non-churned customers.")


T-Test: t-statistic = -1.411853051886773, p-value = 0.15802434865441628

Fail to reject the null hypothesis: No significant difference in tenure between churned and non-churned customers.


#### Estimated Salary and Customer Churn

Null Hypothesis (H0): There is no difference in estimated salary between churned and non-churned customers.<br>
Alternative Hypothesis (H1): There is a difference in estimated salary between churned and non-churned customers.

In [41]:

churned = df[df['Exited'] == 1]['EstimatedSalary']
non_churned = df[df['Exited'] == 0]['EstimatedSalary']

# T-test
t_stat, p_val = stats.ttest_ind(churned, non_churned)

print(f"T-Test: t-statistic = {t_stat}, p-value = {p_val}\n")
if p_val < 0.05:
    print("Reject the null hypothesis: There is a difference in estimated salary between churned and non-churned customers.")
else:
    print("Fail to reject the null hypothesis: No significant difference in estimated salary between churned and non-churned customers.")


T-Test: t-statistic = 1.2124213491979505, p-value = 0.2253797753825045

Fail to reject the null hypothesis: No significant difference in estimated salary between churned and non-churned customers.


<br>
<br>

#### Gender and Customer Churn

Null Hypothesis (H0): Churn rates are the same across genders.<br>
Alternative Hypothesis (H1): Churn rates differ across genders.

In [42]:
contingency_table = pd.crosstab(df['Gender'], df['Exited'])

# Chi-Square test
chi2, p, dof, ex = stats.chi2_contingency(contingency_table)

print(f"Chi-Square Test: chi2 = {chi2}, p-value = {p}\n")
if p < 0.05:
    print("Reject the null hypothesis: Churn rates differ across genders.")
else:
    print("Fail to reject the null hypothesis: Churn rates are the same across genders.")


Chi-Square Test: chi2 = 112.37073626834965, p-value = 2.963709957332313e-26

Reject the null hypothesis: Churn rates differ across genders.
