##### Copyright 2018 The TensorFlow Authors.

In [28]:
from google.colab import files
uploaded=files.upload()

Saving archive.zip to archive (6).zip


In [29]:
import zipfile
import os

# Get the name of the uploaded zip file
zip_file_name = list(uploaded.keys())[0]

# Unzip the file
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall()

# Get the name of the extracted CSV file (assuming there's only one CSV in the archive)
extracted_files = os.listdir('.')
csv_file_name = [f for f in extracted_files if f.endswith('.csv')][0]

print(f"Extracted CSV file: {csv_file_name}")

Extracted CSV file: Bank Customer Churn Prediction.csv


In [30]:
import pandas as pd
df=pd.read_csv("Bank Customer Churn Prediction.csv")
print(df.head())

   customer_id  credit_score country  gender  age  tenure    balance  \
0     15634602           619  France  Female   42       2       0.00   
1     15647311           608   Spain  Female   41       1   83807.86   
2     15619304           502  France  Female   42       8  159660.80   
3     15701354           699  France  Female   39       1       0.00   
4     15737888           850   Spain  Female   43       2  125510.82   

   products_number  credit_card  active_member  estimated_salary  churn  
0                1            1              1         101348.88      1  
1                1            0              1         112542.58      0  
2                3            1              0         113931.57      1  
3                2            0              0          93826.63      0  
4                1            1              1          79084.10      0  


In [31]:
df.shape

(10000, 12)

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customer_id       10000 non-null  int64  
 1   credit_score      10000 non-null  int64  
 2   country           10000 non-null  object 
 3   gender            10000 non-null  object 
 4   age               10000 non-null  int64  
 5   tenure            10000 non-null  int64  
 6   balance           10000 non-null  float64
 7   products_number   10000 non-null  int64  
 8   credit_card       10000 non-null  int64  
 9   active_member     10000 non-null  int64  
 10  estimated_salary  10000 non-null  float64
 11  churn             10000 non-null  int64  
dtypes: float64(2), int64(8), object(2)
memory usage: 937.6+ KB


In [33]:
df_encoded = pd.get_dummies(df, columns=['country', 'gender'], drop_first=False)
print("DataFrame after get_dummies:")
print(df_encoded)

DataFrame after get_dummies:
      customer_id  credit_score  age  tenure    balance  products_number  \
0        15634602           619   42       2       0.00                1   
1        15647311           608   41       1   83807.86                1   
2        15619304           502   42       8  159660.80                3   
3        15701354           699   39       1       0.00                2   
4        15737888           850   43       2  125510.82                1   
...           ...           ...  ...     ...        ...              ...   
9995     15606229           771   39       5       0.00                2   
9996     15569892           516   35      10   57369.61                1   
9997     15584532           709   36       7       0.00                1   
9998     15682355           772   42       3   75075.31                2   
9999     15628319           792   28       4  130142.79                1   

      credit_card  active_member  estimated_salary  churn 

In [34]:
df=pd.get_dummies(df,columns=['gender'])

In [35]:
df=pd.get_dummies(df,columns=['country'])

In [37]:
df.columns

Index(['customer_id', 'credit_score', 'age', 'tenure', 'balance',
       'products_number', 'credit_card', 'active_member', 'estimated_salary',
       'churn', 'gender_Female', 'gender_Male', 'country_France',
       'country_Germany', 'country_Spain'],
      dtype='object')

In [39]:
X=df.drop(['churn','customer_id'],axis=1)
Y=df['churn']

In [40]:
X

Unnamed: 0,credit_score,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,gender_Female,gender_Male,country_France,country_Germany,country_Spain
0,619,42,2,0.00,1,1,1,101348.88,True,False,True,False,False
1,608,41,1,83807.86,1,0,1,112542.58,True,False,False,False,True
2,502,42,8,159660.80,3,1,0,113931.57,True,False,True,False,False
3,699,39,1,0.00,2,0,0,93826.63,True,False,True,False,False
4,850,43,2,125510.82,1,1,1,79084.10,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,False,True,True,False,False
9996,516,35,10,57369.61,1,1,1,101699.77,False,True,True,False,False
9997,709,36,7,0.00,1,0,1,42085.58,True,False,True,False,False
9998,772,42,3,75075.31,2,1,0,92888.52,False,True,False,True,False


In [41]:
Y

Unnamed: 0,churn
0,1
1,0
2,1
3,0
4,0
...,...
9995,0
9996,0
9997,1
9998,1


In [42]:
from sklearn.model_selection import train_test_split

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)

In [48]:
X_train.shape

(8000, 13)

In [49]:
X_test.shape

(2000, 13)

In [50]:
y_test.shape

(2000,)

In [51]:
y_train.shape

(8000,)

In [52]:
from sklearn.linear_model import LogisticRegression

In [53]:
model=LogisticRegression(max_iter=100000)
model.fit(X_train,y_train)

In [54]:
y_pred=model.predict(X_test)

In [55]:
y_pred.shape

(2000,)

In [56]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [57]:
# Accuracy
acc = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {acc:.2f}")

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Detailed Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Model Accuracy: 0.81

Confusion Matrix:
[[1542   51]
 [ 330   77]]

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1593
           1       0.60      0.19      0.29       407

    accuracy                           0.81      2000
   macro avg       0.71      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000



In [59]:
from sklearn.svm import SVC

In [60]:
model = SVC(kernel='rbf', max_iter=100000)
model.fit(X_train, y_train)