In [None]:
import pandas as pd

# Read the churn CSV into a DataFrame
file_path = '/content/Churn_Modelling.csv'
data = pd.read_csv(file_path)

# Preview: first five rows together with the column index
data.head(), data.columns

(   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
 0          1    15634602  Hargrave          619    France  Female   42   
 1          2    15647311      Hill          608     Spain  Female   41   
 2          3    15619304      Onio          502    France  Female   42   
 3          4    15701354      Boni          699    France  Female   39   
 4          5    15737888  Mitchell          850     Spain  Female   43   
 
    Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
 0       2       0.00              1          1               1   
 1       1   83807.86              1          0               1   
 2       8  159660.80              3          1               0   
 3       1       0.00              2          0               0   
 4       2  125510.82              1          1               1   
 
    EstimatedSalary  Exited  
 0        101348.88       1  
 1        112542.58       0  
 2        113931.57       1  
 3         93826.63     

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Drop the identifier-like columns (row number, customer id, surname).
# `data` is reassigned in place, so on a second run of this cell the columns
# are already gone; errors='ignore' keeps the cell re-runnable instead of
# raising KeyError.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

# Encode categorical variables as integer codes.
# Each encoder is fitted only while its column still holds the raw labels:
# re-fitting on the already-encoded integers would overwrite `classes_` and
# lose the label -> code mapping needed for inverse_transform.
# NOTE(review): LabelEncoder gives Geography an arbitrary integer order;
# consider one-hot encoding if the linear model should not see it as ordinal.
if not pd.api.types.is_numeric_dtype(data['Geography']):
    label_encoder_geography = LabelEncoder()
    data['Geography'] = label_encoder_geography.fit_transform(data['Geography'])

if not pd.api.types.is_numeric_dtype(data['Gender']):
    label_encoder_gender = LabelEncoder()
    data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

# Split the data into features and target variable
X = data.drop(columns=['Exited'])
y = data['Exited']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: the scaler is fitted on the training split only and the
# same transformation is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Preview the first five scaled rows of each split
X_train[:5], X_test[:5]


(array([[ 0.35649971, -0.9055496 ,  0.91324755, -0.6557859 ,  0.34567966,
         -1.21847056,  0.80843615,  0.64920267,  0.97481699,  1.36766974],
        [-0.20389777,  0.30164867,  0.91324755,  0.29493847, -0.3483691 ,
          0.69683765,  0.80843615,  0.64920267,  0.97481699,  1.6612541 ],
        [-0.96147213,  1.50884694,  0.91324755, -1.41636539, -0.69539349,
          0.61862909, -0.91668767,  0.64920267, -1.02583358, -0.25280688],
        [-0.94071667, -0.9055496 , -1.09499335, -1.13114808,  1.38675281,
          0.95321202, -0.91668767,  0.64920267, -1.02583358,  0.91539272],
        [-1.39733684, -0.9055496 ,  0.91324755,  1.62595257,  1.38675281,
          1.05744869, -0.91668767, -1.54035103, -1.02583358, -1.05960019]]),
 array([[-0.57749609,  0.30164867,  0.91324755, -0.6557859 , -0.69539349,
          0.32993735,  0.80843615, -1.54035103, -1.02583358, -1.01960511],
        [-0.29729735, -0.9055496 ,  0.91324755,  0.3900109 , -1.38944225,
         -1.21847056,  0.80843

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize the models (fixed seed so results are reproducible)
log_reg = LogisticRegression(random_state=42)
rf_clf = RandomForestClassifier(random_state=42)
gb_clf = GradientBoostingClassifier(random_state=42)

# Train the Logistic Regression model and predict on the test split
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

# Train the Random Forest model and predict on the test split
rf_clf.fit(X_train, y_train)
y_pred_rf_clf = rf_clf.predict(X_test)

# Train the Gradient Boosting model and predict on the test split
gb_clf.fit(X_train, y_train)
y_pred_gb_clf = gb_clf.predict(X_test)

# Evaluate the models.
# Every model is scored with the same four metrics, so the report is built
# from two lookup tables instead of twelve hand-written calls; insertion
# order of both dicts fixes the order of the resulting report.
predictions_by_model = {
    "Logistic Regression": y_pred_log_reg,
    "Random Forest": y_pred_rf_clf,
    "Gradient Boosting": y_pred_gb_clf,
}
scorers = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}
metrics = {
    model_name: {
        metric_name: scorer(y_test, y_pred)
        for metric_name, scorer in scorers.items()
    }
    for model_name, y_pred in predictions_by_model.items()
}

metrics


{'Logistic Regression': {'Accuracy': 0.815,
  'Precision': 0.5966386554621849,
  'Recall': 0.1806615776081425,
  'F1 Score': 0.27734375},
 'Random Forest': {'Accuracy': 0.8645,
  'Precision': 0.7479674796747967,
  'Recall': 0.4681933842239186,
  'F1 Score': 0.5758998435054773},
 'Gradient Boosting': {'Accuracy': 0.8655,
  'Precision': 0.7540983606557377,
  'Recall': 0.4681933842239186,
  'F1 Score': 0.5777080062794349}}