# Subscription Churn Prediction

## Step 1: Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

## Step 2: Load and Explore the Data

In [2]:
# Load the dataset
file_path = 'Churn_Modelling.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Step 3: Preprocess the Data

In [3]:
# Drop unnecessary columns
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Encode categorical variables
label_encoder_geography = LabelEncoder()
label_encoder_gender = LabelEncoder()
data['Geography'] = label_encoder_geography.fit_transform(data['Geography'])
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

# Define feature matrix and target vector
X = data.drop('Exited', axis=1)
y = data['Exited']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature matrix
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Step 4: Train the Gradient Boosting Model

In [4]:
# Initialize and train the model
gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)

## Step 5: Make Predictions and Evaluate the Model

In [5]:
# Make predictions
y_pred = gbc.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:")
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[1547   60]
 [ 208  185]]

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.76      0.47      0.58       393

    accuracy                           0.87      2000
   macro avg       0.82      0.72      0.75      2000
weighted avg       0.86      0.87      0.85      2000


Accuracy Score:
0.866


## Step 6: Save the Model

In [6]:
import joblib

# Save the trained model to a file
joblib.dump(gbc, 'gradient_boosting_model.pkl')


['gradient_boosting_model.pkl']

## Step 7: Predict Churn for All Customers

In [9]:
# Standardize the entire dataset (excluding the target variable)
X_all = scaler.transform(X)

# Predict churn for all customers
y_all_pred = gbc.predict(X_all)

# Add predictions to the original dataframe
data['ChurnPrediction'] = y_all_pred

# Save the dataframe with predictions to a CSV file
output_file_path = 'Churn_Predictions.csv'
data.to_csv(output_file_path, index=False)

print(f"Predictions saved to {output_file_path}")

Predictions saved to Churn_Predictions.csv
