<a href="https://colab.research.google.com/github/baranikeshava19/CodSoft/blob/main/Customer_Churn_Prediction_Task_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Step 2: Upload the dataset
# files.upload() opens Colab's file picker and returns a dict that maps each
# uploaded filename to the raw bytes of that file.
import io

from google.colab import files
uploaded = files.upload()

# Load the dataset from the bytes that were just uploaded instead of re-opening
# a hard-coded path. This keeps the cell working when the file has a different
# name, and avoids reading a stale copy when Colab stores a repeated upload
# under a de-duplicated name such as "Churn_Data_Sample (1).csv".
DATA_FILENAME = 'Churn_Data_Sample.csv'
if uploaded:
    # Prefer the expected file when it is part of the upload; otherwise use
    # the first file the user selected.
    uploaded_name = DATA_FILENAME if DATA_FILENAME in uploaded else next(iter(uploaded))
    data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # copy that is already present in the working directory.
    data = pd.read_csv(DATA_FILENAME)

# Step 3: Fill the gaps in 'InternetService' before it is encoded
# Missing entries are replaced with the 'No' category.
data['InternetService'] = data['InternetService'].fillna('No')

# Step 4: Integer-encode the categorical features with LabelEncoder
label_encoder = LabelEncoder()

# The single encoder is re-fitted on each column in turn, so after the loop it
# only remembers the classes of the last column ('InternetService').
for column in ('Gender', 'PhoneService', 'Contract', 'PaymentMethod', 'InternetService'):
    data[column] = label_encoder.fit_transform(data[column])

# Step 5: Build the target vector and the feature matrix
# 'Churn' is the target; it and 'CustomerID' are left out of the features.
y = data['Churn']
X = data.drop(['CustomerID', 'Churn'], axis=1)

# Step 6: Hold out 20% of the rows for testing (seeded, so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Steps 7-9: Fit the three classifiers on the training split.
# Every estimator receives a fixed seed. Random Forest and Gradient Boosting
# are stochastic, so without one the reported metrics change on every
# Restart & Run All even though the train/test split itself is seeded.
MODEL_SEED = 42

# Step 7: Train Logistic Regression model
log_reg = LogisticRegression(solver='liblinear', max_iter=200, random_state=MODEL_SEED)
log_reg.fit(X_train, y_train)

# Step 8: Train Random Forest model
rf = RandomForestClassifier(random_state=MODEL_SEED)
rf.fit(X_train, y_train)

# Step 9: Train Gradient Boosting model
gb = GradientBoostingClassifier(random_state=MODEL_SEED)
gb.fit(X_train, y_train)

# Step 10: Predict the held-out rows with each fitted model
# The models are evaluated in order: logistic regression, random forest,
# gradient boosting.
log_reg_preds, rf_preds, gb_preds = (
    fitted_model.predict(X_test) for fitted_model in (log_reg, rf, gb)
)

# Steps 11-13: Print the same three-part report for every model
def _print_results(heading, predictions):
    """Print the evaluation summary of one model against ``y_test``.

    heading: line printed before the report (may start with a newline to
        separate it from the previous model's output).
    predictions: predicted labels for the test rows.
    """
    print(heading)
    print(classification_report(y_test, predictions))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, predictions))
    print("Accuracy:", accuracy_score(y_test, predictions))


# Step 11: Logistic Regression
_print_results("Logistic Regression Results:", log_reg_preds)

# Step 12: Random Forest
_print_results("\nRandom Forest Results:", rf_preds)

# Step 13: Gradient Boosting
_print_results("\nGradient Boosting Results:", gb_preds)


Saving Churn_Data_Sample.csv to Churn_Data_Sample.csv
Logistic Regression Results:
              precision    recall  f1-score   support

           0       0.54      0.65      0.59        46
           1       0.64      0.52      0.57        54

    accuracy                           0.58       100
   macro avg       0.59      0.59      0.58       100
weighted avg       0.59      0.58      0.58       100

Confusion Matrix:
[[30 16]
 [26 28]]
Accuracy: 0.58

Random Forest Results:
              precision    recall  f1-score   support

           0       0.48      0.59      0.53        46
           1       0.57      0.46      0.51        54

    accuracy                           0.52       100
   macro avg       0.53      0.52      0.52       100
weighted avg       0.53      0.52      0.52       100

Confusion Matrix:
[[27 19]
 [29 25]]
Accuracy: 0.52

Gradient Boosting Results:
              precision    recall  f1-score   support

           0       0.36      0.48      0.41        4