In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Location of the Telco customer-churn CSV export.
file_path = '/WA_Fn-UseC_-Telco-Customer-Churn.csv'

# Read the whole file into a DataFrame.
data = pd.read_csv(file_path)

# Show the first five rows as a quick sanity check of the load.
print(data.head(5))

In [None]:
# Number of missing (NaN/None) entries in each column.
print(data.isna().sum())

In [None]:
# Discard every row that has at least one missing value.
data = data.dropna(axis=0, how='any')

In [None]:
# Prepare the frame for modelling: remove the row identifier, repair a numeric
# column that may have been read as text, binarise the target, then one-hot
# encode whatever categorical columns remain.
#
# NOTE(review): 'customerID' and 'TotalCharges' are assumed from the standard
# Telco churn schema -- confirm against the CSV. Both steps are no-ops when the
# column is absent.

# A per-row identifier carries no signal; left in place, get_dummies would
# create one dummy column per distinct ID.
data = data.drop(columns=['customerID'], errors='ignore')

# If the column was read as text (e.g. because of blank entries), coerce it to
# numbers so it is treated as one numeric feature instead of being exploded
# into a dummy column per distinct string. Unparseable entries become NaN and
# those rows are dropped.
if 'TotalCharges' in data.columns:
    data = data.assign(
        TotalCharges=pd.to_numeric(data['TotalCharges'], errors='coerce')
    ).dropna(subset=['TotalCharges'])

# Convert the 'Churn' column to 0 and 1 ('Yes' -> 1, anything else -> 0).
# Guarded on dtype so that re-running this cell does not map an
# already-encoded target to all zeros.
if not pd.api.types.is_numeric_dtype(data['Churn']):
    data['Churn'] = (data['Churn'] == 'Yes').astype(int)

# Use one-hot encoding for other categorical columns; drop_first removes one
# level per column to avoid redundant dummies.
data = pd.get_dummies(data, drop_first=True)

In [None]:
# Define features (X) and target (y)
X = data.drop(columns='Churn')
y = data['Churn']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the share of
# each target class the same in the training and test sets, so the reported
# metrics are not skewed by an unlucky random draw. random_state fixes the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Build a 100-tree random forest with a fixed seed and train it on the
# training split in one step (fit returns the fitted estimator itself).
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

In [None]:
# Class predictions of the trained forest for the held-out test rows.
y_pred = rf_model.predict(X_test)

In [None]:
# Report each evaluation metric under its own heading. Every metric function
# takes (true labels, predicted labels), so they can share one loop.
for heading, metric in (
    ("Confusion Matrix:", confusion_matrix),
    ("\nAccuracy Score:", accuracy_score),
    ("\nClassification Report:", classification_report),
):
    print(heading)
    print(metric(y_test, y_pred))

In [None]:
# Pair every feature column with the importance the fitted forest assigned it.
importance_table = pd.DataFrame(
    {'Feature': X.columns, 'Importance': rf_model.feature_importances_}
)

# Rank the features from most to least important and show the table.
feature_importances = importance_table.sort_values(by='Importance', ascending=False)
print(feature_importances)