In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Churn baseline: load the telecom CSV, label-encode the string columns,
# train a random forest on an 80/20 split and print accuracy plus the
# per-class report.

# Load dataset
# The frame is kept exactly as read so the encoding step below always starts
# from the original string values, even when this cell is re-run.
raw_data = pd.read_csv('telecom_churn.csv')

# Check for non-numeric columns
print(raw_data.dtypes)

# Convert categorical columns to numeric using LabelEncoder.
# A separate encoder is fitted and stored per column. Re-fitting one shared
# encoder (as before) silently discards every mapping except the last one,
# which makes it impossible to encode or decode values for new rows later.
data = raw_data.copy()
encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    encoders[column] = label_encoder

# Separate features and target variable
X = data.drop(columns=['Churn'])
y = data['Churn']

# Split the data (fixed seed so the reported metrics are reproducible)
# NOTE(review): the split is not stratified on y; if the churn classes are
# imbalanced, consider passing stratify=y (this would change the metrics).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


State                      object
Account length              int64
Area code                   int64
International plan         object
Voice mail plan            object
Number vmail messages       int64
Total day minutes         float64
Total day calls             int64
Total day charge          float64
Total eve minutes         float64
Total eve calls             int64
Total eve charge          float64
Total night minutes       float64
Total night calls           int64
Total night charge        float64
Total intl minutes        float64
Total intl calls            int64
Total intl charge         float64
Customer service calls      int64
Churn                        bool
dtype: object
Accuracy: 0.9490254872563718
              precision    recall  f1-score   support

       False       0.95      0.99      0.97       566
        True       0.94      0.71      0.81       101

    accuracy                           0.95       667
   macro avg       0.94      0.85      0.89       667
weighted avg       0.95      0.95      0.95       667

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Churn baseline plus a single hand-written example: load the telecom CSV,
# label-encode the string columns, train a random forest, report test
# metrics, then predict churn for one dummy customer.

# Load dataset
# The frame is kept exactly as read so the encoding step below always starts
# from the original string values, even when this cell is re-run.
raw_data = pd.read_csv('telecom_churn.csv')

# Convert categorical columns to numeric using LabelEncoder.
# A separate encoder is fitted and stored per column. Re-fitting one shared
# encoder silently discards every mapping except the last one, leaving no way
# to check the hand-typed category codes in the dummy input below.
data = raw_data.copy()
encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    encoders[column] = label_encoder

# Separate features and target variable
X = data.drop(columns=['Churn'])
y = data['Churn']

# Split the data (fixed seed so the reported metrics are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Add a dummy single input to predict.
# Categorical features are given as encoder codes; LabelEncoder assigns codes
# in sorted order of the column's distinct values.
dummy_input = {
    'State': 0,  # Example encoded value
    'Account length': 120,
    'Area code': 415,
    'International plan': 1,  # Encoded: 1 for 'Yes', 0 for 'No'
    'Voice mail plan': 0,     # Encoded: 1 for 'Yes', 0 for 'No'
    'Number vmail messages': 10,
    'Total day minutes': 300.0,
    'Total day calls': 100,
    'Total day charge': 50.0,
    'Total eve minutes': 200.0,
    'Total eve calls': 90,
    'Total eve charge': 40.0,
    'Total night minutes': 250.0,
    'Total night calls': 80,
    'Total night charge': 20.0,
    'Total intl minutes': 15.0,
    'Total intl calls': 3,
    'Total intl charge': 5.0,
    'Customer service calls': 1
}

# Reject category codes the fitted encoders never produced; the forest would
# otherwise score a customer that cannot exist in the training data.
for column, encoder in encoders.items():
    if column not in dummy_input:
        continue
    code = dummy_input[column]
    if code not in range(len(encoder.classes_)):
        raise ValueError(
            f"{column!r}: code {code!r} is outside the fitted range "
            f"0..{len(encoder.classes_) - 1}"
        )

# Convert to DataFrame, using the training column order. Selecting by
# X.columns raises KeyError if a feature is missing from the dummy input.
dummy_input_df = pd.DataFrame([dummy_input])[X.columns.tolist()]

# Predict churn for the dummy input
dummy_prediction = model.predict(dummy_input_df)
dummy_prediction_proba = model.predict_proba(dummy_input_df)

# predict_proba columns follow model.classes_, so look the probabilities up
# by class label instead of assuming a fixed position.
class_labels = list(model.classes_)
not_churn_proba = dummy_prediction_proba[0][class_labels.index(False)]
churn_proba = dummy_prediction_proba[0][class_labels.index(True)]

# Display the result
if dummy_prediction[0]:
    print("Prediction: Churn")
else:
    print("Prediction: Not Churn")
print(f"Probability: {not_churn_proba:.2f} (Not Churn), {churn_proba:.2f} (Churn)")


Accuracy: 0.9490254872563718
              precision    recall  f1-score   support

       False       0.95      0.99      0.97       566
        True       0.94      0.71      0.81       101

    accuracy                           0.95       667
   macro avg       0.94      0.85      0.89       667
weighted avg       0.95      0.95      0.95       667

Prediction: Churn
Probability: 0.19 (Not Churn), 0.81 (Churn)
