In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Load the training split. The path is relative, so the CSV is looked up in
# the kernel's current working directory.
train_df = pd.read_csv('churn_train.csv')

In [None]:
def preprocess_data(df, categorical_columns=None):
    """Label-encode the categorical columns of a copy of ``df``.

    A fresh ``LabelEncoder`` is fitted per column, and the fitted encoders are
    returned so the same mapping can be applied to other data later.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is copied first, so the caller's object is untouched.
    categorical_columns : iterable of str, optional
        Names of the columns to encode. When omitted, the columns
        ``'State'``, ``'International plan'``, ``'Voice mail plan'`` and
        ``'Churn'`` are used.

    Returns
    -------
    tuple
        ``(encoded_df, label_encoders)`` where ``label_encoders`` is a dict
        mapping each encoded column name to its fitted ``LabelEncoder``.

    Raises
    ------
    KeyError
        If any requested column is not present in ``df``.
    """
    if categorical_columns is None:
        categorical_columns = ['State', 'International plan', 'Voice mail plan', 'Churn']
    # Materialise once: the names are scanned twice below, which a one-shot
    # iterator (e.g. a generator) would not survive.
    categorical_columns = list(categorical_columns)

    # Fail before fitting anything so a typo in one name reports every
    # missing column at once instead of stopping at the first.
    missing = [col for col in categorical_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in DataFrame: {missing}")

    df = df.copy()

    # Convert categorical columns to numerical values
    label_encoders = {}
    for col in categorical_columns:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

    return df, label_encoders

In [None]:
# Encode the categorical columns of the training data and keep the fitted
# encoders so the test data can be encoded with the same mapping.
# NOTE: this rebinds `train_df` to the encoded frame, so re-running only this
# cell would fit new encoders on already-encoded values. Re-run the load cell
# first to start again from the raw CSV contents.
train_df, label_encoders = preprocess_data(train_df)

In [None]:
# Separate the 'Churn' target from the remaining predictor columns
y_train = train_df['Churn']
X_train = train_df.drop(labels='Churn', axis='columns')

In [None]:
# Fit a decision tree classifier on the training features and labels;
# random_state is pinned so repeated runs build the same tree.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X=X_train, y=y_train)

In [None]:
# Load the held-out test split. The path is relative, so the CSV is looked up
# in the kernel's current working directory.
test_df = pd.read_csv('churn_test.csv')

In [None]:
# Preprocess test data using the same label encoders
def preprocess_test_data(df, label_encoders):
    """Apply already-fitted label encoders to a copy of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. It is copied first, so the caller's object is
        untouched.
    label_encoders : dict
        Maps column name to a fitted encoder exposing ``transform``, as
        returned by ``preprocess_data``.

    Returns
    -------
    pandas.DataFrame
        The copy, with every column named in ``label_encoders`` replaced by
        its encoded values.

    Raises
    ------
    KeyError
        If a column named in ``label_encoders`` is not present in ``df``.
    ValueError
        If an encoder's ``transform`` raises ``ValueError`` (scikit-learn's
        ``LabelEncoder`` does so for labels it was not fitted on). The error
        is re-raised with the offending column's name prepended.
    """
    # Report every missing column at once, before any column is transformed.
    missing = [col for col in label_encoders if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in DataFrame: {missing}")

    df = df.copy()
    for col, encoder in label_encoders.items():
        try:
            df[col] = encoder.transform(df[col])
        except ValueError as exc:
            # The encoder's own message does not say which column failed;
            # add that context while keeping the exception type.
            raise ValueError(f"Cannot encode column {col!r}: {exc}") from exc
    return df

In [None]:
# Encode the test data with the encoders fitted on the training data.
# NOTE: this rebinds `test_df` to the encoded frame, so re-running only this
# cell would pass already-encoded values back through the encoders. Re-run the
# test-data load cell first.
test_df = preprocess_test_data(test_df, label_encoders)

In [None]:
# Separate the 'Churn' target from the predictor columns of the test data
y_test = test_df['Churn']
X_test = test_df.drop(labels='Churn', axis='columns')

In [None]:
# Predict the target for each test row with the decision tree fitted above.
y_pred = dt_model.predict(X_test)

In [None]:
# Compare the predictions with the true test labels: overall accuracy,
# per-class precision/recall/F1, and the confusion matrix.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', classification_report(y_true=y_test, y_pred=y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9175
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95       572
           1       0.70      0.73      0.72        95

    accuracy                           0.92       667
   macro avg       0.83      0.84      0.83       667
weighted avg       0.92      0.92      0.92       667

Confusion Matrix:
 [[543  29]
 [ 26  69]]
