In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# -----------------------------------
# 1. Load Dataset
# -----------------------------------
# Read the churn CSV from the current working directory and echo the first
# rows as a quick sanity check on the parsed columns.
DATA_FILE = "logistic_regression_churn_5000.csv"
df = pd.read_csv(DATA_FILE)
print(df.head())

# -----------------------------------
# 2. Preprocessing
# -----------------------------------

# One-hot encode the contract type; drop_first=True omits the first
# category's column so it acts as the implicit baseline.
df = pd.get_dummies(df, columns=['contract_type'], drop_first=True)

# Model inputs (six columns, including the two contract dummies) and label.
feature_columns = [
    'monthly_charges',
    'tenure',
    'support_calls',
    'internet_usage',
    'contract_type_One-year',
    'contract_type_Two-year',
]
X = df[feature_columns]
y = df['churn']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.25
)

# Standardise every feature column (the dummy columns are scaled too).
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows, so no test statistics leak into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------------
# 3. Train Logistic Regression
# -----------------------------------
# Fit a logistic regression with scikit-learn's default settings on the
# scaled training data; fit() returns the estimator itself.
model = LogisticRegression().fit(X_train, y_train)

# -----------------------------------
# 4. Evaluation
# -----------------------------------
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Report each score using the same "<label> <value>" layout.
for score_label, score_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(score_label, score_fn(y_test, y_pred))

# Confusion matrix: rows are actual classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)

# -----------------------------------
# 5. New Customer Churn Prediction
# -----------------------------------
# A single hypothetical customer, expressed as one record whose keys follow
# the same column order as the training features.
new_customer = pd.DataFrame([{
    'monthly_charges': 78,
    'tenure': 12,
    'support_calls': 3,
    'internet_usage': 45,
    'contract_type_One-year': 0,
    'contract_type_Two-year': 1,
}])

# Apply the scaler fitted on the training data before predicting.
new_customer_scaled = scaler.transform(new_customer)

prediction = model.predict(new_customer_scaled)
print("\nChurn prediction for new customer:", int(prediction[0]))

# -----------------------------------
# 6. VISUALIZATIONS
# -----------------------------------

# A) Churn distribution
# Bar chart of how many rows fall into each churn class, drawn on an
# explicitly created axes.
churn_fig, churn_ax = plt.subplots(figsize=(5, 4))
churn_counts = df['churn'].value_counts()
churn_counts.plot.bar(ax=churn_ax)
churn_ax.set_title("Churn Count")
churn_ax.set_xlabel("Churn (0 = Stay, 1 = Churn)")
churn_ax.set_ylabel("Number of Customers")
churn_fig.tight_layout()
plt.show()

# B) Monthly charges vs churn
# DataFrame.boxplot(by=...) opens its own figure unless it is handed an
# axes, so creating a figure with plt.figure() beforehand only produces an
# empty extra window and its figsize never reaches the boxplot. Creating the
# axes first and passing it via ax= draws the plot at the requested size.
box_fig, box_ax = plt.subplots(figsize=(6, 5))
df.boxplot(column='monthly_charges', by='churn', ax=box_ax)
box_ax.set_title("Monthly Charges vs Churn")
box_fig.suptitle("")  # Remove the automatic "Boxplot grouped by ..." title
box_ax.set_xlabel("Churn")
box_ax.set_ylabel("Monthly Charges")
box_fig.tight_layout()
plt.show()

# C) Confusion Matrix Heatmap
# Draw the confusion matrix as a colour-coded grid with the count written
# inside each cell.
cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
heatmap = cm_ax.imshow(cm, cmap='Blues')
cm_ax.set_title("Confusion Matrix")
cm_fig.colorbar(heatmap, ax=cm_ax)
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")

# Put one tick on each cell index; imshow's default ticks for a small matrix
# are fractional (-0.5, 0.0, 0.5, ...) and do not line up with the classes.
cm_ax.set_xticks(np.arange(cm.shape[1]))
cm_ax.set_yticks(np.arange(cm.shape[0]))

# Add text values inside the heatmap. Cells above half the maximum count are
# rendered dark by the 'Blues' colormap, so their text switches to white to
# stay legible.
contrast_threshold = cm.max() / 2
for row, col in np.ndindex(*cm.shape):
    cell_count = cm[row, col]
    cm_ax.text(
        col, row, cell_count,
        ha='center', va='center',
        color='white' if cell_count > contrast_threshold else 'black',
    )

cm_fig.tight_layout()
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'telecom_customers.csv'