In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix


In [None]:

# Read the Telco customer-churn CSV (path is relative to the working
# directory) and show the first rows as the cell output.
csv_path = "WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:

# Parse TotalCharges as numeric; values that cannot be parsed are coerced to
# NaN, the affected rows are dropped, and the index is renumbered from 0.
df = (
    df.assign(TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce'))
      .dropna(subset=['TotalCharges'])
      .reset_index(drop=True)
)


In [None]:

# Count of customers in each Churn class; the figure is also written to disk.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Churn', ax=ax)
ax.set_title("Churn Distribution")
fig.savefig("churn_distribution.png")
plt.show()


In [None]:

# Customer counts per Contract value, with bars split by Churn class.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='Contract', hue='Churn', ax=ax)
ax.set_title("Churn by Contract Type")
fig.savefig("churn_by_contract.png")
plt.show()


In [None]:

# Box plot comparing the MonthlyCharges distribution across Churn classes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='Churn', y='MonthlyCharges', ax=ax)
ax.set_title("Monthly Charges by Churn")
fig.savefig("monthly_charges_by_churn.png")
plt.show()


In [None]:

# Histogram of tenure (30 bins, KDE overlay) coloured by Churn class.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='tenure', hue='Churn', bins=30, kde=True, ax=ax)
ax.set_title("Tenure Distribution by Churn")
fig.savefig("tenure_distribution_by_churn.png")
plt.show()


In [None]:

# Build the modelling frame: drop the customer identifier, one-hot encode the
# categorical predictors, and recode the target to 0/1.
df_model = df.drop(columns='customerID')

# Every object-typed column except the target is treated as categorical.
categorical_cols = [
    col
    for col in df_model.select_dtypes(include='object').columns
    if col != 'Churn'
]

# One-hot encoding replaces the previous per-column LabelEncoder: integer
# codes give nominal categories an arbitrary order and spacing, which a linear
# model would interpret as a real magnitude. drop_first removes one redundant
# indicator per feature; dtype=int keeps the indicators numeric (0/1).
df_model = pd.get_dummies(
    df_model,
    columns=categorical_cols,
    drop_first=True,
    dtype=int,
)

df_model['Churn'] = df_model['Churn'].map({'No': 0, 'Yes': 1})
# map() yields NaN for any label other than 'No'/'Yes'; stop here with a clear
# message rather than passing NaN targets on to the split/fit cells.
assert df_model['Churn'].notna().all(), "Unexpected Churn label (expected 'No' or 'Yes')"


In [None]:

# Separate the target from the predictors, then hold out 20% of the rows for
# evaluation; the fixed random_state makes the split repeatable.
y = df_model['Churn']
X = df_model.drop(columns='Churn')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Standardise the predictors before fitting. LogisticRegression applies an L2
# penalty by default, so unscaled columns are regularised unevenly, and
# coefficient magnitudes are only comparable across features when all inputs
# share a common scale.
scaler = StandardScaler()
# Scaling statistics come from the training rows only, so nothing about the
# held-out rows leaks into the fit.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# `model` stays a plain LogisticRegression so `model.coef_` remains available;
# its coefficients line up with the column order of X_train.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Report held-out performance.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


In [None]:

# Rank features by the absolute value of their fitted coefficient and chart
# the ten largest; the bars show the signed coefficient.
importance = (
    pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_[0]})
      .assign(AbsCoefficient=lambda frame: frame['Coefficient'].abs())
      .sort_values(by='AbsCoefficient', ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=importance.head(10), x='Coefficient', y='Feature', ax=ax)
ax.set_title("Top 10 Features Influencing Customer Churn")
fig.tight_layout()
fig.savefig("feature_importance.png")
plt.show()


In [None]:

# Write a copy of df with Churn recoded from 'No'/'Yes' to 0/1; the DataFrame
# index is not included in the file.
churn_codes = {'No': 0, 'Yes': 1}
df_export = df.assign(Churn=df['Churn'].map(churn_codes))
df_export.to_csv("Churn_Cleaned.csv", index=False)
