In [1]:
import os
from pathlib import Path

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Location of the input CSVs. Set the CHURN_DATA_DIR environment variable to run
# the notebook on another machine; the fallback is the directory this notebook
# was originally written against.
DATA_DIR = Path(os.environ.get("CHURN_DATA_DIR", "/Users/adlnzmnzr/Downloads/archive-2"))

# Customer-level churn records and the per-zip-code population table.
df_customer = pd.read_csv(DATA_DIR / "telecom_customer_churn.csv")
df_population = pd.read_csv(DATA_DIR / "telecom_zipcode_population.csv")


In [6]:
# Normalise the join key on both sides to a zero-padded 5-character string so
# that values stored as numbers still match (e.g. 1234 -> '01234').
df_customer['Zip Code'] = df_customer['Zip Code'].astype(str).str.zfill(5)
df_population['Zip Code'] = df_population['Zip Code'].astype(str).str.zfill(5)

# Left join keeps every customer row even when its zip code has no population
# entry. validate='many_to_one' makes pandas raise MergeError if a zip code
# appears more than once in df_population, instead of silently duplicating
# customer rows in the merged frame.
merged_df = pd.merge(
    df_customer,
    df_population,
    on='Zip Code',
    how='left',
    validate='many_to_one',
)

In [7]:
# Service-related categorical columns whose gaps are replaced by an explicit
# 'Unknown' category.
service_features = [
    'Online Backup', 'Streaming Music', 'Streaming TV', 'Premium Tech Support',
    'Device Protection Plan', 'Unlimited Data', 'Online Security',
    'Internet Type', 'Streaming Movies'
]

# 'Multiple Lines' receives the same 'Unknown' placeholder as the service columns.
unknown_fill_columns = service_features + ['Multiple Lines']
merged_df[unknown_fill_columns] = merged_df[unknown_fill_columns].fillna('Unknown')

# Missing usage/charge figures are replaced with 0.
zero_fill_columns = ['Avg Monthly GB Download', 'Avg Monthly Long Distance Charges']
merged_df[zero_fill_columns] = merged_df[zero_fill_columns].fillna(0)


In [8]:
# Columns removed from the merged frame before modelling.
drop_columns = [
    'Customer ID',
    'Churn Reason',
    'Churn Category',
    'City',
    'Latitude',
    'Longitude',
]
clean_df = merged_df.drop(drop_columns, axis='columns')

In [9]:
# Rows without a 'Customer Status' cannot be labelled, so they are discarded.
clean_df = clean_df.dropna(
    axis='index',
    how='any',
    subset=['Customer Status'],
)

In [10]:
# Build the binary target: 1 when the status is exactly 'Churned', 0 for every
# other status value. The comparison is vectorised (no per-row Python lambda)
# and chained so the step yields a new frame rather than mutating the previous
# one; the source column is dropped once the target has been derived from it.
clean_df = (
    clean_df
    .assign(Churn=(clean_df['Customer Status'] == 'Churned').astype('int64'))
    .drop(columns=['Customer Status'])
)

In [11]:
# Every object-dtype column is replaced by integer codes. One fitted encoder is
# kept per column name in `label_encoders`.
object_columns = clean_df.select_dtypes(include='object').columns
label_encoders = {name: LabelEncoder() for name in object_columns}
for name, encoder in label_encoders.items():
    clean_df[name] = encoder.fit_transform(clean_df[name])

In [12]:
# Separate the target from the predictors, then hold out 20% of the rows for
# evaluation. The fixed random_state keeps the split reproducible.
y = clean_df['Churn']
X = clean_df.drop(columns=['Churn'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [13]:
# Random forest with library-default hyperparameters; only the seed is pinned.
model = RandomForestClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

In [14]:
# Predict the held-out rows once and derive both metrics from those predictions.
y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [15]:
# Report accuracy as a percentage, followed by the per-class breakdown.
accuracy_pct = accuracy * 100
print("Model Accuracy: {:.2f}%".format(accuracy_pct))
print("\nClassification Report:\n", report)

Model Accuracy: 84.10%

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.93      0.90      1036
           1       0.75      0.60      0.67       373

    accuracy                           0.84      1409
   macro avg       0.81      0.76      0.78      1409
weighted avg       0.83      0.84      0.83      1409



In [19]:
import sqlite3
from contextlib import closing
from IPython.display import FileLink

churn_db = 'churn.db'
sql_dump_path = 'churn.sql'

# closing() guarantees the connection is released even if the export fails.
# A bare `with sqlite3.connect(...)` only manages the transaction and would
# leave the connection open. After this block `conn` is still bound but closed.
with closing(sqlite3.connect(churn_db)) as conn:
    # Store the encoded frame as table 'clean_df', replacing any earlier copy.
    clean_df.to_sql('clean_df', conn, if_exists='replace', index=False)

    # Write the whole database out as SQL statements, one per line. The encoding
    # is pinned so the dump does not depend on the platform's default locale.
    with open(sql_dump_path, 'w', encoding='utf-8') as f:
        f.writelines(f'{line}\n' for line in conn.iterdump())

# Last expression of the cell: renders a link to the dump file.
FileLink(sql_dump_path)