In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

In [None]:
# Load the customer-churn CSV; the path is relative to the current working
# directory, so the file must sit next to wherever the kernel was started.
df = pd.read_csv('Telco-Customer-Churn.csv')

# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the freshly loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Missing values per column. Only real NaN/None entries are counted here;
# text placeholders inside string columns (if any) are not — TotalCharges is
# re-checked after numeric coercion in a later cell.
df.isna().sum()

In [None]:
# Number of rows that are exact duplicates of an earlier row. customerID is
# still part of the frame at this point, so it takes part in the comparison.
df.duplicated().sum()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max). With the default
# arguments on a mixed-type frame only numeric columns are summarised, so a
# column still stored as text at this point is not included.
df.describe()

In [None]:
# Remove the customerID column before modelling. errors='ignore' keeps this
# cell re-runnable: without it, executing the cell a second time raises
# KeyError because 'customerID' has already been dropped from `df`.
df = df.drop(columns=['customerID'], errors='ignore')

# Parse TotalCharges as a number. errors='coerce' turns every entry that
# cannot be parsed (e.g. blank strings) into NaN instead of raising.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Number of missing TotalCharges values after coercion. These rows are kept
# in `df`, so any NaN counted here is still present downstream.
df['TotalCharges'].isna().sum()

In [None]:
# Quick look at the first two rows of the current `df`.
df.head(2)

In [None]:
# Separate the model inputs from the target column.
X = df.drop(columns=['Churn'])
y = df['Churn']

# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible. stratify=y keeps the proportion of each Churn
# class the same in the train and test partitions, so the reported test
# accuracy is not distorted by a split that over- or under-samples a class.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Learn the label -> integer mapping from the training target only, then apply
# that same mapping to both splits.
# NOTE(review): the training cell in this notebook fits the pipeline on the
# raw `y_train` labels, so these encoded arrays are not consumed by the
# visible code.
encoder = LabelEncoder().fit(y_train)
y_train_encoded = encoder.transform(y_train)
y_test_encoded = encoder.transform(y_test)

In [None]:
# Print "<column> - <number of distinct values>" for every column of `df`.
for col, n_unique in df.nunique().items():
    print(f"{col} - {n_unique}")

In [None]:
# Print "<column> - <dtype>" for every column of `df`.
for col, col_dtype in df.dtypes.items():
    print(f"{col} - {col_dtype}")

In [None]:
# Split the feature columns by storage dtype: int64/float64 columns are
# handled as numeric, object (string) columns as categorical.
numeric_frame = X.select_dtypes(include=['int64', 'float64'])
categorical_frame = X.select_dtypes(include=['object'])

numeric_cols = numeric_frame.columns
categorical_cols = categorical_frame.columns

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Numeric branch: fill missing values, then standardise.
# TotalCharges is coerced with errors='coerce' earlier in the notebook, so it
# can contain NaN. StandardScaler passes NaN through unchanged, and older
# scikit-learn releases reject NaN input in RandomForestClassifier, so the
# gaps are filled with the training median before scaling. Doing this inside
# the pipeline also means the pickled model copes with missing numeric values
# at prediction time.
numeric_cols_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical branch: one-hot encode. handle_unknown='ignore' encodes a
# category that was not seen during fit as all zeros instead of raising.
categorical_cols_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Route each column group to its branch; columns in neither group are passed
# through untouched (remainder='passthrough').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_cols_transformer, numeric_cols),
        ('cat', categorical_cols_transformer, categorical_cols)
    ], remainder='passthrough')

# Full model: preprocessing followed by a random forest with a fixed seed so
# repeated fits on the same data give the same model.
rf_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42, n_estimators=50, max_depth=7, min_samples_split=5))
])

In [None]:
from sklearn.metrics import accuracy_score

# Fit preprocessing and classifier together on the training split, using the
# raw (un-encoded) target labels.
rf_pipeline.fit(x_train, y_train)

# Accuracy on the data the model was trained on.
y_pred = rf_pipeline.predict(x_train)
train_accuracy = accuracy_score(y_train, y_pred)
print(f"Train Accuracy: {train_accuracy}")

# Accuracy on the held-out split; `y_pred` ends up holding the test-set
# predictions.
y_pred = rf_pipeline.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy}")



In [None]:
# Persist the fitted pipeline (preprocessing + classifier) to disk in binary
# mode. Pickle files can execute arbitrary code when loaded, so only unpickle
# this file from a trusted location.
with open('rf_pipeline.pkl', 'wb') as model_file:
    pickle.dump(rf_pipeline, model_file)