In [1]:
import pandas as pd
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Relative path to the churn CSV (the file name suggests the features are
# already scaled — TODO confirm against the preprocessing step).
DF_PATH = "../data/dataframes/telecom_churn_scaled.csv"

# Read the full table into memory; the following cells split and model this frame.
df = pd.read_csv(filepath_or_buffer=DF_PATH)

In [3]:
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# class ratio of 'Churn' the same in both partitions, and the fixed seed makes
# the partition repeatable across runs.
y = df['Churn']
X = df.drop(columns=['Churn'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=42,
)

In [10]:
# Baseline model: class-weighted logistic regression.
# - penalty='elasticnet' with l1_ratio=1 is a pure L1 penalty; 'saga' is the
#   solver that supports the elastic-net penalty.
# - class_weight='balanced' re-weights samples inversely to class frequency.
# - random_state is pinned because 'saga' shuffles the data while optimizing,
#   so an unseeded fit can give slightly different metrics on every run.
log_reg = LogisticRegression(
    penalty='elasticnet',
    C=10,
    solver='saga',
    class_weight='balanced',
    l1_ratio=1,
    max_iter=1000,
    random_state=42,
)
log_reg.fit(X_train, y_train)

# Evaluating predictions on the held-out split; the report is returned as a
# nested dict (per-class metrics plus accuracy / macro / weighted averages).
preds = log_reg.predict(X_test)
results = classification_report(y_true=y_test, y_pred=preds, output_dict=True)
results



{'0': {'precision': 0.9419354838709677,
  'recall': 0.512280701754386,
  'f1-score': 0.6636363636363637,
  'support': 570.0},
 '1': {'precision': 0.22128851540616246,
  'recall': 0.8144329896907216,
  'f1-score': 0.34801762114537443,
  'support': 97.0},
 'accuracy': 0.5562218890554723,
 'macro avg': {'precision': 0.5816119996385651,
  'recall': 0.6633568457225538,
  'f1-score': 0.505826992390869,
  'support': 667.0},
 'weighted avg': {'precision': 0.8371337508258612,
  'recall': 0.5562218890554723,
  'f1-score': 0.6177367863925466,
  'support': 667.0}}

In [14]:
# Bagged decision trees: 100 trees, each fitted on a random draw of 80% of the
# training-set size. This is plain bagging, not a random forest: no per-split
# feature subsampling is configured on the base tree.
# The base tree is passed positionally because its keyword name differs
# between scikit-learn versions.
tree = DecisionTreeClassifier()
bag_model = BaggingClassifier(tree, n_estimators=100, max_samples=0.8, random_state=42)
bag_model.fit(X_train, y_train)

# Evaluate on the held-out split; the report is returned as a nested dict.
preds = bag_model.predict(X_test)
results = classification_report(y_pred=preds, y_true=y_test, output_dict=True)
results

{'0': {'precision': 0.9390862944162437,
  'recall': 0.9736842105263158,
  'f1-score': 0.9560723514211886,
  'support': 570.0},
 '1': {'precision': 0.8026315789473685,
  'recall': 0.6288659793814433,
  'f1-score': 0.7052023121387283,
  'support': 97.0},
 'accuracy': 0.9235382308845578,
 'macro avg': {'precision': 0.870858936681806,
  'recall': 0.8012750949538796,
  'f1-score': 0.8306373317799585,
  'support': 667.0},
 'weighted avg': {'precision': 0.9192420554350129,
  'recall': 0.9235382308845578,
  'f1-score': 0.9195890023801111,
  'support': 667.0}}

In [15]:
# Random forest with 100 trees; the seed is fixed so refits are repeatable.
# `fit` returns the estimator itself, so construction and training are chained.
randoforest = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out split and display the per-class report as a nested dict.
preds = randoforest.predict(X_test)
results = classification_report(y_true=y_test, y_pred=preds, output_dict=True)
results

{'0': {'precision': 0.93929173693086,
  'recall': 0.9771929824561404,
  'f1-score': 0.9578675838349097,
  'support': 570.0},
 '1': {'precision': 0.8243243243243243,
  'recall': 0.6288659793814433,
  'f1-score': 0.7134502923976608,
  'support': 97.0},
 'accuracy': 0.9265367316341829,
 'macro avg': {'precision': 0.8818080306275922,
  'recall': 0.8030294809187919,
  'f1-score': 0.8356589381162852,
  'support': 667.0},
 'weighted avg': {'precision': 0.9225723380960266,
  'recall': 0.9265367316341829,
  'f1-score': 0.9223226404025062,
  'support': 667.0}}