# Feature Engineering for Churn Detection

## Imports and Settings

In [1]:
# Helper libraries
import warnings

# Scientific libraries
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Local Imports
from churn_detection.utils import get_feature_names
from churn_detection.data import load_data
from churn_detection.preprocessing import preprocess_data, split_data
from churn_detection.features import ColumnPreprocessor, Transformation


# Load IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the local churn_detection package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inside the notebook, at high-DPI ("retina") resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Various settings
# NOTE(review): "ignore" with no category silences every warning for the whole
# session, including deprecation and convergence warnings — confirm this is intended.
warnings.filterwarnings("ignore")
# Print numpy floats with 4 digits after the decimal point.
np.set_printoptions(precision=4)
# Apply seaborn's default theme to subsequent plots.
sns.set_theme()
# pandas display options: show up to 120 rows, truncate cell text at 40
# characters, 4 decimal places for floats, and never hide columns.
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_colwidth", 40)
pd.set_option("display.precision", 4)
pd.set_option("display.max_columns", None)

## Loading and Preparing Data

In [2]:
# Fetch the churn dataset through the project's load_data helper
# (churn_detection.data); the result is handed to preprocess_data in the next cell.
churn = load_data()

In [3]:
# Run the project's preprocessing on the loaded data, then ask the project
# helper for the feature names; the two returned collections are used below as
# the numeric and categorical column lists (presumably lists of column names —
# verify against churn_detection.utils.get_feature_names).
preprocessed_data = preprocess_data(churn)
numeric_variables, categorical_variables = get_feature_names(preprocessed_data)

# Hold out 20% of the rows for testing. A fixed random_state keeps the split
# identical across Restart & Run All. train_test_split is
# sklearn.model_selection.train_test_split and must be imported in the imports
# cell; without that import this cell raises NameError.
prep_train, prep_test = train_test_split(
    preprocessed_data,
    test_size=0.2,
    random_state=1,
)

# Sanity check: the two partitions must account for every preprocessed row.
assert len(prep_train) + len(prep_test) == len(preprocessed_data), (
    "train/test split lost or duplicated rows"
)

# split_data is the project helper that returns a pair unpacked here as
# (X, y) — presumably features and target; confirm in churn_detection.preprocessing.
X_train, y_train = split_data(prep_train)
X_test, y_test = split_data(prep_test)

## Feature Engineering Strategies

In [4]:
# Keyword arguments for the Transformation that handles the categorical
# columns: a single one-hot encoding step that emits a dense array and drops
# one of the two columns for binary features.
DISCRETE_FEATURES = dict(
    name="discrete",
    variables=categorical_variables,
    steps=[
        ("encoder", OneHotEncoder(drop="if_binary", sparse_output=False)),
    ],
)

# Keyword arguments for the Transformation that handles the numeric columns:
# a single standardisation step (registered under the step name "dummy").
CONTINUOUS_FEATURES = dict(
    name="bimodal",
    variables=numeric_variables,
    steps=[
        ("dummy", StandardScaler()),
    ],
)

In [5]:
# Start from an empty column preprocessor and register one Transformation per
# feature group, built from the spec dictionaries defined in the previous cell.
pipeline = ColumnPreprocessor()

# Categorical columns first ...
discrete_step = Transformation(**DISCRETE_FEATURES)
pipeline.add_transformation(discrete_step)

# ... then the numeric columns, registered in the same order as before.
continuous_step = Transformation(**CONTINUOUS_FEATURES)
pipeline.add_transformation(continuous_step)