# **PS2 Sans Employment_Variation_Rate**

In [150]:
import pandas as pd
import numpy as np

RANDOM_STATE = 89

import product_sub.settings as stg
from product_sub.infrastructure.eco_social import EcoSocioContext
from product_sub.infrastructure.bank_campaign import MarketingCampaign
from product_sub.infrastructure.dataset_builder import DatasetBuilder
from product_sub.domain.data_cleaning import CatImputer, NumImputer
from product_sub.domain.feature_creator import CategoricalCreatorFromNumerical, CategoricalFeatureCreator
from product_sub.domain.feature_encoder import FrequencyEncoder, OneHotEncoder

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import make_column_selector as selector
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline, make_union
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, auc, confusion_matrix, mean_squared_error, plot_confusion_matrix

In [145]:
# Build the working dataset from the two CSV inputs through the project's
# DatasetBuilder (presumably campaign data joined with socio-economic
# context — confirm against DatasetBuilder).
dataset_merged = DatasetBuilder(
    "data.csv",
    "socio_eco.csv",
).create_dataset()

In [32]:
# Separate the subscription target from the remaining feature columns.
y = dataset_merged[stg.COL_RAW_SUBSCRIPTION].values
X = dataset_merged.drop(columns=stg.COL_RAW_SUBSCRIPTION)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y — consider `stratify=y` if
# the target classes turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

In [33]:
# Pipeline applied to the non-categorical columns. Steps run in order:
#   1. num_imputer        - project imputer (presumably fills missing numeric
#                           values; confirm in product_sub.domain.data_cleaning)
#   2. create_categorical - project step configured by stg.DICT_TO_CREATE_COLS
#   3. scaler             - MinMaxScaler with its default settings
numeric_transformer = Pipeline(
    steps=[
        ("num_imputer", NumImputer()),
        (
            "create_categorical",
            CategoricalCreatorFromNumerical(stg.DICT_TO_CREATE_COLS),
        ),
        ("scaler", MinMaxScaler()),
    ]
)

# Pipeline applied to the "category"-dtype columns. Steps run in order:
#   1. cat_imputer     - project imputer for categorical columns
#   2. cat_creator     - project step that creates categorical features
#   3. freq_encoder    - project encoder configured with stg.COLS_TO_FREQ_ENCODE
#   4. one_hot_encoder - project encoder (not sklearn's) given [stg.COL_RAW_JOB]
categorical_transformer = Pipeline(
    steps=[
        ("cat_imputer", CatImputer()),
        ("cat_creator", CategoricalFeatureCreator()),
        ("freq_encoder", FrequencyEncoder(stg.COLS_TO_FREQ_ENCODE)),
        ("one_hot_encoder", OneHotEncoder([stg.COL_RAW_JOB])),
    ]
)

# Route each column to one of the two pipelines based on its pandas dtype:
# "category" columns go to the categorical pipeline, every other column goes
# to the numeric pipeline.
# NOTE(review): dtype_exclude="category" also matches object/bool/datetime
# columns, not only numeric ones — confirm the dataset contains none of those.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, selector(dtype_exclude="category")),
        ("cat", categorical_transformer, selector(dtype_include="category")),
    ]
)

In [74]:
# End-to-end model: column preprocessing followed by a logistic regression
# classifier left at its default hyper-parameters.
clf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("lr", LogisticRegression()),
    ]
)