In [None]:
import os,time
import json
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder,FunctionTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.svm import SVC

from sklearn.metrics import confusion_matrix,classification_report,roc_auc_score

In [None]:
# Feature column names, grouped by kind. Both lists are empty placeholders
# here and must be filled in with real column names before the cells below
# can do useful work.
num_cols = []    # numeric feature columns
cate_cols = []   # categorical feature columns

# Every selected feature column: numeric ones first, then categorical.
all_cols = [*num_cols, *cate_cols]

In [None]:
# Hold out 30% of the rows for evaluation; random_state=0 makes the shuffle
# (and therefore the split) reproducible across runs.
train_x, test_x, train_y, test_y = train_test_split(
    data_df[all_cols],
    target_df,
    random_state=0,
    test_size=0.3,
)

# Sanity check: the feature/target shapes of each split.
print(f"{train_x.shape} {train_y.shape}")
print(f"{test_x.shape} {test_y.shape}")

In [None]:
def log_transform(x):
    """Return the element-wise natural log of ``1 + x``.

    Uses ``np.log1p`` rather than ``np.log(1 + x)``: for very small ``|x|``
    the sum ``1 + x`` rounds to exactly 1.0 and the naive form returns 0,
    whereas ``log1p`` keeps full precision.

    Parameters
    ----------
    x : scalar or array-like
        Input values. Values <= -1 are outside the domain and produce
        ``-inf`` / ``nan`` (with a NumPy runtime warning).

    Returns
    -------
    NumPy scalar or ndarray with ``log(1 + x)`` applied element-wise.
    """
    return np.log1p(x)

# Numeric features: apply log(1 + x) first, then standardise.
# The log step must run BEFORE the scaler: StandardScaler centres every
# column on zero, so its output can contain values at or below -1, for which
# log(1 + x) is nan / -inf and the downstream estimator cannot be fitted.
# NOTE(review): assumes the raw numeric columns are all > -1 — confirm.
num_transformer = Pipeline(steps=[
    ('log_transform',FunctionTransformer(log_transform)),
    ('scaler',StandardScaler())
])

# Categorical features: one-hot encode. handle_unknown='ignore' encodes a
# category that was not seen during fit as all zeros instead of raising,
# so scoring the held-out split cannot fail on a rare category.
cat_transformer = Pipeline(steps=[
    ('one_hot',OneHotEncoder(handle_unknown='ignore'))
])

# Send each column group through its own transformer and concatenate the
# results (categorical block first, numeric block second).
preprocessor = ColumnTransformer(transformers=[
    ('cat',cat_transformer,cate_cols),
    ('num',num_transformer,num_cols)
])

# Full model: preprocessing followed by one estimator.
# Alternative estimators that can be swapped in for the 'model' step
# (note: SVC only provides predict_proba when built with probability=True):
#     ('model',SVC(kernel='rbf'))
#     ('model',SVC(kernel='linear'))
#     ('model',xgb.XGBClassifier())
#     ('model',DecisionTreeClassifier(max_depth=100))
#     ('model',RandomForestClassifier(n_estimators=50,max_depth=100))
pipeline = Pipeline(steps=[
    ('preprocessor',preprocessor),
    ('model',LogisticRegression(C=1000,solver='liblinear'))
])

In [None]:
# Fit preprocessing + estimator on the training split. Pipeline.fit returns
# the pipeline itself, so `model` and `pipeline` refer to the same fitted object.
pipeline.fit(train_x, train_y)
model = pipeline

# Score on the held-out split (shown as the cell output).
model.score(test_x, test_y)

In [None]:
# Per-row probability taken from column 1 of predict_proba
# (presumably the positive class of a binary target — confirm against model.classes_).
pred_probs = model.predict_proba(test_x)[:, 1]

# Hard class labels for the same held-out rows.
preds = model.predict(test_x)

In [None]:
# Confusion matrix of true vs. predicted labels on the held-out split.
print(confusion_matrix(y_true=test_y, y_pred=preds))

# Text report of per-class metrics for the same predictions.
print(classification_report(y_true=test_y, y_pred=preds))