The general situation where Bayes' theorem is revelent is when:
- you have a hypothesis (denoted $H$)
- you have observed an evidence (denoted $E$)
- you want to know the probability that the hypothesis holds given that evidence is true $P(H|E)$

$$P(H|E)=\frac{P(H)P(E|H)}{P(E)}=\frac{P(H)P(E|H)}{P(H)P(E|H)+P(\lnot H)P(E|\lnot H)}$$

In [12]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from hyperopt import hp, fmin, rand, tpe, Trials, STATUS_FAIL, STATUS_OK
from hyperopt.pyll import scope

In [42]:
df_train = pd.read_csv(r'data\titanic_train.csv')
df_test = pd.read_csv(r'data\titanic_test.csv')

In [13]:
df_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [44]:
encoder = LabelEncoder()
df_train = df_train.assign(Embarked=encoder.fit_transform(df_train.Embarked.fillna('-1')))
df_train = df_train.assign(Sex=encoder.fit_transform(df_train.Sex.fillna('-1')))
df_train = df_train.assign(Age=df_train.Age.mean())

In [47]:
X = df_train[['Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].values
y = df_train[['Survived']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=7, test_size=0.2)

In [4]:
scope.define(XGBClassifier)
scope.define(LGBMClassifier)
scope.define(RandomForestClassifier)
scope.define(SVC)

In [58]:
search_space = hp.choice('classifier_type', [
    scope.RandomForestClassifier(
        n_estimators=hp.choice('rf_n_estimators', [8, 12, 16, 20, 24, 28, 32]),
        criterion=hp.choice('rf_criterion', ['entropy', 'gini']),
        max_features=hp.uniform('rf_max_features', 0.3, 0.8),
        min_samples_leaf=hp.choice('rf_min_samples_leaf', range(1, 10))),
    scope.SVC(
        C=hp.loguniform('svm_C', -4, 1),
        gamma=hp.loguniform('svm_gamma', -4, 1)),
    scope.LGBMClassifier(
        learning_rate=hp.uniform('lgbm_learning_rate', 0.05, 0.5),
        n_estimators=hp.choice('lgbm_n_estimators', [8, 12, 16, 20, 24, 28, 32]),
        num_leaves=hp.choice('lgbm_num_leaves ', [4, 8, 16])),
#     scope.XGBClassifier(
#         use_label_encoder=False,
#         n_estimators=hp.choice('xgb_n_estimators', [8, 12, 16, 20, 24, 28, 32]),
#         learning_rate=hp.uniform('xgb_learning_rate', 0.1, 0.5))
])

In [52]:
trials = Trials()

def objective_function(estimator):
    estimator.fit(X_train, y_train)
    y_hat = estimator.predict(X_test)
    return -1 * accuracy_score(y_test, y_hat)

In [59]:
best = fmin(
    fn=objective_function,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50
)
print(best)

100%|███████████████████████████████████████████████| 50/50 [00:03<00:00, 15.74trial/s, best loss: -0.7932960893854749]
{'classifier_type': 0, 'rf_criterion': 1, 'rf_max_features': 0.5572811772748394, 'rf_min_samples_leaf': 0, 'rf_n_estimators': 6}


In [63]:
from bayes_opt import BayesianOptimization

In [None]:
def f()

---
*&#9829; By Quang Hung x Thuy Linh &#9829;*