In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression

In [None]:
from dm_utils import modeling
from dm_utils import model_preparation 
from dm_utils.model_wrappers import scikit_model 

In [None]:
import seaborn as sns

In [None]:
# Load seaborn's "titanic" example dataset and preview its first ten rows.
df = sns.load_dataset("titanic")
df.head(n=10)

In [None]:
# Keep only rows where both embark_town and age are present; both feed the
# feature list built further down.
# .copy() makes the filtered result an independent frame, so the column
# assignments in later cells do not write into a slice of the original frame
# (which would raise pandas' SettingWithCopyWarning).
df = df.loc[df["embark_town"].notnull() & df["age"].notnull()].copy()

In [None]:
# One-hot encode the categorical embark_town column.
# The helper returns the names of the indicator columns; they are looked up on
# df right below, so it is expected to have added them to df as well.
embark_town_columns = model_preparation.onehot_column(df, "embark_town")

# Show the original column next to its indicator columns as a sanity check.
df[["embark_town", *embark_town_columns]].head()

In [None]:
# Encode the two string-valued columns as integers so they can be used as
# numeric model features.
# The explicit astype pins the result to int64: pandas 2.2 deprecates the
# silent object->int downcast that replace() used to perform, so without it the
# columns would stay object-typed on newer pandas. Re-running the cell is still
# safe because replace() leaves already-encoded integers untouched.
df["who"] = df["who"].replace({"child": 2, "woman": 1, "man": 0}).astype("int64")
df["sex"] = df["sex"].replace({"female": 1, "male": 0}).astype("int64")

In [None]:
# Model inputs: the embark_town indicator columns first, followed by the
# numeric columns and the two integer-encoded categorical columns.
feature_columns = [*embark_town_columns, "pclass", "age", "sibsp", "sex", "who"]

# Modeling

In [None]:
# Name of the label column; passed to the dm_utils helpers and used as the
# y-column when fitting the classifiers below.
TARGET = "survived"

In [None]:
# Split df into a training frame and a test frame with the project helper.
# NOTE(review): 0.7 is presumably the fraction of rows assigned to the training
# set, and the split is presumably unseeded (different on every run) — confirm
# against dm_utils.model_preparation.get_random_train_test.
train_df, test_df = model_preparation.get_random_train_test(df, 0.7, TARGET)

### Decision Tree

In [None]:
# Run the project's run_model helper once on the fixed train/test split with a
# default (unbounded-depth) decision tree.
# The return value is bound to `_` so the notebook does not echo it as cell output.
_ = modeling.run_model(train_df, test_df, feature_columns, TARGET, DecisionTreeClassifier())

In [None]:
# Repeat the evaluation starting from the full frame rather than the fixed split.
# NOTE(review): presumably 0.7 is the train fraction used for each fresh random
# split and 50 is the number of repetitions — confirm against
# dm_utils.modeling.run_model_n_times.
modeling.run_model_n_times(df, feature_columns, TARGET, 0.7, DecisionTreeClassifier(), 50)

In [None]:
# Fit a depth-3 decision tree on the training split (fit() returns the estimator).
clf = DecisionTreeClassifier(max_depth=3).fit(
    train_df[feature_columns],
    train_df[TARGET],
)

# Print the fitted tree structure through the project helper, handing it the
# feature names and a four-space string (presumably the indent unit — confirm
# in dm_utils.model_wrappers.scikit_model).
scikit_model.print_decision_tree_with_names(clf.tree_, feature_columns, "    ")

### Gradient Boosted Trees (GBT)

In [None]:
# Fit a small gradient-boosted ensemble (10 trees of depth 3) on the training
# split (fit() returns the estimator).
clf = GradientBoostingClassifier(max_depth=3, n_estimators=10).fit(
    train_df[feature_columns],
    train_df[TARGET],
)

# Print the ensemble's rules through the project helper.
# NOTE(review): k=2 presumably limits how many trees/rules are shown — confirm
# in dm_utils.model_wrappers.scikit_model.print_gbt_rules.
scikit_model.print_gbt_rules(clf, feature_columns, k=2)

### Logistic Regression

In [None]:
# Fit a logistic regression on the training split.
# The features are unscaled (age sits next to 0/1 indicator columns), so the
# default limit of 100 solver iterations can stop before convergence and emit a
# ConvergenceWarning. The coefficients are interpreted right below, so give the
# solver enough iterations to actually converge.
clf = LogisticRegression(max_iter=1000)
clf.fit(train_df[feature_columns], train_df[TARGET])

# Show the fitted coefficients alongside their feature names via the project helper.
scikit_model.get_lreg_coefficients(clf, feature_columns)