# Machine Learning with Python - Heuristics
Many times a simple rule-of-thumb performs better than advanced ML

### Human Learn
A clever library for managing heuristic (rule-based) models as if they were scikit-learn ML models.

In [None]:
# need to install this now becasue it isn't in the container.
!pip install human-learn

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

# Read in our Titanic data; the path is relative to the notebook's working directory.
df_og = pd.read_csv('data/train.csv') 

In [None]:
# Preview the first rows of the raw data to see which columns are available.
df_og.head()

In [None]:
# Split the data set into train and test sets.
# Identifier / free-text columns are dropped for the example; 'Sex' is kept
# because the heuristic further down uses it.
# NOTE: 'Survived' (the target) is intentionally still part of X, because the
# exploration cells below read X_train['Survived']. Drop it before fitting any
# model that actually learns from X.
dropped_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked']
X = df_og.drop(columns=dropped_columns)
y = df_og['Survived']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

for split_name, split_frame in (('X_train', X_train), ('X_test', X_test)):
    print('size of ' + split_name)
    print(split_frame.shape)

In [None]:
# Preview the training split after the column drop.
X_train.head()


In [None]:
# Mean of the 'Survived' column in the training split, i.e. the overall
# survival rate (assuming 'Survived' is coded 0/1, as the ==0 / ==1 filters below suggest).
X_train['Survived'].mean()

The human-learn package provides a number of methods to visually explore the data. Below we begin with simple pandas summaries and histograms.

In [None]:
# Mean of 'Survived' for every (Sex, Pclass) combination in the training split.
X_train.groupby(["Sex", "Pclass"])["Survived"].mean()

In [None]:

# Number of non-null 'Survived' values per (Sex, Pclass) group, to see how many
# rows stand behind each group.
X_train.groupby(["Sex", "Pclass"])["Survived"].count()

In [None]:
# Age distribution of the rows where Survived == 1.
X_train.loc[X_train['Survived'] == 1, 'Age'].hist()

In [None]:
# Age distribution of the rows where Survived == 0.
X_train.loc[X_train['Survived'] == 0, 'Age'].hist()

In [None]:
from hulearn.classification import FunctionClassifier
# With this information I'm going to build a function that uses a heuristic to predict survival.
def sex_pclass(dataf, pclass_below=3.0, child_max_age=15):
    """
    Heuristic survival rule: women and children survive, but only when
    their passenger class is below ``pclass_below``.

    The defaults reproduce the original hard-coded rule
    (``Pclass < 3.0`` and ``Age <= 15``).

    Parameters
    ----------
    dataf : pandas.DataFrame
        Must provide the columns 'Pclass', 'Sex' and 'Age'.
    pclass_below : float, default 3.0
        Only rows with ``Pclass`` strictly below this value can be
        predicted as survivors.
    child_max_age : float, default 15
        Rows with ``Age`` less than or equal to this value count as children.
        A missing (NaN) age never satisfies the comparison, so such rows are
        not treated as children.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per row of ``dataf``:
        1 where the rule predicts survival, 0 otherwise.
    """
    # Both rules share the same class restriction, so compute it once.
    eligible_class = dataf['Pclass'] < pclass_below

    women_rule = eligible_class & (dataf['Sex'] == 'female')
    children_rule = eligible_class & (dataf['Age'] <= child_max_age)
    pred = women_rule | children_rule

    # Convert the boolean Series to a plain 0/1 integer array.
    return np.array(pred).astype(int)

# Wrap the rule function in a FunctionClassifier so it can be driven through
# fit()/predict(), as the evaluation cells below do.
heuristic_model = FunctionClassifier(sex_pclass)

In [None]:
# Sanity check: apply the rule directly and look at the first ten predictions.
sex_pclass(X_train)[0:10]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Score the heuristic's predictions against the training labels.
preds = heuristic_model.fit(X_train, y_train).predict(X_train)

for metric_label, metric_fn in (('accuracy', accuracy_score),
                                ('precision', precision_score),
                                ('recall', recall_score),
                                ('f1', f1_score)):
    print(metric_label + ':' + str(metric_fn(y_train, preds)))
print('confusion_matrix')
print(confusion_matrix(y_train, preds))

In [None]:
# Now see how the rule works on the hold-out test set.
preds = heuristic_model.fit(X_train, y_train).predict(X_test)

for metric_label, metric_fn in (('accuracy', accuracy_score),
                                ('precision', precision_score),
                                ('recall', recall_score),
                                ('f1', f1_score)):
    print(metric_label + ':' + str(metric_fn(y_test, preds)))
print('confusion_matrix')
print(confusion_matrix(y_test, preds))