What is Model Evaluation?
After training a model, we must evaluate how well it performs. Evaluation tells us:
* Whether the model is underfitting or overfitting
* Whether we can trust its predictions

In [17]:
import numpy as np
import pandas as pd
# Toy dataset: eight rows, three feature columns and a binary `target` label.
# Every feature column contains at least one missing value (np.nan).
data = {
    'age': [22, 35, np.nan, 29, 41, 33, np.nan, 27],
    'income': [50000, 64000, 58000, np.nan, 72000, np.nan, 46000, 51000],
    'education': [
        'bachelors', 'masters', 'bachelors', 'phd',
        np.nan, 'masters', 'phd', 'bachelors',
    ],
    'target': [1, 0, 1, 0, 1, 1, 1, 1],
}
df = pd.DataFrame(data)

# Separate the feature matrix (everything except the label) from the label.
x = df.drop(columns='target')
y = df['target']

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Column lists that decide which preprocessing branch each feature goes through.
numerical_features = ['age', 'income']
# Alias for the original misspelled name, kept so any cell outside this section
# that still refers to `numerical_fetures` continues to work.
numerical_fetures = numerical_features
categorical_features = ['education']

# Numeric branch: fill missing values with the column mean, then standardize.
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' keeps the encoder from raising
# on a category that was not present when it was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column list through its own branch.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Chain preprocessing and the classifier so both are fitted on the training
# rows only, then predict labels for the held-out rows.
logistic_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('classifier', LogisticRegression()),
])
y_pred_log = logistic_pipeline.fit(x_train, y_train).predict(x_test)


In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Report each classification metric for the held-out predictions, one per line.
# Every metric function is called as metric(y_true, y_pred).
metric_report = (
    ('accuracy:', accuracy_score),
    ('precision:', precision_score),
    ('recall:', recall_score),
    ('f1_score:', f1_score),
)
for label, metric_fn in metric_report:
    print(label, metric_fn(y_test, y_pred_log))

accuracy: 0.5
precision: 0.5
recall: 1.0
f1_score: 0.6666666666666666


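Cross-validation
Instead of relying on a single train/test split, cross-validation scores the model on several different splits of the data and averages the results.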

In [19]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation over the full (x, y) dataset, rather than the single
# train/test split used earlier.
# NOTE(review): the dataset has only 8 rows, so each fold holds just one or two
# samples and the per-fold scores are necessarily coarse.
scores = cross_val_score(estimator=logistic_pipeline, X=x, y=y, cv=5)
print('Each cv score:', scores)
print('Average score:', np.mean(scores))

Each cv score: [1.  0.5 0.5 1.  1. ]
Average score: 0.8


