# Evaluation metrics (binary classification)
## Use `precision` score in grid search


## Setting up

- Breast cancer data
- 2 classes
- 30 features
- SVC

Classes:

- `0` = Malignant - Tumor grows rapidly, invades and destroys nearby normal tissues, and spreads throughout the body.
- `1` = Benign - Tumor grows slowly and does not spread.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score ,recall_score, f1_score

# Breast cancer data
from sklearn.datasets import load_breast_cancer

# Fetch the breast cancer dataset and pull out the feature matrix and labels.
dataObj = load_breast_cancer()
X, y = dataObj.data, dataObj.target

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=1
)

# Fit the scaler on the training split only, then apply that same
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

# Support vector classifier used as the estimator in the grid searches below.
svc = SVC(random_state=1)

- Scoring: https://scikit-learn.org/stable/modules/model_evaluation.html
- Note that when using `scoring='precision'`, the default parameters will be used, which means that `pos_label=1`, i.e. precision is computed for the benign class (`1`), not the malignant class (`0`).

### Incorrect


In [None]:
from sklearn.model_selection import GridSearchCV

# NOTE(review): c_gamma_range is not referenced anywhere in this cell;
# kept unchanged in case another cell relies on it - confirm before removing.
c_gamma_range = [0.01, 0.1, 1.0, 10.0]

# Candidate values shared by C (both kernels) and gamma (rbf kernel only).
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

# Two sub-grids: a linear kernel tuned over C, and an rbf kernel tuned over
# every (C, gamma) combination.
set1 = dict(C=param_range, kernel=['linear'])
set2 = dict(C=param_range, gamma=param_range, kernel=['rbf'])
param_grid = [set1, set2]

# The string 'precision' uses precision_score with its default arguments,
# so pos_label=1 and the score is the precision for class 1.
gs = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='precision',
    cv=10,
    n_jobs=-1,
)

# fit() returns the fitted search object itself, so rebinding gs is unnecessary.
gs.fit(X_train_std, y_train)
print(gs.best_score_, gs.best_params_, sep='\n')

### Correct


In [None]:
from sklearn.metrics import make_scorer

# Wrap precision_score in a scorer object so that a non-default argument
# (pos_label=0, i.e. class 0 treated as the positive class) reaches it.
scorer = make_scorer(precision_score, pos_label=0)

# Same estimator and parameter grid as the previous search; only the scoring
# changes, from the 'precision' string to the custom scorer.
gs = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring=scorer,
    cv=10,
    n_jobs=-1,
)

# fit() returns the fitted search object itself, so rebinding gs is unnecessary.
gs.fit(X_train_std, y_train)
print(gs.best_score_, gs.best_params_, sep='\n')