In [1]:
import cadprep as cpr
import nblog
from nblog import logger as lg
from nblog import NBLog as nblc
import ipynbname

In [2]:
nb_fname = ipynbname.name()

In [3]:
nbl = nblc()
lg.info(nbl.newrun)
lg.info(f'nb name: {nb_fname}')

2021-11-08 22:02:46,264 - nblog - INFO - ------------------------- NEW RUN -------------------------
2021-11-08 22:02:46,266 - nblog - INFO - nb name: telem-SVC-mixed-encoding-breast


## Kaggle entry SVC
(original was on heart data - this is adapted to breast ca data)  
https://www.kaggle.com/iadelas/heart-disease-prediction-using-svm

## Step 1 - Importing Library

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

2021-11-08 22:02:46.560870: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-08 22:02:46.560929: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Step 2 - Dataset Preparation

### 2.1 Dataset Importing and Overview

In [6]:
# define the location of the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/breast-cancer.csv"
# load the dataset
dataset = pd.read_csv(url, header=None)

1. Class: no-recurrence-events, recurrence-events
2. age: 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80-89, 90-99.
3. menopause: lt40, ge40, premeno.
4. tumor-size: 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59.
5. inv-nodes: 0-2, 3-5, 6-8, 9-11, 12-14, 15-17, 18-20, 21-23, 24-26, 27-29, 30-32, 33-35, 36-39.
6. node-caps: yes, no.
7. deg-malig: 1, 2, 3.
8. breast: left, right.
9. breast-quad: left-up, left-low, right-up, right-low, central.
10. irradiat: yes, no.

In [7]:
colnames = ['age', 'menopause', 'tum_sz', 'inv_nodes', 'node_caps', 'deg_malig', 'lat', 'quad', 'irr', 'recurrence_class']

In [8]:
dataset.columns = colnames
dataset

Unnamed: 0,age,menopause,tum_sz,inv_nodes,node_caps,deg_malig,lat,quad,irr,recurrence_class
0,'40-49','premeno','15-19','0-2','yes','3','right','left_up','no','recurrence-events'
1,'50-59','ge40','15-19','0-2','no','1','right','central','no','no-recurrence-events'
2,'50-59','ge40','35-39','0-2','no','2','left','left_low','no','recurrence-events'
3,'40-49','premeno','35-39','0-2','yes','3','right','left_low','yes','no-recurrence-events'
4,'40-49','premeno','30-34','3-5','yes','2','left','right_up','no','recurrence-events'
...,...,...,...,...,...,...,...,...,...,...
281,'50-59','ge40','30-34','6-8','yes','2','left','left_low','no','no-recurrence-events'
282,'50-59','premeno','25-29','3-5','yes','2','left','left_low','yes','no-recurrence-events'
283,'30-39','premeno','30-34','6-8','yes','2','right','right_up','no','no-recurrence-events'
284,'50-59','premeno','15-19','0-2','no','2','right','left_low','no','no-recurrence-events'


We can see there are 286 observations with 9 features. We can also see there are no numerical features other than deg_malig (which is currently a string).

In [9]:
# deploy any clean and subset methods
lg.info(f'cadprep run')

2021-11-08 22:02:49,750 - nblog - INFO - cadprep run


### 2.2 Checking Missing Values

In [10]:
dataset.isnull().sum()

age                 0
menopause           0
tum_sz              0
inv_nodes           0
node_caps           8
deg_malig           0
lat                 0
quad                1
irr                 0
recurrence_class    0
dtype: int64

In [11]:
pd.DataFrame(dataset.isnull().sum(), columns=["Cnt"]).style.bar(color = "pink")

Unnamed: 0,Cnt
age,0
menopause,0
tum_sz,0
inv_nodes,0
node_caps,8
deg_malig,0
lat,0
quad,1
irr,0
recurrence_class,0


In [12]:
# dataset['node_caps'] = dataset['node_caps'].fillna(np.round(dataset['node_caps'].mean(), 0))  # impute with mean
drop_idx1 = dataset[dataset.node_caps.isnull()].index.to_list()
drop_idx2 = dataset[dataset.quad.isnull()].index.to_list()
drop_idx = drop_idx1 + drop_idx2
print(len(drop_idx))
lg.info(f'cleaning by dropping {len(drop_idx)} records')
drop_idx

2021-11-08 22:02:49,872 - nblog - INFO - cleaning by dropping 9 records


9


[20, 31, 50, 54, 71, 92, 149, 264, 240]

In [13]:
dataset.drop(index=drop_idx, inplace=True)

In [14]:
lg.info(f'cleaned input data: {type(dataset).__name__} {dataset.shape}')

2021-11-08 22:02:49,924 - nblog - INFO - cleaned input data: DataFrame (277, 10)


### 2.3 Encoding Categorical Features - binary

In [15]:
dataset.select_dtypes(exclude="number").nunique()

age                  6
menopause            3
tum_sz              11
inv_nodes            7
node_caps            2
deg_malig            3
lat                  2
quad                 5
irr                  2
recurrence_class     2
dtype: int64

Cols 4, 6, 8 & 9 (node_caps, lat, irr and class) can be encoded with the straight binary label encoder. Rest can be done with one-hot or ordinal.

In [16]:
le = LabelEncoder()
dataset['node_caps']=le.fit_transform(dataset['node_caps'])
dataset['lat']=le.fit_transform(dataset['lat'])
dataset['irr']=le.fit_transform(dataset['irr'])
dataset['recurrence_class']=le.fit_transform(dataset['recurrence_class'])
lg.info(f'simple encoding on binary categories')
dataset

2021-11-08 22:02:49,977 - nblog - INFO - simple encoding on binary categories


Unnamed: 0,age,menopause,tum_sz,inv_nodes,node_caps,deg_malig,lat,quad,irr,recurrence_class
0,'40-49','premeno','15-19','0-2',1,'3',1,'left_up',0,1
1,'50-59','ge40','15-19','0-2',0,'1',1,'central',0,0
2,'50-59','ge40','35-39','0-2',0,'2',0,'left_low',0,1
3,'40-49','premeno','35-39','0-2',1,'3',1,'left_low',1,0
4,'40-49','premeno','30-34','3-5',1,'2',0,'right_up',0,1
...,...,...,...,...,...,...,...,...,...,...
281,'50-59','ge40','30-34','6-8',1,'2',0,'left_low',0,0
282,'50-59','premeno','25-29','3-5',1,'2',0,'left_low',1,0
283,'30-39','premeno','30-34','6-8',1,'2',1,'right_up',0,0
284,'50-59','premeno','15-19','0-2',0,'2',1,'left_low',0,0


For col 4 node_caps  yes = 1,   no = 0  
For col 6 lat right = 1, left = 0   
For col 8 irr  yes = 1,   no = 0  
For col 9 class recurrence = 1

### 2.4a Encoding Categorical Features ordinal

In [17]:
oe = OrdinalEncoder()
dataset['deg_malig_oe']=oe.fit_transform(dataset['deg_malig'].to_frame())
dataset.drop(columns='deg_malig', inplace=True)
lg.info(f'ordinal encoding on suitable categories')

2021-11-08 22:02:50,015 - nblog - INFO - ordinal encoding on suitable categories


In [18]:
dataset

Unnamed: 0,age,menopause,tum_sz,inv_nodes,node_caps,lat,quad,irr,recurrence_class,deg_malig_oe
0,'40-49','premeno','15-19','0-2',1,1,'left_up',0,1,2.0
1,'50-59','ge40','15-19','0-2',0,1,'central',0,0,0.0
2,'50-59','ge40','35-39','0-2',0,0,'left_low',0,1,1.0
3,'40-49','premeno','35-39','0-2',1,1,'left_low',1,0,2.0
4,'40-49','premeno','30-34','3-5',1,0,'right_up',0,1,1.0
...,...,...,...,...,...,...,...,...,...,...
281,'50-59','ge40','30-34','6-8',1,0,'left_low',0,0,1.0
282,'50-59','premeno','25-29','3-5',1,0,'left_low',1,0,1.0
283,'30-39','premeno','30-34','6-8',1,1,'right_up',0,0,1.0
284,'50-59','premeno','15-19','0-2',0,1,'left_low',0,0,1.0


### 2.4b Encoding Categorical Features - one-hot

In [19]:
dataset = pd.get_dummies(dataset, drop_first=False)
lg.info(f'one-hot encoding on remaining (unordered) categories')
dataset

2021-11-08 22:02:50,075 - nblog - INFO - one-hot encoding on remaining (unordered) categories


Unnamed: 0,node_caps,lat,irr,recurrence_class,deg_malig_oe,age_'20-29',age_'30-39',age_'40-49',age_'50-59',age_'60-69',...,inv_nodes_'15-17',inv_nodes_'24-26',inv_nodes_'3-5',inv_nodes_'6-8',inv_nodes_'9-11',quad_'central',quad_'left_low',quad_'left_up',quad_'right_low',quad_'right_up'
0,1,1,0,1,2.0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,1,0,0,0.0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,1,1.0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
3,1,1,1,0,2.0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,1.0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,1,0,0,0,1.0,0,0,0,1,0,...,0,0,0,1,0,0,1,0,0,0
282,1,0,1,0,1.0,0,0,0,1,0,...,0,0,1,0,0,0,1,0,0,0
283,1,1,0,0,1.0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,1
284,0,1,0,0,1.0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0


One-Hot Encoded the remaining categorical features.

### 2.5 Dataset Reordering

In [20]:
df = dataset.copy()
# dataset = df.copy()
# df

In [21]:
rec_class = dataset.recurrence_class
rec_class

0      1
1      0
2      1
3      0
4      1
      ..
281    0
282    0
283    0
284    0
285    0
Name: recurrence_class, Length: 277, dtype: int64

In [22]:
dataset = dataset.drop(columns='recurrence_class')

In [23]:
dataset.insert(loc=len(dataset.columns), column='y', value=rec_class)

In [24]:
dataset

Unnamed: 0,node_caps,lat,irr,deg_malig_oe,age_'20-29',age_'30-39',age_'40-49',age_'50-59',age_'60-69',age_'70-79',...,inv_nodes_'24-26',inv_nodes_'3-5',inv_nodes_'6-8',inv_nodes_'9-11',quad_'central',quad_'left_low',quad_'left_up',quad_'right_low',quad_'right_up',y
0,1,1,0,2.0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1
1,0,1,0,0.0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,0,1.0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,1
3,1,1,1,2.0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,1,0,0,1.0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,1,0,0,1.0,0,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
282,1,0,1,1.0,0,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,0
283,1,1,0,1.0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
284,0,1,0,1.0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


Re-ordered the label column into the rightmost of the dataset.

In [25]:
dataset.columns

Index(['node_caps', 'lat', 'irr', 'deg_malig_oe', 'age_'20-29'', 'age_'30-39'',
       'age_'40-49'', 'age_'50-59'', 'age_'60-69'', 'age_'70-79'',
       'menopause_'ge40'', 'menopause_'lt40'', 'menopause_'premeno'',
       'tum_sz_'0-4'', 'tum_sz_'10-14'', 'tum_sz_'15-19'', 'tum_sz_'20-24'',
       'tum_sz_'25-29'', 'tum_sz_'30-34'', 'tum_sz_'35-39'', 'tum_sz_'40-44'',
       'tum_sz_'45-49'', 'tum_sz_'5-9'', 'tum_sz_'50-54'', 'inv_nodes_'0-2'',
       'inv_nodes_'12-14'', 'inv_nodes_'15-17'', 'inv_nodes_'24-26'',
       'inv_nodes_'3-5'', 'inv_nodes_'6-8'', 'inv_nodes_'9-11'',
       'quad_'central'', 'quad_'left_low'', 'quad_'left_up'',
       'quad_'right_low'', 'quad_'right_up'', 'y'],
      dtype='object')

## Step 3 - Data Preprocessing

### 3.1 Convert Dataset to np array

In [26]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [27]:
print(X)

[[1. 1. 0. ... 1. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 1. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


In [28]:
print(y)

[1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0
 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 1 0 1 1 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0 0 1 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0
 0 1 1 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0
 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 1
 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


Separated the features as x and the dependant variable as y. Both is transformed to numpy array for modelling function to work.

### 3.1 Separate Training and Test Sets

In [29]:
testsize=0.3
randomstate=0
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testsize, random_state=randomstate)
lg.info(f'train-test split. Size: {testsize}, Rand state: {randomstate}')

2021-11-08 22:02:50,310 - nblog - INFO - train-test split. Size: 0.3, Rand state: 0


In [30]:
print(X_train)

[[0. 1. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 1. 0. 0.]
 [0. 1. 0. ... 0. 0. 1.]]


In [31]:
print(y_train)

[0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0
 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 1 0
 0 0 1 0 0 0 0 1 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 0 1 0 1 0
 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 1 1 1 0
 0 0 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1
 1 0 0 1 1 1 0 0]


In [32]:
print(X_test)

[[0. 1. 0. ... 1. 0. 0.]
 [0. 1. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 1. 0. ... 1. 0. 0.]]


In [33]:
print(y_test)

[0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 1 1 0 0 0 0 1
 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1
 0 0 0 0 0 1 0 0 0 0]


In [34]:
lg.info(f'encoded training data: {type(X_train).__name__} {X_train.shape}')

2021-11-08 22:02:50,847 - nblog - INFO - encoded training data: ndarray (193, 36)


### 3.2 Scaling Based on the Training Set

In [35]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [36]:
print(X_train)

[[-0.49513765  1.05871579 -0.55141097 ...  1.41972709 -0.25748465
  -0.36782349]
 [-0.49513765 -0.94454056  1.8135294  ...  1.41972709 -0.25748465
  -0.36782349]
 [-0.49513765 -0.94454056 -0.55141097 ... -0.70436073 -0.25748465
  -0.36782349]
 ...
 [ 2.01964041 -0.94454056 -0.55141097 ... -0.70436073 -0.25748465
  -0.36782349]
 [-0.49513765  1.05871579 -0.55141097 ...  1.41972709 -0.25748465
  -0.36782349]
 [-0.49513765  1.05871579 -0.55141097 ... -0.70436073 -0.25748465
   2.71869534]]


In [37]:
print(X_test)

[[-0.49513765  1.05871579 -0.55141097 ...  1.41972709 -0.25748465
  -0.36782349]
 [-0.49513765  1.05871579 -0.55141097 ... -0.70436073  3.88372673
  -0.36782349]
 [-0.49513765 -0.94454056 -0.55141097 ... -0.70436073 -0.25748465
  -0.36782349]
 ...
 [-0.49513765 -0.94454056 -0.55141097 ... -0.70436073 -0.25748465
  -0.36782349]
 [-0.49513765 -0.94454056 -0.55141097 ... -0.70436073 -0.25748465
  -0.36782349]
 [ 2.01964041  1.05871579 -0.55141097 ...  1.41972709 -0.25748465
  -0.36782349]]


Scale the value of training set to between -3 and 3 to make sure no feature overwhelm the others. The test set is scaled using same scale as training set.

## Step 4 - Classification Model Building using SVC with RBF Kernel

### 4.1 Model Building and Training with the Training Set

In [38]:
kernel='rbf'
classifier = SVC(kernel=kernel, random_state=randomstate)
lg.info(f'{classifier} kernel: {kernel}, random state: {randomstate}')
classifier.fit(X_train, y_train)

2021-11-08 22:02:50,923 - nblog - INFO - SVC(random_state=0) kernel: rbf, random state: 0


SVC(random_state=0)

### 4.2 Predicting the Test Set Result

In [39]:
y_hat = classifier.predict(X_test)

In [40]:
y_test

array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

In [41]:
y_hat

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [42]:
result_np = np.concatenate((y_hat.reshape(len(y_hat), 1), (y_test.reshape(len(y_test), 1))), 1)
result = pd.DataFrame(result_np, columns=['Prediction', 'Real_Value'])
result

Unnamed: 0,Prediction,Real_Value
0,0,0
1,0,0
2,0,0
3,0,0
4,0,1
...,...,...
79,0,1
80,0,0
81,0,0
82,0,0


We can see some wrong predictions. But we need a broader view from metrics.

## Step 5 - Measuring Model Accuracy

### 5.1 Get the Accuracy Report

In [43]:
print(classification_report(y_test, y_hat))

              precision    recall  f1-score   support

           0       0.74      0.90      0.81        61
           1       0.40      0.17      0.24        23

    accuracy                           0.70        84
   macro avg       0.57      0.54      0.53        84
weighted avg       0.65      0.70      0.66        84



We got accuracy of 69% using a SVC model with RBF kernel.

### 5.2 Get the Accuracy Report with K-Fold Cross Validation

In [44]:
val_score = cross_val_score(estimator=classifier, X = X_train, y=y_train, cv=10)
# print("Accuracy: {:.2f} %".format(val_score.mean()*100))
# print("Std. Dev: {:.2f} %".format(val_score.std()*100))
lg.info(f'accuracy: {val_score.mean()*100:.2f}')
lg.info(f' std dev: {val_score.std()*100:.2f}')

2021-11-08 22:02:51,087 - nblog - INFO - accuracy: 76.16
2021-11-08 22:02:51,088 - nblog - INFO -  std dev: 5.25


The accuracy measured using 10-Fold Cross Validation is 65-75% using SVC model with RBF kernel.

In [45]:
conf_mat = confusion_matrix(y_test, y_hat)
(tn, fp, fn, tp) = conf_mat.ravel()
print('       | pred n',  '| pred p')
print('-------------------------')
print('cond n | tn', tn, ' | fp', fp)
print('cond p | fn', fn, ' | tp', tp)

       | pred n | pred p
-------------------------
cond n | tn 55  | fp 6
cond p | fn 19  | tp 4


In [46]:
precision = tp/(tp+fp) # PPV
recall    = tp/(tp+fn) # sensitivity

lg.info(f' precision: {precision:.2f}')
lg.info(f'    recall: {recall:.2f}')

2021-11-08 22:02:51,118 - nblog - INFO -  precision: 0.40
2021-11-08 22:02:51,119 - nblog - INFO -     recall: 0.17
