In [None]:
%pip install -r requirements.txt

In [None]:
# Flag recording whether the working directory has already been moved up one
# level; the following cell checks it and flips it to True after its
# `os.chdir`. Re-running this cell resets the flag.
base_dir: bool = False

In [None]:
import os, sys

# Step up to the parent directory once, guarded by `base_dir` so that
# re-running this cell does not climb another level.
# NOTE(review): presumably needed so the project-local `utils` imports below
# resolve from the repository root — confirm the notebook's location.
if not base_dir:
    os.chdir('../')
    base_dir = True

import numpy as np, pandas as pd, matplotlib.pyplot as plt, sklearn

from sklearn.metrics import accuracy_score

from utils.data_tools import inbreast, cbis_ddsm, image_processing
from utils.models import models

In [2]:
# Make sure both datasets are available before loading them. Judging by the
# recorded output, `download()` reports when a dataset path already exists.
inbreast.download()
cbis_ddsm.download()

Path already exists for INBreast!
Path already exists for CBIS-DDSM!


In [3]:
# Get training features from cbis_ddsm and validation data from inbreast

class cbis_ddsm_data:
    """Namespace for the CBIS-DDSM data returned by `cbis_ddsm.get_xy()`.

    `data` keeps the returned object as a whole, while `x_train`, `x_test`,
    `y_train` and `y_test` expose its four unpacked parts.
    """

    data = cbis_ddsm.get_xy()
    x_train, x_test, y_train, y_test = data

class inbreast_data:
    """Namespace for the INBreast data returned by `inbreast.get_xy()`.

    Besides the four unpacked parts (`x_train`, `x_test`, `y_train`,
    `y_test`), `x` and `y` hold the train and test parts joined back
    together so the whole dataset can be scored in one call.
    """

    x_train, x_test, y_train, y_test = inbreast.get_xy()

    # `np.concatenate` exists in every NumPy release, whereas the `np.concat`
    # alias is only available from NumPy 2.0 onwards.
    x = np.concatenate([x_train, x_test])
    y = np.concatenate([y_train, y_test])

In [22]:
# Create model wrapper: `models` receives the object returned by
# `cbis_ddsm.get_xy()`; the grid-search and validation cells call into `M`.
# NOTE(review): this cell's execution count (22) is higher than that of the
# cells using `M` — confirm the notebook works under Restart & Run All.

M = models(cbis_ddsm_data.data)

In [6]:
# Hyper-parameter grid: every entry is forwarded to `M.grid_search` as a
# keyword argument, together with `M.random_forest`.
searchable_space = dict(
    max_depth=[None, 5, 10, 20, 50],
    n_estimators=[50, 100, 200, 500],
    min_samples_leaf=[5, 10, 20, 50],
    max_leaf_nodes=[None, 5, 10, 20, 50],
    max_samples=[None, 0.2, 0.5, 0.8],
)

# `saves` is consumed further down as an iterable of (params, score) pairs.
best_model, saves = M.grid_search(M.random_forest, **searchable_space)

Random Forest Accuracy: 0.8811188811188811


In [13]:
# Cutoff for model validation on INBreast
cutoff: float = 0.9

# Keep only the parameter sets whose grid-search score is strictly above the
# cutoff; the original order of `saves` is preserved.
validation_search_params = [params for params, score in saves if score > cutoff]

In [15]:
# Number of parameter sets that passed the cutoff. Bound with a plain
# assignment (rather than inside the print call) so the name is easy to find.
num_val_params = len(validation_search_params)
print(num_val_params)

109


In [17]:
# Create search models for validation score

from IPython.display import clear_output

# Collects one (params, accuracy) pair per candidate parameter set.
save_val = []

for i, vparam in enumerate(validation_search_params):

    # Clear first, then print: with `wait=True` the previous output is only
    # removed once new output arrives, so the progress line now stays visible
    # during the iteration instead of being wiped by the next thing printed.
    clear_output(wait=True)

    # The total is taken from the list itself so this cell does not rely on a
    # counter defined as a side effect of another cell.
    print(f"{i + 1}/{len(validation_search_params)}")

    # NOTE(review): presumably builds and fits a random forest on the data
    # held by `M` — confirm in utils.models.
    model = M.random_forest(**vparam)

    # Score the model on the full INBreast set (train and test parts joined).
    y_pred = model.predict(inbreast_data.x)

    acc = accuracy_score(inbreast_data.y, y_pred)

    save_val.append((vparam, acc))

Random Forest Accuracy: 0.9003496503496503


In [21]:
# Show acc on validation: the five best (params, accuracy) pairs, best first

top_five = sorted(save_val, key=lambda entry: entry[1], reverse=True)[:5]
print("\n".join(str(entry) for entry in top_five))

({'max_depth': 10, 'n_estimators': 50, 'min_samples_leaf': 5, 'max_leaf_nodes': 50, 'max_samples': None}, 0.8728606356968215)
({'max_depth': None, 'n_estimators': 50, 'min_samples_leaf': 5, 'max_leaf_nodes': 50, 'max_samples': None}, 0.8484107579462102)
({'max_depth': 10, 'n_estimators': 100, 'min_samples_leaf': 5, 'max_leaf_nodes': 50, 'max_samples': None}, 0.8484107579462102)
({'max_depth': 20, 'n_estimators': 50, 'min_samples_leaf': 5, 'max_leaf_nodes': 50, 'max_samples': None}, 0.8484107579462102)
({'max_depth': 50, 'n_estimators': 50, 'min_samples_leaf': 5, 'max_leaf_nodes': 50, 'max_samples': None}, 0.8484107579462102)


In [None]:
# Save