In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import itertools

import mlflow
import pandas as pd
from tqdm import tqdm

In [3]:
# MLflow connection settings: where the tracking server is reached and which
# experiment the runs in this notebook are filed under.
TRACKING_URI = "http://localhost:5003"
EXPERIMENT_NAME = "first_try"

In [4]:
# Point the MLflow client at TRACKING_URI, then select EXPERIMENT_NAME as the
# experiment for subsequent runs.
# NOTE(review): presumably the experiment is resolved against the tracking
# server, so the URI must be set first — keep this order.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

In [5]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier


# Load the scikit-learn digits dataset and flatten every image into a single
# feature row so it can be fed to a tabular classifier.
digits = datasets.load_digits()

n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

# Fixed random_state keeps the split reproducible across kernel restarts.
# NOTE(review): test_size=0.8 holds out 80% of the samples and trains on only
# 20% — confirm this is intended and not a swapped train/test fraction.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=0,
)

In [6]:
# Baseline random-forest hyperparameters; unpacked into the classifier
# constructor and logged to MLflow as the run's parameters.
cfg = dict(n_estimators=100, max_depth=5)

In [7]:
# Fit the baseline forest on the training split (seeded for reproducibility),
# then predict labels for the held-out split.
clf = RandomForestClassifier(random_state=0, **cfg).fit(X_train, y_train)
preds = clf.predict(X_test)

In [8]:
# Request the report as a nested dict (output_dict=True) rather than a
# formatted string, so it can be flattened and logged as metrics.
scores = classification_report(
    y_test,
    preds,
    output_dict=True,
)

In [9]:
# Flatten the nested report into a single-level mapping whose keys join the
# nesting levels with '_' (e.g. '0_precision'); the one-row frame is converted
# to records and its only record is kept.
# Note: despite its name, `df` ends up holding a plain dict, not a DataFrame.
flat_report = pd.json_normalize(scores, sep='_')
records = flat_report.to_dict(orient='records')
df = records[0]
print(df)

{'accuracy': 0.9165507649513213, '0_precision': 0.9864864864864865, '0_recall': 0.9931972789115646, '0_f1-score': 0.9898305084745763, '0_support': 147, '1_precision': 0.9051094890510949, '1_recall': 0.8435374149659864, '1_f1-score': 0.8732394366197184, '1_support': 147, '2_precision': 0.9172413793103448, '2_recall': 0.9637681159420289, '2_f1-score': 0.939929328621908, '2_support': 138, '3_precision': 0.9121621621621622, '3_recall': 0.9, '3_f1-score': 0.9060402684563759, '3_support': 150, '4_precision': 0.9565217391304348, '4_recall': 0.9635036496350365, '4_f1-score': 0.9600000000000001, '4_support': 137, '5_precision': 0.9459459459459459, '5_recall': 0.9150326797385621, '5_f1-score': 0.9302325581395349, '5_support': 153, '6_precision': 0.958041958041958, '6_recall': 0.9716312056737588, '6_f1-score': 0.9647887323943661, '6_support': 141, '7_precision': 0.852760736196319, '7_recall': 0.9928571428571429, '7_f1-score': 0.9174917491749175, '7_support': 140, '8_precision': 0.9642857142857143

In [10]:
# Record the baseline experiment as one MLflow run: the hyperparameters from
# `cfg` as params and the flattened classification report as metrics.
with mlflow.start_run():
    mlflow.log_params(cfg)
    # `df` here is the flat {metric_name: value} dict, not a DataFrame.
    mlflow.log_metrics(df)

In [11]:
import sys

# Make the parent directory importable so `src.*` modules resolve from this
# notebook. The membership guard keeps the cell idempotent: re-running it no
# longer piles duplicate '../' entries onto sys.path.
# NOTE(review): '../' is resolved against the kernel's working directory —
# this assumes the notebook is launched from its own folder.
if '../' not in sys.path:
    sys.path.append('../')

In [13]:
from src.train_mnist import train

In [14]:
# Exhaustive hyperparameter sweep: call `train` once per combination.
# The progress bar tracks the outer `n_estimators` values only; the remaining
# three axes are flattened with itertools.product, which varies the rightmost
# axis fastest — the same visiting order as the equivalent nested loops.
for n_estimators in tqdm([50, 200]):
    for max_depth, min_samples_split, min_samples_leaf in itertools.product(
            [3, 5, 8, 13, 21],  # max_depth
            [2, 3, 5],          # min_samples_split
            [1, 2, 3],          # min_samples_leaf
    ):
        # Use a dedicated name so the sweep does not overwrite the baseline
        # `cfg` defined earlier; otherwise re-running the baseline fit/logging
        # cells after this one would silently use the last sweep config.
        sweep_cfg = {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'min_samples_split': min_samples_split,
            'min_samples_leaf': min_samples_leaf,
        }
        train(sweep_cfg)

100%|██████████| 3/3 [01:06<00:00, 22.12s/it]
