In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import mlflow
import pandas as pd
from tqdm import tqdm
from pprint import pprint

In [3]:
import sys
sys.path.append('../')

In [4]:
from src.train_mnist import train

In [5]:
TRACKING_URI = 'http://localhost:5000'
EXPERIMENT_NAME = 'mnist'

In [6]:
#os.system("conda env export > environment.yaml")
#autocommit(file_paths=['.'], message='Another version of random forest')

In [7]:
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME, )

In [8]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier


digits = datasets.load_digits()

n_samples = len(digits.images)
X = digits.images.reshape((n_samples, -1))
y = digits.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.8, random_state=0)

In [9]:
cfg = {'n_estimators': 100, 
       'max_depth': 5}

In [10]:
clf = RandomForestClassifier(**cfg, random_state=0)
clf.fit(X_train, y_train)
preds = clf.predict(X_test)

In [11]:
scores = classification_report(y_test, preds, output_dict = True)

In [12]:
df = pd.json_normalize(scores, sep='_')
df = df.to_dict(orient='records')[0]
pprint(df)

{'0_f1-score': 0.9898305084745763,
 '0_precision': 0.9864864864864865,
 '0_recall': 0.9931972789115646,
 '0_support': 147,
 '1_f1-score': 0.8732394366197184,
 '1_precision': 0.9051094890510949,
 '1_recall': 0.8435374149659864,
 '1_support': 147,
 '2_f1-score': 0.939929328621908,
 '2_precision': 0.9172413793103448,
 '2_recall': 0.9637681159420289,
 '2_support': 138,
 '3_f1-score': 0.9060402684563759,
 '3_precision': 0.9121621621621622,
 '3_recall': 0.9,
 '3_support': 150,
 '4_f1-score': 0.9600000000000001,
 '4_precision': 0.9565217391304348,
 '4_recall': 0.9635036496350365,
 '4_support': 137,
 '5_f1-score': 0.9302325581395349,
 '5_precision': 0.9459459459459459,
 '5_recall': 0.9150326797385621,
 '5_support': 153,
 '6_f1-score': 0.9647887323943661,
 '6_precision': 0.958041958041958,
 '6_recall': 0.9716312056737588,
 '6_support': 141,
 '7_f1-score': 0.9174917491749175,
 '7_precision': 0.852760736196319,
 '7_recall': 0.9928571428571429,
 '7_support': 140,
 '8_f1-score': 0.8372093023255813,

In [13]:
with mlflow.start_run():
    mlflow.log_params(cfg)
    mlflow.log_metrics(df)

In [14]:
for n_estimators in tqdm([50, 200]):
    for max_depth in [10, 20, 50]:
        for min_samples_split in [2, 4]:
            for min_samples_leaf in [1, 2]:
                cfg = {'n_estimators': n_estimators,
                       'max_depth': max_depth,
                       'min_samples_split': min_samples_split,
                       'min_samples_leaf': min_samples_leaf,
                      }
                train(cfg)

  0%|          | 0/2 [00:00<?, ?it/s]

0.940830985947341
0.9400257324745269
0.9348424267386152
0.9400257324745269
0.94222720162125
0.9421449808096364
0.9363122601662223
0.9421449808096364
0.94222720162125
0.9421449808096364


 50%|█████     | 1/2 [00:02<00:02,  2.27s/it]

0.9363122601662223
0.9421449808096364
0.9448416988927469
0.9421598910468012
0.9477303692626858
0.9421598910468012
0.9469716555512907
0.94144054088634
0.9463475291449288
0.94144054088634
0.9469716555512907
0.94144054088634
0.9463475291449288


100%|██████████| 2/2 [00:07<00:00,  3.92s/it]

0.94144054088634



