In [15]:
import errno
import glob
import json
import os
import re
from types import SimpleNamespace
import torch
import warnings
from lightning_objects import LightningModel
warnings.filterwarnings('ignore')
from config import Configuration
import pandas as pd
from common_utils import stratify_split, make_holdout_df, set_seeds
from train_manager import TrainManager

In [None]:
%load_ext autoreload
%autoreload 2

In [92]:
import numpy as np
a = np.array([1,2,3])
b = np.array([1,2,3])
c = np.array([4,5,6])
predictions = np.vstack((a, b))
predictions = np.vstack((predictions, c))
predictions

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [93]:
out = []
out = out.append(a)
print(out)

None


In [84]:
from scipy.stats import stats
stats.mode(predictions, axis=0)[0], np.round(np.mean(predictions, axis=0), decimals=0)

(array([[1, 2, 3]]), array([2., 3., 4.]))

In [None]:
m = LightningModel(Configuration(), None, lr=0.1, fold=3,
                 fc_nodes=0, pretrained=True)
# frozen batch norm parameters
for name, p in m.named_parameters():
    print(name, p.requires_grad)

In [None]:
def main(experiment_name: str, config: Configuration, resume=False):
    experiment_dir = os.path.abspath(config.save_dir + f'/{experiment_name}')
    print('Experiment directory', experiment_dir)
    try:
        # -------- SETUP --------
        # if resuming, get checkpoint parameters
        checkpoint_params = get_checkpoint_params(experiment_dir, resume)

        if not checkpoint_params:
            make_experiment_directory(experiment_dir)

            # -------- LOAD DATA FROM TRAIN FILE --------
            data_df = pd.read_csv(config.data_dir + '/train.csv', engine='python')
            data_df, holdout_df = make_holdout_df(data_df, seed=config.seed)
            folds_df = stratify_split(data_df, config.fold_num, config.seed, config.target_col)

            # -------- SAVE FILES (for experiment state) --------
            folds_df.to_csv(experiment_dir + '/folds.csv', index=False)
            # save holdout to a csv file for final inference (so we don't run inference on training examples)
            holdout_df.to_csv(experiment_dir + '/holdout.csv', index=False)
            # save the settings for this experiment to its directory
            with open(experiment_dir + '/experiment_config.json', 'w') as f:
                json.dump(config.__dict__, f)
        else:
            print('resuming...')
            # LOAD DATA FROM SAVED FILES
            with open(experiment_dir + '/experiment_config.json', 'r') as f:
                config = json.load(f, object_hook=lambda d: SimpleNamespace(**d))
            folds_df = pd.read_csv(experiment_dir + '/folds.csv', engine='python')
            holdout_df = pd.read_csv(experiment_dir + '/holdout.csv', engine='python')

        trainer = TrainManager(experiment_name=experiment_name, experiment_dir=experiment_dir,
                               folds_df=folds_df, holdout_df=holdout_df,
                               checkpoint_params=checkpoint_params, config=config)
        trainer.run()
    finally:
        torch.cuda.empty_cache()

def make_experiment_directory(basename):
    try:
        os.makedirs(basename)
    except FileExistsError as e:
        print('Experiment already exists. Be sure to resume training appropriately or start a new experiment.')
        if e.errno == errno.EEXIST: raise


def get_checkpoint_params(basename, resume):
    """
    We can restart from the middle of a fold or start from the beginning of a fold.

    checkpoint_params: {"restart_from": fold, "start_beginning_of": fold, "checkpoint_file_path": file}
        restart_from (int): start from middle of a fold - typically used when a training session was cancelled mid fold
            checkpoint_file_path (str) is required in this case
        start_beginning_of (int): train a particular fold
    """

    checkpoint_params = None
    if resume:
        checkpoint_params = {}
        model_filenames = glob.glob(basename + '/*fold*.ckpt')
        trained_folds = [re.findall(r'fold\d+', f)[0][len('fold'):] for f in model_filenames]
        most_recent_fold = int(max(trained_folds)) if len(trained_folds) > 0 else 0

        checkpoint_params['restart_from'] = most_recent_fold
        checkpoint_params['checkpoint_file_path'] = f'{basename}/{config.model_arch}_fold{most_recent_fold}.pth'
    return checkpoint_params

In [None]:
if __name__ == '__main__':
    try:
        debug = False
        print('Running in debug mode:', debug)
        config = Configuration()
        set_seeds(config.seed)
        config.debug = debug
        main(experiment_name='exp8_sgd_frozen_batch_norm', resume=False, config=config)
    except KeyboardInterrupt:
        pass