# Functions for running training loops

## K-Fold Training and Cross-Validation

In [None]:
#default_exp trainers
#export
import datetime
import os
import tempfile

import torch
import pandas as pd
import pytorch_lightning as lit
import wandb
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

from reappraisalmodel.lightningreapp import LightningReapp

# Default hyperparameters; kfold_train passes this mapping to LightningReapp.
default_config = dict(
    lr=1e-3,  # presumably the optimizer learning rate -- confirm in LightningReapp
    hidden_layer_size=50,  # presumably the hidden-layer width -- confirm in LightningReapp
)


def _metric_to_float(value) -> float:
    """Convert a logged metric (tensor-like or plain number) to a Python float."""
    return float(value.item()) if hasattr(value, 'item') else float(value)


def kfold_train(k: int, ldhdata, strat, **trainer_kwargs) -> list:
    """Fit a fresh LightningReapp on each of ``k`` splits and upload the best checkpoints.

    For every split a new model, checkpoint callback and early-stopping
    callback are created, the model is trained, and the best checkpoint is
    written to a temporary directory. After all splits have been trained the
    checkpoints are uploaded to the ``ldhdata`` S3 bucket under ``outputs/``.

    Args:
        k (int): Number of splits to train on; splits ``0 .. k-1`` are
            requested from ``ldhdata``.
        ldhdata: Object providing ``get_train_dataloader(split)`` and
            ``get_val_dataloader(split)``.
            See `reappraisalmodel.ldhdata.LDHDataModule`.
        strat: Label embedded in the checkpoint file names.
        **trainer_kwargs: Extra keyword arguments forwarded to
            ``pytorch_lightning.Trainer``. ``max_epochs`` (default 20) and
            ``gpus`` (default 1 when CUDA is available, else None) are
            extracted and passed explicitly.

    Returns:
        list: One ``[val_loss, train_loss, num_epochs]`` row per split.
    """
    max_epochs = trainer_kwargs.pop('max_epochs', 20)
    gpus = trainer_kwargs.pop('gpus', 1 if torch.cuda.is_available() else None)

    # A real timestamp: the previous format string used %h/%m/%s (month name,
    # month number, platform-specific epoch seconds) on a date object.
    today = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

    fold_results = []
    all_metrics = []

    with tempfile.TemporaryDirectory() as tempdir:
        print(f'Created temporary directory: {tempdir}')

        for split in range(k):
            # Select the dataloaders for the given split.
            train_dl = ldhdata.get_train_dataloader(split)
            val_dl = ldhdata.get_val_dataloader(split)

            # Built per split so patience counters / best score from one
            # split cannot leak into the next.
            early_stop_checkpoint = EarlyStopping(
                monitor='val_loss',
                mode='min',
                min_delta=0.001,
                patience=5,
                verbose=False,
            )

            callback_checkpoint = ModelCheckpoint(
                monitor='val_loss',
                mode='min',
                dirpath=tempdir,
                # The fold index keeps names unique across splits; zero-padded
                # epoch avoids spaces in the file name.
                filename=(f'{today}-{strat}-fold{split}'
                          + '-{epoch:02d}-{val_loss:.02f}'),
                verbose=False,
                save_last=False,
                save_top_k=1,
                save_weights_only=False,
                period=1,  # once every epoch
            )

            model = LightningReapp(default_config)
            trainer = lit.Trainer(
                gpus=gpus,
                gradient_clip_val=1.0,
                progress_bar_refresh_rate=30,
                max_epochs=max_epochs,
                terminate_on_nan=True,
                # Only sanity-check validation on the first split.
                num_sanity_val_steps=2 if split == 0 else 0,
                callbacks=[callback_checkpoint, early_stop_checkpoint],
                **trainer_kwargs)
            print(f"Training on split {split + 1}")
            trainer.fit(model, train_dl, val_dl)
            fold_results.append({
                'metrics': trainer.logged_metrics,
                'checkpoint': callback_checkpoint.best_model_path,
                'num_epochs': trainer.current_epoch
            })

        # Uploads must happen inside the `with` block: the checkpoints live
        # in the temporary directory, which is deleted on exit.
        bucket = boto3.Session().resource('s3').Bucket('ldhdata')
        print(f"s3 Bucket: {bucket}")

        for fold in fold_results:
            val_loss = _metric_to_float(fold['metrics']['val_loss'])
            train_loss = _metric_to_float(fold['metrics']['train_loss'])
            num_epochs = fold['num_epochs']

            ckpt_path = fold['checkpoint']
            if ckpt_path:  # empty when no checkpoint was written for the split
                filename = os.path.basename(ckpt_path)
                # Object key is the destination in S3; the argument to
                # upload_file is the local file to send.
                bucket.Object(f'outputs/{filename}').upload_file(ckpt_path)

            row = [val_loss, train_loss, num_epochs]
            print(row)
            all_metrics.append(row)
    return all_metrics

## Hyperparameter Tuning

Sources:
- [Scaling Up PyTorch Lightning Hyperparameter Tuning w/ Ray](https://medium.com/distributed-computing-with-ray/scaling-up-pytorch-lightning-hyperparameter-tuning-with-ray-tune-4bd9e1ff9929)

In [None]:
# export
from functools import partial
from argparse import ArgumentParser

import torch
import pytorch_lightning as lit
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray import tune

from reappraisalmodel.lightningreapp import LightningReapp

# Argument parser instance; no arguments are registered on it in this cell.
parser = ArgumentParser()


# Lightning callback that reports to Ray Tune at the end of validation.
# The mapping is {name reported to Tune: name of the metric logged by Lightning}.
# A "mean_accuracy": "val_accuracy" entry is currently left out.
callback_tuner = TuneReportCallback(
    dict(loss="val_loss"),
    on="validation_end",
)

# Search space sampled by Ray Tune for each trial.
default_tune_config = dict(
    # loguniform samples by magnitude
    lr=tune.loguniform(1e-4, 1e-1),
    # NOTE(review): quniform appears to yield floats (e.g. 23.0); confirm that
    # LightningReapp casts hidden_layer_size to int before using it as a size.
    hidden_layer_size=tune.quniform(10, 50, 1),
)

### TUNING HYPERPARAMETERS
def train_tune(config, **tuner_kwargs):
    """Train one LightningReapp with ``config``, reporting validation loss to Ray Tune.

    Args:
        config: Hyperparameter mapping passed to ``LightningReapp``.
        **tuner_kwargs: Optional settings:
            max_epochs: Passed to ``lit.Trainer`` (default 10).
            fast_dev_run: Passed to ``lit.Trainer`` (default 1, as before).
            ldhdata: Data passed to ``trainer.fit``. When omitted, the
                module-level name ``ldhdata`` is used, as before.

    Raises:
        NameError: If ``ldhdata`` is neither passed nor defined at module level.
    """
    max_epochs = tuner_kwargs.get('max_epochs', 10)
    # NOTE(review): with fast_dev_run enabled Lightning only runs a handful of
    # batches, so max_epochs likely has no effect -- pass fast_dev_run=False
    # for real tuning runs.
    fast_dev_run = tuner_kwargs.get('fast_dev_run', 1)

    try:
        datamodule = tuner_kwargs['ldhdata']
    except KeyError:
        # Fall back to the module-level global the original code relied on.
        datamodule = ldhdata

    model = LightningReapp(config)

    # `num_folds` was removed: it is not a lit.Trainer argument and made the
    # constructor raise TypeError.
    trainer = lit.Trainer(
        fast_dev_run=fast_dev_run,
        max_epochs=max_epochs,
        gpus=1 if torch.cuda.is_available() else None,
        progress_bar_refresh_rate=30,
        callbacks=[callback_tuner],
    )
    trainer.fit(model, datamodule)


# tune.run(train_tune, config=default_tune_config, num_samples=2)

In [None]:
#export
import boto3
from botocore.exceptions import ClientError

def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # Function-scope import so the error path below cannot fail with a
    # NameError; `logging` is not among the imports next to this function.
    import logging

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # The managed transfer re-raises client errors as
        # S3UploadFailedError, so both are treated as a failed upload.
        logging.error(e)
        return False
    return True

In [None]:
!nbdev_build_lib

Converted LDHData.ipynb.
Converted LightningReapp.ipynb.
Converted Trainers.ipynb.
Converted Untitled.ipynb.
