# 02-Exploration and extension to pytorch lightning 

Goal of this notebook is to just explore the dataset, set up some basic utilities 

In [1]:
%%bash
pip install attrdict
pip install timm
pip install pytorch-lightning==1.4.0

Collecting attrdict
  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)
Installing collected packages: attrdict
Successfully installed attrdict-2.0.1
Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
Installing collected packages: timm
Successfully installed timm-0.4.12
Collecting pytorch-lightning==1.4.0
  Downloading pytorch_lightning-1.4.0-py3-none-any.whl (913 kB)
Installing collected packages: pytorch-lightning
  Attempting uninstall: pytorch-lightning
    Found existing installation: pytorch-lightning 1.4.4
    Uninstalling pytorch-lightning-1.4.4:
      Successfully uninstalled pytorch-lightning-1.4.4
Successfully installed pytorch-lightning-1.4.0




In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import matplotlib.pyplot as plt
import os
import tqdm

import seaborn as sns
from torchvision.io import read_image
import torchvision.transforms as T
from torchvision.utils import make_grid
from attrdict import AttrDict
import torch
import yaml
from sklearn.model_selection import StratifiedKFold
import copy
import pickle
# from tqdm import tqdm_notebook

# additional lightning 

import pytorch_lightning as pl
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.progress import ProgressBarBase
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule, LightningModule

In [None]:
# import utility (this is for kaggle kernel)
# if you are in actual notebook use this -----
# import sys
# sys.path.append("../")
# from pawnet.utility import *


# if in kaggle
# you need to import the utility script by uploading to kaggle as pawnet_utility/pawnet_utility.py
from pawnet_utility import *

In [3]:
# check the package version to get reproducible env 
# source: https://www.kaggle.com/rtatman/get-the-versions-of-imported-packages

"""
To be used for kaggle notebook
"""

import pkg_resources
import types
def get_imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            # Split ensures you get root package, 
            # not just imported function
            name = val.__name__.split(".")[0]

        elif isinstance(val, type):
            name = val.__module__.split(".")[0]

        # Some packages are weird and have different
        # imported names vs. system names
        if name == "PIL":
            name = "Pillow"
        elif name == "sklearn":
            name = "scikit-learn"

        yield name
imports = list(set(get_imports()))

requirements = []
for m in pkg_resources.working_set:
    if m.project_name in imports and m.project_name!="pip":
        requirements.append((m.project_name, m.version))

for r in requirements:
    print("{}=={}".format(*r))

tqdm==4.62.1
torchvision==0.8.2+cu110
torch==1.7.1+cu110
seaborn==0.11.2
scikit-learn==0.23.2
pandas==1.2.5
numpy==1.19.5
matplotlib==3.4.3


In [4]:



"""
To add to utility.py
"""

# def seed_everything(seed=1234):
#     """
#     Utility function to seed everything
#     source: https://www.kaggle.com/bminixhofer/deterministic-neural-networks-using-pytorch
#     """
#     random.seed(seed)
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True

    

def read_yaml(filename):
    """
    Read yaml configuation and returns the dict

    Parameters
    ----------
    filename: string
        Path including yaml file name
    """

    with open(filename) as f:
        config = yaml.safe_load(f)

    return config


    
# configs

# config is different in kaggle


class BaseConfigLoader:
    
    def __init__(self,config_path):
        self.config = read_yaml(config_path)
            
    def load_config(self):
        return AttrDict(self.config)

In [5]:
# load config
# this object manages all the configurations

base_config_manager = BaseConfigLoader("../input/config/config.yaml")

# TODO: edit this for each base image model
model_config = base_config_manager.model.densenet121

# Loading Data

We will load the data by creating torch datasets as well as dataloader

In [6]:
# this is specific to kaggle
# if running in GCS, replace with our GCP bucket 
# get cache location of the dataset 
# GCS_DS_PATH = KaggleDatasets().get_gcs_path()
file_path = base_config_manager.load_config().filepath.kaggle #"/kaggle/input/petfinder-pawpularity-score/"


train_df = pd.read_csv(os.path.join(file_path,"train.csv"))
test_df = pd.read_csv(os.path.join(file_path,"test.csv"))

In [7]:
train_df.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72


In [8]:
test_df.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0


In [9]:
class pawnetDataset(torch.utils.data.Dataset):
    """
    Dataset
    Based on template https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
    """
    def __init__(self,annotation_df, img_dir,transform=None,target_transform=None,test=False,custom_len=None):
        self.annotation_df = annotation_df
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.test=test # if dataset contains labels
        self.custom_len=custom_len # if we want to define our own epoch
        
        
    def __len__(self):
        """Define 1 epoch"""
        if self.custom_len is None:
            return len(self.annotation_df)
        else:
            return self.custom_len
    
    def __getitem__(self,idx):
        """called batch num of times"""
        img_path = os.path.join(self.img_dir, self.annotation_df.iloc[idx, 0]) # ID is column index 0
        image = read_image(img_path+".jpg")
        if self.test:
            label = 0
        else:
            label = self.annotation_df.iloc[idx, 13] # Pawpularity is column index 13
            
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

# Train basic models 

In [10]:
import torch
from torch import nn
import torch.nn.functional as F
from timm import create_model


In [11]:


class pawNetBasic(pl.LightningModule):
    """
    First cut basic pawNet model
    we will improve on this - this serves as skeleton code
    for other models
    
    timm contains collection of several pretrained models
    
    This is a lightning variant *
    
    
    lightning model requires the following methods:
    1. forward 
    2. training_step (logic inside the iteration loop) , validation_step, test_step (not stable on tpu)
    3. training_epoch_end, validation_epoch_end
    4. configure_optimizers 
    
    other configurable list here https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html
    
    """
    
    def __init__(self,criterion, dropout=0.5,lr=0.00001):
        super().__init__()
        self.dropout = dropout
        self.lr = lr
        self._criterion = criterion        
        # initialize layers
        # https://fastai.github.io/timmdocs/tutorial_feature_extractor
        # remove FCL by setting num_classes=0
        self.pretrained = create_model(
            model_config.pretrained, 
            pretrained=True, 
            num_classes=0, 
            in_chans=3
        )
        
        # create layers based on pretrained model selected (this affects the feature map size)
        # self.global_avg_pooling = torch.nn.AdaptiveAvgPool2d(1) # timm pretrain comes with pooling    
        self.linear_1 = torch.nn.Linear(in_features=model_config.feature_map_out_size,out_features=1000)
        self.prelu = torch.nn.PReLU()
        self.linear_2 = torch.nn.Linear(in_features=1000,out_features=1)
        
    def forward(self,x):
        out = self.pretrained(x)
#         out = out.view(out.size(0), -1) # reshape for linear. timm already returns with CHW flatten
        out = torch.nn.Dropout(self.dropout)(out)
        out = self.linear_1(out)
        out = self.prelu(out)
        out = self.linear_2(out)
        
        
        
        return out
    
    
    def training_step(self, batch, batch_idx):
        """
        logic instead batch loop
        """
        loss, pred, labels = self.__share_step(batch, 'train')
        
        return {'loss': loss, 'pred': pred, 'labels': labels}
        
    def validation_step(self, batch, batch_idx):
        """
        logic instead batch loop for validation
        """
        
        loss, pred, labels = self.__share_step(batch, 'val')
        return {'loss': loss, 'pred': pred, 'labels': labels}
    
    def __share_step(self, batch, mode):
        images, labels = batch
        labels = labels.float() / 100.0
        
        logits = self.forward(images).squeeze(1)
        loss = self._criterion(logits, labels)
        
        # return logloss for training mode, scaled for others
        pred = logits.sigmoid().detach().cpu() * 100.
        labels = labels.detach().cpu() * 100.
        return loss, pred, labels
        
    def training_epoch_end(self, outputs):
        """
        called every end of epoch, contains logic
        at end of epoch
        """
        self.__share_epoch_end(outputs, mode = 'train')

    def validation_epoch_end(self, outputs):
        self.__share_epoch_end(outputs, mode = 'val')    
        
    def __share_epoch_end(self, outputs, mode):
        """
        output is a list of output defined in
        `training_step` as well as `validation_step`.
        Need to iterate through each iteration's output.
        the output was a dict
        """
        preds = []
        labels = []
        losses = []
        for out in outputs:
            pred, label, loss = out['pred'], out['labels'], out["loss"]
            preds.append(pred)
            labels.append(label)
            losses.append(loss.view(-1,1))
        preds = torch.cat(preds)
        labels = torch.cat(labels)
        losses = torch.cat(losses)
        if mode == "train":
            loglogss_metrics = losses.mean() # average logloss across iterations
            self.log(f'{mode}_logloss', loglogss_metrics, prog_bar=True)
        else:
            print(f"{mode}: skip logging for logloss")
            
        # RMSE
        metrics = torch.sqrt(((labels - preds) ** 2).mean())
        # https://pytorch-lightning.readthedocs.io/en/stable/extensions/logging.html
        # automatic accumulation at end of epoch for training, true always for test,validation loops
        self.log(f'{mode}_RMSE_loss', metrics, prog_bar=True)
        
        
    def configure_optimizers(self):
        """
        https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.core.lightning.html#pytorch_lightning.core.lightning.LightningModule.configure_optimizers
        
        Any of these 6 options.

        Single optimizer.

        List or Tuple of optimizers.

        Two lists - The first list has multiple optimizers, and the second has multiple LR schedulers (or multiple lr_scheduler_config).

        Dictionary, with an "optimizer" key, and (optionally) a "lr_scheduler" key whose value is a single LR scheduler or lr_scheduler_config.

        Tuple of dictionaries as described above, with an optional "frequency" key.

        None - Fit will run without any optimizer.
        """
        #opt = torch.optim.Adam(self.parameters())
        # TODO: add learning rate to config
        opt = torch.optim.AdamW(self.parameters(),lr=self.lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt,T_0=20,eta_min=1e-4)
  
        return [opt]

# Preparing for training - lightning variant 

* create relevant transformations, validation splits
* what this version differs is that we will be using the pytorch lightning framework - this allows easy TPU access for training 

lightning walkthrough: 
https://pytorch-lightning.readthedocs.io/en/latest/starter/introduction_guide.html
<br>
https://devblog.pytorchlightning.ai/train-anything-with-lightning-custom-loops-4be32314c961
<br>
https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pl_examples/loop_examples/mnist_lite.py
<br>

In [12]:
"""
All pre-trained models expect input images normalized in the same way, 
i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), 
where H and W are expected to be at least 224. 
The images have to be loaded in to a range of [0, 1] and then 
normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]. 
You can use the following transform to normalize:

https://pytorch.org/vision/stable/models.html

"""
train_transformation = T.Compose(
            [
                T.Resize([224,224]),# imgnet needs at least 224
                T.RandomHorizontalFlip(),
                T.RandomVerticalFlip(),
                T.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
                T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
                T.ConvertImageDtype(torch.float),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ), # imgnet requirements 
            ]
        )
# train_data = pawnetDataset(annotation_df=train_df,img_dir = os.path.join(file_path,"train"),transform = train_transformation)
# # batchsize should be parameter in config
# train_loader = torch.utils.data.DataLoader(train_data,batch_size=64,num_workers =2, shuffle=True)


test_transformation = T.Compose([
                T.Resize([224,224]),# imgnet needs at least 224
                T.ConvertImageDtype(torch.float),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ), # imgnet requirements 
                ]
            )

In [13]:
class PetfinderDataModule(LightningDataModule):
    """
    Lightning datamodule to handle all loaders
    """
    def __init__(
        self,
        img_dir, # os.path.join(file_path,"train")
        train_df,
        val_df,
        train_transformation,
        test_transformation,
        batch_size = 64,
        num_workers = 2
    ):
        super().__init__()
        self.img_dir = img_dir
        self._train_df = train_df
        self._val_df = val_df
        self.train_transformation = train_transformation
        self.test_transformation = test_transformation
        self.batch_size = batch_size
        self.num_workers = num_workers


    def train_dataloader(self):
        train_data = pawnetDataset(annotation_df=self._train_df,img_dir = self.img_dir ,transform = self.train_transformation) # can set custom len to let model exceed training size (since we are augmenting)
        return torch.utils.data.DataLoader(train_data,batch_size=self.batch_size,num_workers =self.num_workers, shuffle=True)

    def val_dataloader(self):
        val_data = pawnetDataset(annotation_df=self._val_df,img_dir = self.img_dir, transform = self.test_transformation)
        return torch.utils.data.DataLoader(val_data,batch_size=self.batch_size,num_workers =self.num_workers, shuffle=False)

# Training Loop

In [14]:
seed_everything(1)


# perfrom stratified sampling k fold model training
skf = StratifiedKFold(n_splits = model_config.n_splits, shuffle = True, random_state = 1)
splits = skf.split(train_df["Id"],train_df["Pawpularity"])


skf_train_list = [] # to store across folds
skf_valid_list = [] # to store across folds

for i, (train_index, test_index) in enumerate(splits):
    print("\n Starting: fold {}".format(i+1))
    
    # initialize model 
    criterion = torch.nn.BCEWithLogitsLoss()

    X_train, X_valid = train_df.iloc[train_index], train_df.iloc[test_index]
    X_train.reset_index(inplace=True,drop=True)
    X_valid.reset_index(inplace=True,drop=True)
    
    # build datamodule
    datamodule = PetfinderDataModule(img_dir = os.path.join(file_path,"train"),
                                     train_df=X_train,
                                     val_df=X_valid,
                                     train_transformation=train_transformation,
                                     test_transformation=test_transformation,
                                     batch_size=model_config.batch_size,
                                     num_workers=model_config.num_workers)
    
    
    model = pawNetBasic(criterion=criterion,model_config=model_config)
    print(model.summarize())
    earystopping = EarlyStopping(monitor="val_RMSE_loss",patience=5)
    lr_monitor = callbacks.LearningRateMonitor()
    loss_checkpoint = callbacks.ModelCheckpoint(
        filename="best_loss",
        monitor="val_RMSE_loss",
        save_top_k=1,
        mode="min",
        save_last=True,
    )
    logger = TensorBoardLogger(model_config.model_name)
    
    trainer = pl.Trainer(
        logger=logger, # tensorboard logger
        max_epochs=model_config.epoch,
        callbacks=[lr_monitor, loss_checkpoint, earystopping],
#          callbacks=[lr_monitor, loss_checkpoint],
         gpus=1,progress_bar_refresh_rate=1,accumulate_grad_batches=1
    )
    trainer.fit(model, datamodule=datamodule)




 Starting: fold 1


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet34-43635321.pth" to /root/.cache/torch/hub/checkpoints/resnet34-43635321.pth
  f"Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"
2021-11-24 02:01:53.092561: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


Validation sanity check: 0it [00:00, ?it/s]

val: skip logging for logloss




Training: -1it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss


  "`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6."



 Starting: fold 2


  f"Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"


Validation sanity check: 0it [00:00, ?it/s]

val: skip logging for logloss


Training: -1it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss

 Starting: fold 3


Validation sanity check: 0it [00:00, ?it/s]

val: skip logging for logloss


Training: -1it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss

 Starting: fold 4


Validation sanity check: 0it [00:00, ?it/s]

val: skip logging for logloss


Training: -1it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss

 Starting: fold 5


Validation sanity check: 0it [00:00, ?it/s]

val: skip logging for logloss


Training: -1it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss




Validating: 0it [00:00, ?it/s]

val: skip logging for logloss


In [15]:
# from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# # https://stackoverflow.com/questions/36700404/tensorflow-opening-log-data-written-by-summarywriter


# for i in range(5):
#     print(f"\n Fold: {i}==================================================")
#     path = [x for x in os.listdir(f"./pawnet_lightning_resnet/default/version_{i}/") if x.startswith("events")][0]
#     event_acc = EventAccumulator(os.path.join(f"pawnet_lightning_resnet/default/version_{i}/",path), size_guidance={'scalars': 0})
#     event_acc.Reload()

#     scalars = {}
#     for tag in event_acc.Tags()['scalars']:
#         events = event_acc.Scalars(tag)
#         scalars[tag] = [event.value for event in events]


#     print(scalars)


{'hp_metric': [-1.0], 'val_RMSE_loss': [19.52751922607422, 19.132272720336914, 18.87443733215332, 18.861148834228516, 18.899457931518555, 18.690818786621094, 18.812700271606445, 18.757587432861328, 18.62613868713379, 18.728683471679688, 18.774940490722656, 18.699066162109375, 18.955469131469727, 18.835399627685547], 'epoch': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 11.0, 11.0, 12.0, 12.0, 13.0, 13.0], 'train_logloss': [0.670686662197113, 0.677442729473114, 0.6378645896911621, 0.6460471749305725, 0.6374718546867371, 0.629768967628479, 0.6267431974411011, 0.6342352628707886, 0.6266129612922668, 0.636796236038208, 0.6515862941741943, 0.6502782106399536, 0.6501520872116089, 0.6343890428543091], 'train_RMSE_loss': [18.831972122192383, 23.18875503540039, 18.445117950439453, 16.643558502197266, 16.294763565063477, 13.197731018066406, 13.094585418701172, 13.403560638427734, 17.513381958007812, 17.054443359375, 17.99784278

In [16]:
# # this allow us to plot all metrics
# scalars

{'hp_metric': [-1.0],
 'val_RMSE_loss': [19.472293853759766,
  19.053701400756836,
  18.727155685424805,
  18.422914505004883,
  18.459199905395508,
  18.361417770385742,
  18.243032455444336,
  18.246681213378906,
  18.372053146362305,
  18.332294464111328,
  18.411405563354492,
  18.468154907226562],
 'epoch': [0.0,
  0.0,
  1.0,
  1.0,
  2.0,
  2.0,
  3.0,
  3.0,
  4.0,
  4.0,
  5.0,
  5.0,
  6.0,
  6.0,
  7.0,
  7.0,
  8.0,
  8.0,
  9.0,
  9.0,
  10.0,
  10.0,
  11.0,
  11.0],
 'train_logloss': [0.6535713076591492,
  0.645282506942749,
  0.6430951952934265,
  0.6565972566604614,
  0.6553606390953064,
  0.6226446032524109,
  0.6474259495735168,
  0.6576272249221802,
  0.6406980752944946,
  0.6422581672668457,
  0.6334529519081116,
  0.6395557522773743],
 'train_RMSE_loss': [20.533912658691406,
  19.70570182800293,
  14.783390045166016,
  18.769914627075195,
  19.209030151367188,
  16.65874481201172,
  17.494142532348633,
  19.672609329223633,
  17.441211700439453,
  16.2853507995605

In [17]:
# save
# with open("scalars.pkl","wb") as fout:
#     pickle.dump(scalars,fout)

# Load tensorboard (doesnt seem to work on kaggle) 

In [18]:
# %load_ext tensorboard

# %tensorboard --logdir ./pawnet_lightning_resnet