<a href="https://colab.research.google.com/github/hits-sdo/hits-sdo-similaritysearch/blob/experimentation-1/search_byol/byol_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook to Initialize the HITS-SDO Self-Similarity Search Environment
- Run all cells to initialize the environment. If you are prompted to restart the runtime so that updated package versions are used, restart it and then run all cells again to ensure that every dependency has been installed.

# Download and Unzip Data

In [None]:
# Download Data
!gdown 15C5spf1la7L09kvWXll2qt67Ec0rwLsY

Downloading...
From: https://drive.google.com/uc?id=15C5spf1la7L09kvWXll2qt67Ec0rwLsY
To: /content/aia_171_color_1perMonth.tar.gz
100% 146M/146M [00:00<00:00, 213MB/s]


In [None]:
# Unzip file
!tar -zxf aia_171_color_1perMonth.tar.gz

In [None]:
# Print some files to see that they exist
!du aia_171_color_1perMonth/. -l -h

40K	aia_171_color_1perMonth/./20170604_000036_aia.lev1_euv_12s_4k/tile_meta_data
3.2M	aia_171_color_1perMonth/./20170604_000036_aia.lev1_euv_12s_4k/tiles
3.2M	aia_171_color_1perMonth/./20170604_000036_aia.lev1_euv_12s_4k
40K	aia_171_color_1perMonth/./20190712_000036_aia.lev1_euv_12s_4k/tile_meta_data
3.2M	aia_171_color_1perMonth/./20190712_000036_aia.lev1_euv_12s_4k/tiles
3.2M	aia_171_color_1perMonth/./20190712_000036_aia.lev1_euv_12s_4k
40K	aia_171_color_1perMonth/./20190202_000036_aia.lev1_euv_12s_4k/tile_meta_data
3.2M	aia_171_color_1perMonth/./20190202_000036_aia.lev1_euv_12s_4k/tiles
3.2M	aia_171_color_1perMonth/./20190202_000036_aia.lev1_euv_12s_4k
40K	aia_171_color_1perMonth/./20110417_000036_aia.lev1_euv_12s_4k/tile_meta_data
3.2M	aia_171_color_1perMonth/./20110417_000036_aia.lev1_euv_12s_4k/tiles
3.2M	aia_171_color_1perMonth/./20110417_000036_aia.lev1_euv_12s_4k
40K	aia_171_color_1perMonth/./20120130_000036_aia.lev1_euv_12s_4k/tile_meta_data
3.2M	aia_171_color_1perMonth/./2012

# Clone repository

In [None]:
# Clone the repository from GitHub
!git clone https://github.com/hits-sdo/hits-sdo-similaritysearch

fatal: destination path 'hits-sdo-similaritysearch' already exists and is not an empty directory.


In [None]:
# Make the cloned repository the working directory; later cells use paths relative
# to it (e.g. `requirements.txt` and the `search_byol` package import).
%cd hits-sdo-similaritysearch/

/content/hits-sdo-similaritysearch


# Switch to Desired Branch

In [None]:
# Check out the branch that provides the requirements.txt installed further down.
# NOTE(review): the saved output of this cell reports 'byol-wandb', not
# 'experimentation-1' - the output looks stale; confirm which branch is intended.
!git checkout experimentation-1

Already on 'byol-wandb'
Your branch is up to date with 'origin/byol-wandb'.


In [None]:
# Confirm that branch is up to date
!git log --oneline

[33m22d840d[m[33m ([m[1;36mHEAD -> [m[1;32mbyol-wandb[m[33m, [m[1;31morigin/byol-wandb[m[33m)[m Update open in colab button
[33m491c6f9[m[33m ([m[1;31morigin/byol-dataset_doc[m[33m)[m Added documentation to test and database classes
[33mbbcf0f0[m[33m ([m[1;31morigin/byol-training_pipeline[m[33m, [m[1;32mbyol-training_pipeline[m[33m)[m Merge remote-tracking branch 'origin/main' into byol-training_pipeline
[33me770817[m Removed datatype casting
[33m3ac3080[m Fixed and annotated progress bars
[33mf7937e2[m added default datatype and made it single float
[33m570ff76[m Add tqdm bars to training notebook
[33mf666699[m Add dataset stride to work with even smaller sets
[33m899150b[m Merge remote-tracking branch 'origin/byol-augmentation' into byol-training_pipeline
[33m66b4624[m test and ensure that dimensions are chanels first
[33m570365f[m Update data loader in byol_train
[33mcb8037b[m fix opencv pip repository in requirements
[33m0db1aa6

# Install all necessary packages into the environment

In [None]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/hits-sdo/hits-sdo-packager.git@pip_nodata (from -r requirements.txt (line 12))
  Cloning https://github.com/hits-sdo/hits-sdo-packager.git (to revision pip_nodata) to /tmp/pip-req-build-7u06exup
  Running command git clone --filter=blob:none --quiet https://github.com/hits-sdo/hits-sdo-packager.git /tmp/pip-req-build-7u06exup
  Running command git checkout -b pip_nodata --track origin/pip_nodata
  Switched to a new branch 'pip_nodata'
  Branch 'pip_nodata' set up to track remote branch 'pip_nodata' from 'origin'.
  Resolved https://github.com/hits-sdo/hits-sdo-packager.git to commit 3a54caba2ae6bf7caf4eabaf580a6ffda1b3bbda
  Preparing metadata (setup.py) ... [?25l[?25hdone


##  Load Modules

In [None]:
import copy
import numpy as np
from tqdm.autonotebook import tqdm

import torch
import torchvision
from torch import nn

import wandb

from lightly.data import LightlyDataset
from lightly.data.multi_view_collate import MultiViewCollate
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules import BYOLPredictionHead, BYOLProjectionHead
from lightly.models.utils import deactivate_requires_grad, update_momentum
from lightly.transforms.simclr_transform import SimCLRTransform
from lightly.utils.scheduler import cosine_schedule

from search_byol.database import SDOTilesDataset

## Log in to Weights & Biases (wandb)

In [None]:
# Log in through the Python API instead of the shell CLI: the API prompts for the
# key inside the notebook, whereas `!wandb login` needs an interactive stdin that
# shell escapes do not provide in every Jupyter frontend.
# relogin=True forces a fresh prompt, matching the previous `--relogin` flag.
wandb.login(relogin=True)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


## Define run parameters and initialize Wandb

In [None]:
# Run-level hyperparameters; both are reused by later cells and recorded with the run.
batch_size = 700
data_stride = 10

wandb.init(
    project="search-byol",  # wandb project that receives this run
    config={  # hyperparameters / run metadata tracked alongside the metrics
        "batch size": batch_size,
        "data stride": data_stride,
    },
)


## Define BYOL Model

In [None]:
class BYOL(nn.Module):
    """BYOL network: a trainable online branch plus a frozen momentum branch.

    The online branch is ``backbone -> projection_head -> prediction_head``.
    The momentum branch consists of deep copies of the backbone and projection
    head whose parameters have gradients deactivated; this class never trains
    them directly.

    Args:
        backbone: Feature extractor. Its output is flattened from dimension 1
            onward, and the flattened size must equal ``input_dim``.
        input_dim: Number of features produced by the backbone per sample
            (default 512).
        hidden_dim: Hidden width of the projection and prediction heads
            (default 1024).
        output_dim: Size of the projected/predicted embedding (default 256).
    """

    def __init__(self, backbone, input_dim=512, hidden_dim=1024, output_dim=256):
        super().__init__()

        self.backbone = backbone
        self.projection_head = BYOLProjectionHead(input_dim, hidden_dim, output_dim)
        # The prediction head maps projection space back onto itself.
        self.prediction_head = BYOLPredictionHead(output_dim, hidden_dim, output_dim)

        # Momentum branch: start as exact copies of the online modules.
        self.backbone_momentum = copy.deepcopy(self.backbone)
        self.projection_head_momentum = copy.deepcopy(self.projection_head)

        # Exclude the copies from gradient-based optimisation.
        deactivate_requires_grad(self.backbone_momentum)
        deactivate_requires_grad(self.projection_head_momentum)

    def forward(self, x):
        """Run the online branch and return the prediction for batch ``x``."""
        y = self.backbone(x).flatten(start_dim=1)
        z = self.projection_head(y)
        p = self.prediction_head(z)
        return p

    def forward_momentum(self, x):
        """Run the momentum branch and return its projection, detached from autograd."""
        y = self.backbone_momentum(x).flatten(start_dim=1)
        z = self.projection_head_momentum(y)
        # Detach so the returned projection never carries gradient history.
        z = z.detach()
        return z

## Initialize Module

In [None]:
# Build the backbone from torchvision's ResNet-18 with its last child module removed,
# then wrap it in the BYOL model defined above.
resnet = torchvision.models.resnet18()
backbone = nn.Sequential(*tuple(resnet.children())[:-1])
model = BYOL(backbone)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

BYOL(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 

## Initialize Dataloader

In [None]:
# Location of the extracted tile archive on the Colab filesystem
data_path = '/content/aia_171_color_1perMonth'

# NOTE(review): the training loop unpacks two views per batch; confirm that
# double_augmentation=False still yields a pair from SDOTilesDataset.
dataset = SDOTilesDataset(
    data_path=data_path,
    data_stride=data_stride,
    double_augmentation=False,
)

# Shuffled loader that discards the final incomplete batch
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    num_workers=2,
    drop_last=True,
    shuffle=True,
    batch_size=batch_size,
)

## Run Training Loop 

In [None]:
# BYOL training loop.
# Each batch: update the momentum networks from the online networks, run both
# views through both branches, and minimise the symmetrised negative cosine
# similarity between each online prediction and the momentum projection of the
# other view.
criterion = NegativeCosineSimilarity()
optimizer = torch.optim.SGD(model.parameters(), lr=0.06)

epochs = 5
avg_loss = np.nan
print("Starting Training")
epoch = 0
epochs_bar = tqdm(range(epochs), dynamic_ncols=True, desc=f'Epoch {epoch:>02} - Av. loss: {avg_loss:.5f}')
for epoch in epochs_bar:
    # Accumulate as a Python float so the logged value is a plain number
    # rather than a (possibly CUDA) tensor.
    total_loss = 0.0
    # Momentum coefficient for this epoch, from lightly's cosine_schedule (0.996 -> 1).
    momentum_val = cosine_schedule(epoch, epochs, 0.996, 1)
    batches_bar = tqdm(dataloader, dynamic_ncols=True, leave=True, desc=f'Batches - Av. loss: {avg_loss:.5f}')
    for batch_count, (x0, x1) in enumerate(batches_bar, start=1):
        update_momentum(model.backbone, model.backbone_momentum, m=momentum_val)
        update_momentum(
            model.projection_head, model.projection_head_momentum, m=momentum_val
        )
        x0 = x0.to(device)
        x1 = x1.to(device)
        p0 = model(x0)
        z0 = model.forward_momentum(x0)
        p1 = model(x1)
        z1 = model.forward_momentum(x1)
        loss = 0.5 * (criterion(p0, z1) + criterion(p1, z0))
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Running mean over the batches seen so far. Dividing by len(dataloader)
        # here would under-report the average until the last batch of the epoch.
        avg_loss = total_loss / batch_count
        batches_bar.set_description(f'Epoch {epoch:>02} - Av. loss: {avg_loss:.5f} - Batches')
        batches_bar.refresh()

    # log metrics to wandb (after the last batch, avg_loss is the epoch mean)
    wandb.log({"Av. loss": avg_loss})

    epochs_bar.set_description(f'Epoch {epoch:>02} - Av. loss: {avg_loss:.5f}')
    epochs_bar.refresh()

wandb.finish()

Starting Training


Epoch 00 - Av. loss: nan:   0%|          | 0/5 [00:00<?, ?it/s]

Batches - Av. loss: nan:   0%|          | 0/15 [00:00<?, ?it/s]

Batches - Av. loss: -0.45559:   0%|          | 0/15 [00:00<?, ?it/s]

Batches - Av. loss: -0.66911:   0%|          | 0/15 [00:00<?, ?it/s]

Batches - Av. loss: -0.70190:   0%|          | 0/15 [00:00<?, ?it/s]

Batches - Av. loss: -0.71689:   0%|          | 0/15 [00:00<?, ?it/s]