<a href="https://colab.research.google.com/github/hits-sdo/hits-sdo-similaritysearch/blob/ss_dataloader/search_simsiam/simsiam_example_notebook_HITS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/hits-sdo/hits-sdo-similaritysearch

Cloning into 'hits-sdo-similaritysearch'...
remote: Enumerating objects: 323, done.[K
remote: Counting objects: 100% (323/323), done.[K
remote: Compressing objects: 100% (277/277), done.[K
remote: Total 323 (delta 69), reused 278 (delta 38), pack-reused 0[K
Receiving objects: 100% (323/323), 5.06 MiB | 16.08 MiB/s, done.
Resolving deltas: 100% (69/69), done.


In [2]:
%cd hits-sdo-similaritysearch/

/content/hits-sdo-similaritysearch


In [3]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/hits-sdo/hits-sdo-packager.git@pip_nodata (from -r requirements.txt (line 11))
  Cloning https://github.com/hits-sdo/hits-sdo-packager.git (to revision pip_nodata) to /tmp/pip-req-build-15evuggl
  Running command git clone --filter=blob:none --quiet https://github.com/hits-sdo/hits-sdo-packager.git /tmp/pip-req-build-15evuggl
  Running command git checkout -b pip_nodata --track origin/pip_nodata
  Switched to a new branch 'pip_nodata'
  Branch 'pip_nodata' set up to track remote branch 'pip_nodata' from 'origin'.
  Resolved https://github.com/hits-sdo/hits-sdo-packager.git to commit 187f561dec0ac179434fc78b7422436282f815ac
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lightning (from -r requirements.txt (line 6))
  Downloading lightning-2.0.2-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.

In [17]:
import math

import numpy as np
import torch
import torch.nn as nn
import torchvision

import matplotlib.pyplot as plt

from lightly.data import ImageCollateFunction, LightlyDataset, collate
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules.heads import SimSiamPredictionHead, SimSiamProjectionHead

In [2]:
# download data
!gdown 15C5spf1la7L09kvWXll2qt67Ec0rwLsY
# unzip data
!tar -zxf aia_171_color_1perMonth.tar.gz && rm aia_171_color_1perMonth.tar.gz

Downloading...
From: https://drive.google.com/uc?id=15C5spf1la7L09kvWXll2qt67Ec0rwLsY
To: /content/aia_171_color_1perMonth.tar.gz
100% 146M/146M [00:00<00:00, 173MB/s]


In [3]:
# path_to_data = '/content/gdrive/MyDrive/HITS/aia_171_color_1perMonth/'
path_to_data = '/content/aia_171_color_1perMonth'

In [4]:
num_workers = 8 # How many process giving model to train -- similar to threading
batch_size = 32 # A subset of files that the model sees to update it's parameters
seed = 1 # Seed for random generator for reproducability
epochs = 50 # How many times we go through our entire data set
input_size = 128 #The number of pixels in x or y

# dimension of the embeddings
num_ftrs = 512 
# dimension of the output of the prediction and projection heads
out_dim = proj_hidden_dim = 512
# the prediction head uses a bottleneck architecture
pred_hidden_dim = 128

In [5]:
# seed torch and numpy 
# used for reproducibility in creating the model
torch.manual_seed(0)
np.random.seed(0)

In [6]:
# define the augmentations for self-supervised learning
collate_fn = ImageCollateFunction(
    input_size=input_size,
    # require invariance to flips and rotations
    hf_prob=0.5,
    vf_prob=0.5,
    rr_prob=0.5,
    # satellite images are all taken from the same height
    # so we use only slight random cropping
    min_scale=0.5,
    # use a weak color jitter for invariance w.r.t small color changes
    cj_prob=0.2,
    cj_bright=0.1,
    cj_contrast=0.1,
    cj_hue=0.1,
    cj_sat=0.1,
)

#test for the collate function
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
img = np.zeros((128,128,3))
img = torchvision.transforms.ToPILImage()(np.uint8(255*img))

input = [(img, 0, 'my-image.png')]

output = collate_fn(input)

(img_t0, img_t1), label, filename = output

# print(img_t0.shape, img_t1.shape)



# create a lightly dataset for training, since the augmentations are handled
# by the collate function, there is no need to apply additional ones here
dataset_train_simsiam = LightlyDataset(input_dir=path_to_data)

#3283 x 32 = 10506
print(len(dataset_train_simsiam))
# returns image, folder num, tile name
print(dataset_train_simsiam[800])


# create a dataloader for training
dataloader_train_simsiam = torch.utils.data.DataLoader(
    dataset_train_simsiam,
    batch_size=batch_size,
    shuffle=True,           # data reshuffled at every epoch if True
    collate_fn=collate_fn,  # constructs function
    drop_last=True,         # If want to merge datasets (optional) - mostly used when batches are loaded from map-styled datasets.
    num_workers=num_workers,
)

# create a torchvision transformation for embedding the dataset after training
# here, we resize the images to match the input size during training and apply
# a normalization of the color channel based on statistics from imagenet
test_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((input_size, input_size)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=collate.imagenet_normalize["mean"],
            std=collate.imagenet_normalize["std"],
        ),
    ]
)

# create a lightly dataset for embedding
dataset_test = LightlyDataset(input_dir=path_to_data, transform=test_transforms)

# create a dataloader for embedding
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=num_workers,
)

105056
(<PIL.Image.Image image mode=RGB size=128x128 at 0x7F74AF891060>, 1, '20100703_000036_aia.lev1_euv_12s_4k/tiles/20100703_000036_aia.lev1_euv_12s_4k_tile_1024_2944.jpg')




In [15]:


#test for the collate function
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
input = [(dataset_train_simsiam[800][0], 0, 'my-image.png')]

output = collate_fn(input)

(img_t0, img_t1), label, filename = output

print(img_t0.shape, img_t1.shape)



torch.Size([1, 3, 128, 128]) torch.Size([1, 3, 128, 128])


In [23]:


plt.imshow(img_t0.permute(2, 3, 1, 0)[:,:,:,0],)


tensor([[[[ 0.3652,  0.3652,  0.3481,  ...,  1.1015,  1.1358,  1.1529],
          [ 0.7419,  0.6563,  0.5193,  ...,  1.1015,  1.1358,  1.1872],
          [ 0.7933,  0.7248,  0.6049,  ...,  1.1015,  1.1358,  1.2043],
          ...,
          [ 0.4337,  0.4166,  0.4166,  ...,  1.2385,  1.2899,  1.3584],
          [ 0.4166,  0.3994,  0.3994,  ...,  1.2728,  1.3070,  1.3584],
          [ 0.3994,  0.3823,  0.3823,  ...,  1.2899,  1.3070,  1.3584]],

         [[-0.4251, -0.4251, -0.4426,  ..., -0.0924, -0.0749, -0.0749],
          [-0.2500, -0.3725, -0.5126,  ..., -0.1800, -0.1625, -0.1275],
          [-0.2850, -0.3200, -0.4601,  ..., -0.1800, -0.1625, -0.1099],
          ...,
          [-0.4951, -0.5126, -0.5126,  ..., -0.0224, -0.0749, -0.0224],
          [-0.5126, -0.5301, -0.5301,  ..., -0.0049, -0.0574, -0.0224],
          [-0.5301, -0.5301, -0.5301,  ..., -0.0749, -0.0574, -0.0224]],

         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044, -

In [7]:
class SimSiam(nn.Module):
    def __init__(self, backbone, num_ftrs, proj_hidden_dim, pred_hidden_dim, out_dim):
        super().__init__()
        self.backbone = backbone
        self.projection_head = SimSiamProjectionHead(num_ftrs, proj_hidden_dim, out_dim)
        self.prediction_head = SimSiamPredictionHead(out_dim, pred_hidden_dim, out_dim)

    def forward(self, x):
        # get representations
        f = self.backbone(x).flatten(start_dim=1)
        # get projections
        z = self.projection_head(f)
        # get predictions
        p = self.prediction_head(z)
        # stop gradient
        z = z.detach()
        return z, p


# we use a pretrained resnet for this tutorial to speed
# up training time but you can also train one from scratch
resnet = torchvision.models.resnet18()
backbone = nn.Sequential(*list(resnet.children())[:-1])
model = SimSiam(backbone, num_ftrs, proj_hidden_dim, pred_hidden_dim, out_dim)

In [8]:
# SimSiam uses a symmetric negative cosine similarity loss
criterion = NegativeCosineSimilarity()

# scale the learning rate
lr = 0.05 * batch_size / 256
# use SGD with momentum and weight decay
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

In [11]:
#true if currently have GPU
torch.cuda.is_available()

#check pgu count
#not number of workers, that is seperate
torch.cuda.device_count()

1

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"

device = "mps" if torch.backends.mps.is_available() else device

model.to(device)

avg_loss = 0.0
avg_output_std = 0.0
for e in range(epochs):
    batch_count = 0
    for (x0, x1), _, _ in dataloader_train_simsiam:
        # move images to the gpu
        x0 = x0.to(device)
        x1 = x1.to(device)

        # run the model on both transforms of the images
        # we get projections (z0 and z1) and
        # predictions (p0 and p1) as output
        z0, p0 = model(x0)
        z1, p1 = model(x1)

        # apply the symmetric negative cosine similarity
        # and run backpropagation
        lo2,ss = 0.5 * (criterion(z0, p1) + criterion(z1, p0))
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        # calculate the per-dimension standard deviation of the outputs
        # we can use this later to check whether the embeddings are collapsing
        output = p0.detach()
        output = torch.nn.functional.normalize(output, dim=1)

        output_std = torch.std(output, 0)
        output_std = output_std.mean()
        print(batch_count)
        batch_count += 1
        print(loss)
        if(batch_count == 10):
          break
        # use moving averages to track the loss and standard deviation
        w = 0.9
        avg_loss = w * avg_loss + (1 - w) * loss.item()
        avg_output_std = w * avg_output_std + (1 - w) * output_std.item()

    # the level of collapse is large if the standard deviation of the l2
    # normalized output is much smaller than 1 / sqrt(dim)
    collapse_level = max(0.0, 1 - math.sqrt(out_dim) * avg_output_std)
    # print intermediate results
    print(
        f"[Epoch {e:3d}] "
        f"Loss = {avg_loss:.2f} | "
        f"Collapse Level: {collapse_level:.2f} / 1.00"
    )

0
tensor(0.0004, device='cuda:0', grad_fn=<MulBackward0>)
1
tensor(-0.0002, device='cuda:0', grad_fn=<MulBackward0>)
2
tensor(-0.0026, device='cuda:0', grad_fn=<MulBackward0>)
3
tensor(-0.0089, device='cuda:0', grad_fn=<MulBackward0>)
4
tensor(-0.0200, device='cuda:0', grad_fn=<MulBackward0>)
5
tensor(-0.0088, device='cuda:0', grad_fn=<MulBackward0>)
6
tensor(-0.0281, device='cuda:0', grad_fn=<MulBackward0>)
7
tensor(-0.0162, device='cuda:0', grad_fn=<MulBackward0>)
8
tensor(-0.0129, device='cuda:0', grad_fn=<MulBackward0>)
9
tensor(-0.0213, device='cuda:0', grad_fn=<MulBackward0>)
[Epoch   0] Loss = -0.01 | Collapse Level: 0.50 / 1.00
0
tensor(-0.0343, device='cuda:0', grad_fn=<MulBackward0>)
1
tensor(-0.0351, device='cuda:0', grad_fn=<MulBackward0>)
2
tensor(-0.0324, device='cuda:0', grad_fn=<MulBackward0>)
3
tensor(-0.0340, device='cuda:0', grad_fn=<MulBackward0>)
4
tensor(-0.0411, device='cuda:0', grad_fn=<MulBackward0>)
5
tensor(-0.0436, device='cuda:0', grad_fn=<MulBackward0>)
6


KeyboardInterrupt: ignored