## GitHub Setup

In [2]:
# Clone the repository from GitHub
# NOTE: `%cd` moves the kernel's working directory into the fresh checkout, so
# every relative path used by later cells is resolved from that directory.
!git clone https://github.com/hits-sdo/HITS-SDO-Independent-Research.git
%cd HITS-SDO-Independent-Research/
# Make sure the `main` branch is the one that is checked out.
!git checkout main

Cloning into 'HITS-SDO-Independent-Research'...
remote: Enumerating objects: 4575, done.[K
remote: Counting objects: 100% (1158/1158), done.[K
remote: Compressing objects: 100% (729/729), done.[K
remote: Total 4575 (delta 412), reused 1104 (delta 382), pack-reused 3417[K
Receiving objects: 100% (4575/4575), 37.86 MiB | 25.88 MiB/s, done.
Resolving deltas: 100% (3779/3779), done.
/content/HITS-SDO-Independent-Research
Already on 'main'
Your branch is up to date with 'origin/main'.


## Import Libraries

In [3]:
!pip install -q -r requirements.txt

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/5.2 MB[0m [31m3.5 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/5.2 MB[0m [31m22.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m5.2/5.2 MB[0m [31m54.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for hdbscan (pyproject.toml) ... [?25l[?25hdone


In [None]:
# Standard Libraries
import copy
import glob
import os
import sys

# NumPy
import numpy as np

# SciPy
import scipy.stats as stats

# Pillow and Matplotlib
import PIL.Image as Image
import matplotlib.offsetbox as offsetbox
import matplotlib.pyplot as plt

# HDBSCAN
import hdbscan

# Lightly
from lightly.loss import NegativeCosineSimilarity, NTXentLoss
from lightly.models.modules import BYOLPredictionHead, BYOLProjectionHead
from lightly.models.utils import deactivate_requires_grad, update_momentum
from lightly.utils.scheduler import cosine_schedule

# Sklearn
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.model_selection import ParameterGrid

# PyTorch
import pytorch_lightning as pl
import pytorch_lightning as pl
import torch
import torchvision
from torch import nn
from torch.utils.data import Dataset

# External files
# The helper modules imported below are found through `sys.path`. The original
# location is written relative to the current working directory, which is not
# the same in every run (an earlier cell changes it with `%cd`), so a second
# candidate relative to that directory is tried as well.
# NOTE(review): the second candidate assumes `sdo_augmentation/` sits at the
# repository root - confirm against the repository layout.
_augmentation_dirs = [
    os.path.abspath('./../../sdo_augmentation/'),  # original location
    os.path.abspath('./sdo_augmentation/'),        # cwd is the repository root
]
for _augmentation_dir in _augmentation_dirs:
    # Only add locations that exist, and do not stack duplicates on re-run.
    if os.path.isdir(_augmentation_dir) and _augmentation_dir not in sys.path:
        sys.path.append(_augmentation_dir)
from augmentation import Augmentations
from augmentation_list import AugmentationList
from augmentation_test import read_image

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

## Download and Unzip Data

In [None]:
# 171 grayscale - https://drive.google.com/file/d/16WD0td1f5gx4yIIDkWWSTb-oZcezI1CU/view?usp=drive_link
# Download the archive from Google Drive using the file id from the link above.
!gdown 16WD0td1f5gx4yIIDkWWSTb-oZcezI1CU
# Unpack the gzip-compressed tarball into the current working directory.
!tar -zxf AIA171_Miniset_BW.tar.gz

Downloading...
From (uriginal): https://drive.google.com/uc?id=16WD0td1f5gx4yIIDkWWSTb-oZcezI1CU
From (redirected): https://drive.google.com/uc?id=16WD0td1f5gx4yIIDkWWSTb-oZcezI1CU&confirm=t&uuid=f2ff4a9f-a761-456b-b93a-1b82a3737b63
To: c:\Github Repositories\HITS-SDO-Independent-Research\src\notebooks\AIA171_Miniset_BW.tar.gz

  0%|          | 0.00/147M [00:00<?, ?B/s]
  0%|          | 524k/147M [00:00<00:33, 4.40MB/s]
  1%|▏         | 2.10M/147M [00:00<00:16, 8.76MB/s]
  3%|▎         | 3.67M/147M [00:00<00:14, 10.1MB/s]
  4%|▎         | 5.24M/147M [00:00<00:13, 10.6MB/s]
  5%|▍         | 6.82M/147M [00:00<00:12, 11.0MB/s]
  6%|▌         | 8.39M/147M [00:00<00:12, 11.2MB/s]
  7%|▋         | 9.96M/147M [00:00<00:12, 11.3MB/s]
  8%|▊         | 11.5M/147M [00:01<00:11, 11.4MB/s]
  9%|▉         | 13.1M/147M [00:01<00:11, 11.4MB/s]
 10%|█         | 14.7M/147M [00:01<00:11, 11.4MB/s]
 11%|█         | 16.3M/147M [00:01<00:11, 11.5MB/s]
 12%|█▏        | 17.8M/147M [00:01<00:11, 11.5MB/s]
 13%

In [None]:
# Root folder that the dataset searches recursively for .jpg images.
# NOTE(review): presumably the folder produced by unpacking
# AIA171_Miniset_BW.tar.gz - confirm the archive's top-level directory name.
data_path = 'AIA171_Miniset_BW'

## Define 1D Power Spectrum and Wasserstein Distance

In [None]:
# Calculates 1d Power Spectrum
def power_spectrum_1d(image):
    """Collapse the Fourier power of an image into a radial (1D) spectrum.

    The frequency grid is built from ``image.shape[0]`` only, so the image is
    expected to be a square 2-D array.

    Args:
        image: array-like exposing ``.shape`` and accepted by ``np.fft.fftn``.

    Returns:
        1-D array with one entry per unit-width radial wavenumber bin. The bin
        edges run from 0.5 to ``image.shape[0] // 2 + 0.5``, so the zero
        frequency term is not included. Each entry is the mean power inside
        the bin multiplied by the area of the bin's annulus.
    """
    side = image.shape[0]

    # Power is the squared magnitude of the Fourier transform.
    power = np.abs(np.fft.fftn(image)) ** 2

    # Integer wavenumbers along one axis, then their radial norm on the grid.
    wavenumbers = np.fft.fftfreq(side) * side
    k_x, k_y = np.meshgrid(wavenumbers, wavenumbers)
    radial_k = np.sqrt(k_x ** 2 + k_y ** 2)

    # Unit-width radial bins centred on the integer wavenumbers.
    edges = np.arange(0.5, side // 2 + 1, 1.)
    inner_edges, outer_edges = edges[:-1], edges[1:]

    binned = stats.binned_statistic(
        radial_k.flatten(),
        power.flatten(),
        statistic="mean",
        bins=edges,
    )
    spectrum = binned[0]

    # Weight the mean power of every bin by the area of its annulus.
    spectrum *= np.pi * (outer_edges ** 2 - inner_edges ** 2)

    return spectrum

# Calculates Wasserstein distance between two 1D weight profiles
def wasserstein(x, y):
    """Return the Wasserstein distance between two weighted index distributions.

    Each input is used as the weight vector of a distribution whose support is
    the index positions ``0 .. len(input) - 1``.

    Args:
        x: sequence of weights for the first distribution.
        y: sequence of weights for the second distribution.

    Returns:
        The value computed by ``scipy.stats.wasserstein_distance``.
    """
    x_positions = np.arange(len(x))
    y_positions = np.arange(len(y))
    return stats.wasserstein_distance(
        u_values=x_positions,
        v_values=y_positions,
        u_weights=x,
        v_weights=y,
    )

## Define Dataset

In [None]:
class PowerSpectrumDataset(Dataset):
    """Dataset yielding an image, its 1D power spectrum and its file path.

    Args:
        data_path: root folder searched recursively for ``*.jpg`` files.
        data_stride: when greater than 1, only every ``data_stride``-th file
            is kept; any other value keeps all files.
        datatype: stored on the instance as ``self.datatype``; it is not used
            by any method of this class.
    """

    def __init__(self, data_path, data_stride, datatype=np.float32):

        self.data_path = data_path
        # glob returns files in a filesystem-dependent order; sorting keeps the
        # index -> file mapping (and the stride subsampling) the same on every
        # machine and every run.
        self.image_files = sorted(glob.glob(data_path + "/**/*.jpg", recursive=True))
        if data_stride > 1:
            self.image_files = self.image_files[::data_stride]
        self.datatype = datatype

    def __len__(self):
        """Return the number of image files kept after striding."""

        return len(self.image_files)

    def __getitem__(self, idx):
        """Load one image and compute its 1D power spectrum.

        Returns:
            ``(image, power_spectrum, image_file)``. The tuple always has three
            elements, including when the spectrum is all zeros, so that samples
            can be batched together and unpacked uniformly.
        """

        image_file = self.image_files[idx]
        image = read_image(image_loc = image_file, image_format="jpg")

        power_spectrum = power_spectrum_1d(image)

        return image, power_spectrum, image_file

## Define BYOL Model

In [None]:
class BYOL(pl.LightningModule):
    """Lightning module with an online branch and a momentum-updated copy.

    The online branch is a ResNet-18 without its last child layer, followed by
    a projection head and a prediction head. The momentum branch is a deep
    copy of the backbone and projection head that is passed through
    ``deactivate_requires_grad`` and refreshed with ``update_momentum`` at the
    start of every training step.

    Args:
        lr: learning rate handed to the SGD optimizer.
        projection_size: output size of the projection head.
        prediction_size: output size of the prediction head.
        cosine_scheduler_start: start value given to ``cosine_schedule``.
        cosine_scheduler_end: end value given to ``cosine_schedule``.
        epochs: schedule length given to ``cosine_schedule``.
        loss: ``'cos'`` optimizes the negative cosine similarity; any other
            value optimizes the NT-Xent loss.
    """

    def __init__(self, lr=0.1, projection_size=256, prediction_size=256, cosine_scheduler_start=0.1, cosine_scheduler_end=1.0, epochs=10, loss='cos'):
        super().__init__()

        # Online branch: ResNet-18 minus its last child, then the two heads.
        base_network = torchvision.models.resnet18() # Play w/ resnet.
        feature_layers = list(base_network.children())[:-1]
        self.backbone = nn.Sequential(*feature_layers)
        self.projection_head = BYOLProjectionHead(512, 1024, projection_size)
        self.prediction_head = BYOLPredictionHead(projection_size, 1024, prediction_size)

        # Momentum branch: copies of the online modules with gradients switched off.
        self.backbone_momentum = copy.deepcopy(self.backbone)
        self.projection_head_momentum = copy.deepcopy(self.projection_head)
        for momentum_module in (self.backbone_momentum, self.projection_head_momentum):
            deactivate_requires_grad(momentum_module)

        # Both criteria are kept so each can be logged; `loss` picks the optimized one.
        self.loss = loss
        self.loss_cos = NegativeCosineSimilarity()
        self.loss_contrast = NTXentLoss()

        # Plain hyper-parameters read by training_step / configure_optimizers.
        self.lr = lr
        self.epochs = epochs
        self.cosine_scheduler_start = cosine_scheduler_start
        self.cosine_scheduler_end = cosine_scheduler_end

    def forward(self, x):
        """Run the online branch: backbone -> projection head -> prediction head."""
        features = self.backbone(x).flatten(start_dim=1)
        projection = self.projection_head(features)
        return self.prediction_head(projection)

    def forward_momentum(self, x):
        """Run the momentum branch and return its detached projection."""
        features = self.backbone_momentum(x).flatten(start_dim=1)
        return self.projection_head_momentum(features).detach()

    def training_step(self, batch, batch_idx):
        """Update the momentum branch, compute both losses and return the chosen one.

        ``batch`` must unpack into exactly three items; the first two are fed
        through both branches and the third is ignored.
        """

        # Momentum coefficient for the current epoch.
        tau = cosine_schedule(self.current_epoch, self.epochs, self.cosine_scheduler_start, self.cosine_scheduler_end)
        update_momentum(self.backbone, self.backbone_momentum, m=tau)
        update_momentum(self.projection_head, self.projection_head_momentum, m=tau)

        view_a, view_b, _ = batch
        pred_a = self.forward(view_a)
        target_a = self.forward_momentum(view_a)
        pred_b = self.forward(view_b)
        target_b = self.forward_momentum(view_b)

        # Symmetrized losses: each prediction is compared with the other view's target.
        loss_cos = 0.5 * (self.loss_cos(pred_a, target_b) + self.loss_cos(pred_b, target_a))
        loss_contrast = 0.5 * (self.loss_contrast(pred_a, target_b) + self.loss_contrast(pred_b, target_a))

        loss = loss_cos if self.loss == 'cos' else loss_contrast

        self.log('loss cos', loss_cos)
        self.log('loss contrast', loss_contrast)
        self.log('loss', loss)
        return loss

    def configure_optimizers(self):
        """Return a plain SGD optimizer over all parameters of the module."""
        return torch.optim.SGD(self.parameters(), lr=self.lr) # Play w/ optimizers.


In [None]:
# Seed the random number generators before the model is built, so the model
# construction below happens after seeding.
seed = 42
pl.seed_everything(seed, workers=True)

# Hyper-parameters. These names are also read by the dataset, dataloader and
# trainer cells further down.
learning_rate = 0.1              # learning rate of the SGD optimizer
projection_size = 32             # output size of the projection head
prediction_size = 32             # output size of the prediction head
cosine_scheduler_start = .1      # start value of the momentum cosine schedule
cosine_scheduler_end = 1.0       # end value of the momentum cosine schedule
epochs = 4                       # schedule length and the trainer's max_epochs
data_stride = 1                  # keep every data_stride-th image (1 keeps all)
batch_size = 512                 # batch size of the dataloader
loss = 'contrast'   # 'contrast' or 'cos'

# Build the model with the settings above.
model = BYOL(lr=learning_rate,
             projection_size=projection_size,
             prediction_size=prediction_size,
             cosine_scheduler_start=cosine_scheduler_start,
             cosine_scheduler_end=cosine_scheduler_end,
             epochs=epochs,
             loss=loss)

Seed set to 42


## Initialize Dataloader

In [None]:
# Build the dataset from the image folder, then wrap it in a shuffling
# dataloader that keeps the last (possibly smaller) batch.
# NOTE(review): BYOL.training_step unpacks every batch as (x0, x1, _) and feeds
# x0 and x1 to the backbone, while this dataset yields the image, its power
# spectrum and the file path - confirm the second item is meant as a model input.
dataset = PowerSpectrumDataset(data_path=data_path, data_stride=data_stride)

dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    num_workers=8,
)

## Run Training Loop

In [None]:
# Train for `epochs` epochs. Accelerator, devices and strategy are left to
# Lightning's automatic selection, metrics are logged every 10 steps and
# deterministic mode is requested.
trainer = pl.Trainer(max_epochs=epochs,
                     accelerator="auto", devices="auto", strategy="auto",
                    log_every_n_steps=10, deterministic=True)

# Fit the model on the dataloader defined in the previous cell.
trainer.fit(model=model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\starf\miniconda3\envs\sdo_research\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name                     | Type                     | Params
----------------------------------------------------------------------
0 | backbone                 | Sequential               | 11.2 M
1 | projection_head          | BYOLProjectionHead       | 559 K 
2 | prediction_head          | BYOLPredi