<a href="https://colab.research.google.com/github/issa-rashdan/INFORM_Project_M/blob/main/data_pre.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/issa-rashdan/INFORM_Project_M.git

Cloning into 'INFORM_Project_M'...
remote: Enumerating objects: 142, done.[K
remote: Counting objects: 100% (142/142), done.[K
remote: Compressing objects: 100% (130/130), done.[K
remote: Total 142 (delta 50), reused 68 (delta 9), pack-reused 0 (from 0)[K
Receiving objects: 100% (142/142), 121.47 KiB | 3.04 MiB/s, done.
Resolving deltas: 100% (50/50), done.


# Getting Echograms

In [3]:
import numpy as np
from INFORM_Project_M.data.echosounder_data.load_data.get_echograms_2 import get_echograms, count_classes_in_echograms

# You can select a specific year or use 'all' to include multiple years.
years = 2014
minimum_shape = 224
tuple_frequencies = (18, 38, 70, 120, 200, 333)
echograms = get_echograms(years=years, tuple_frequencies=tuple_frequencies, minimum_shape=minimum_shape)
print(f"Number of echograms: {len(echograms)}")

Selected 10 echograms from year 2014
Number of echograms: 10


# Data split into Train and Test

In [4]:
import os
import random
# Randomly split echograms into train/test
random.seed(42)  # Set a seed for reproducibility
Train_eg = []
test_eg = []
echogram = random.shuffle(echograms)  # Shuffle echograms randomly
for i,echogram in enumerate(echograms):
  if i < 0.8*len(echograms):
    Train_eg.append(echogram)
  else:
    test_eg.append(echogram)

#Save the echograms to files
train_dir = '/content/INFORM_Project_M/train'
test_dir = '/content/INFORM_Project_M/test'
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

for i, echogram in enumerate(Train_eg):
 os.path.join(train_dir, f'echogram_{i}.npy')
for i, echogram in enumerate(test_eg):
  os.path.join(test_dir, f'echogram_{i}.npy')

# Print the number of echograms in each set
print('Train Echogram:', len(Train_eg))
print('Test Echogram:', len(test_eg))

Train Echogram: 8
Test Echogram: 2


# Data Patching

In [5]:
from INFORM_Project_M.data.echosounder_data.preprocessing.resize_and_crop import SplitResizeEchogram, generate_patch_batches
from INFORM_Project_M.data.echosounder_data.preprocessing.normalization import db, clip, standardize_min_max
import numpy as np

data_transform = [db, clip, standardize_min_max]
split_patch_size = 448
output_patch_size = 448


#---Train set---
train_generators, num_patches_per_train_echogram = generate_patch_batches(Train_eg, split_patch_size,
                                                                          output_patch_size, data_transforms= data_transform,
                                                                          batch_size='full', verbose= False)
print('Number of patches per echogram for training:', num_patches_per_train_echogram )

#---Test set---
generators_test, num_patches_per_test_echogram = generate_patch_batches(test_eg, split_patch_size, output_patch_size,
                                                                        data_transforms= data_transform,
                                                                        batch_size='full', verbose= False)
print('Number of patches per echogram for testing:', num_patches_per_train_echogram )

train_patches = 0
test_patches = 0
for patches in num_patches_per_train_echogram:
  train_patches += patches
for patches in num_patches_per_test_echogram:
  test_patches += patches
print('Number of patches in training set:', train_patches)
print('Number of patches in test set:', test_patches)

Number of patches per echogram for training: [np.int64(21), np.int64(37), np.int64(7), np.int64(9), np.int64(16), np.int64(10), np.int64(8), np.int64(8)]
Number of patches per echogram for testing: [np.int64(21), np.int64(37), np.int64(7), np.int64(9), np.int64(16), np.int64(10), np.int64(8), np.int64(8)]
Number of patches in training set: 116
Number of patches in test set: 26


# dimensionality reduction 6 to 3

In [7]:
from INFORM_Project_M.data.echosounder_data.dataloader import BatchGeneratorDataset, GroupedGenerator, group_generators_by_patch_limit
from INFORM_Project_M.adaptation_method.model_config import Configuration
from torch.utils.data import DataLoader
import torch
import numpy as np


run = Configuration()
#---Training set---
print('-------Dimensions for Training set-------------')
grouped_generator_fns, grouped_patch_count = group_generators_by_patch_limit(train_generators,
                                                                             num_patches_per_train_echogram,
                                                                             64)
train_dataset = BatchGeneratorDataset(grouped_generator_fns)
train_loader = DataLoader(train_dataset, batch_size = None, pin_memory=True)

for label, data, mask in train_loader:
  print(data.shape)

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33missa-rashdan[0m ([33missa-rashdan-university-of-oslo[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


-------Dimensions for Training set-------------




torch.Size([58, 6, 448, 448])
torch.Size([58, 6, 448, 448])


In [9]:
#---Test Set---
print('-------Dimensions for Test set-------------')
grouped_generator_fns_test, grouped_patch_count_test = group_generators_by_patch_limit(generators_test,
                                                                                       num_patches_per_test_echogram,
                                                                                       64)
test_dataset = BatchGeneratorDataset(grouped_generator_fns_test)
test_loader = DataLoader(test_dataset, batch_size = None, pin_memory= True)

for label, data, mask in test_loader:
  print(data.shape)

-------Dimensions for Test set-------------
torch.Size([26, 6, 448, 448])


In [10]:
from INFORM_Project_M.adaptation_method.train_autoencoder import train_val_encoder, plot_loss
from INFORM_Project_M.adaptation_method.Autoencoder import Autoencoder
import torch.nn as nn
import torch.optim as optim

#For logging
run = Configuration(model_name = 'Autoencoder')

#Instantiating model
model = Autoencoder()



# Defining Loss function and optimizer
Loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = run.config['learning_rate'], weight_decay = run.config['Weight_decay'])

latent,  val_latent, avg_loss_train, avg_loss_val, stop_epoch = train_val_encoder(model, optimizer, Loss_func, run.config['epochs'], train_loader, test_loader, run)

plot_loss(run.config['epochs'], avg_loss_train, avg_loss_val, stop_epoch)

Epoch 1/30: 100%|██████████| 2/2 [01:38<00:00, 49.38s/it]


Train encodings: min=0.0001, max=0.9998
Val latents: min=0.0006, max=0.9982
 Train Loss = 0.1025 ,Validation Loss = 0.0989


Epoch 2/30:  50%|█████     | 1/2 [01:30<01:30, 90.09s/it]


KeyboardInterrupt: 