In [1]:
import os
import logging
import torch
import socket
import pickle
import tqdm
import random

from deepprojection.datasets.lite    import SPIDataset               , TripletCandidate
from deepprojection.model            import OnlineTripletSiameseModel, ConfigSiameseModel
from deepprojection.trainer          import OnlineTrainer            , ConfigTrainer
from deepprojection.validator        import OnlineLossValidator      , ConfigValidator
from deepprojection.encoders.convnet import Hirotaka0122             , ConfigEncoder
from deepprojection.utils            import EpochManager             , MetaLog, init_logger, split_dataset, set_seed

from datetime import datetime

from image_preprocess_faulty_sq import DatasetPreprocess

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# [[[ SEED ]]]
# Fix the seed before any data handling so the run can be repeated.
# set_seed comes from deepprojection.utils; which RNGs it seeds is not visible here.
seed = 0
set_seed(seed)

In [4]:
# [[[ LOGGING ]]]
# Initialise the 'train' logger and keep the timestamp it returns.
# saves_log = False presumably means no log file is written — TODO confirm in init_logger.
timestamp = init_logger(log_name = 'train', returns_timestamp = True, saves_log = False)
print(timestamp)

2022_1214_2116_32


In [5]:
# [[[ EXPERIMENT SETTINGS ]]]
# Split fractions handed to split_dataset: the first split uses frac_train,
# the second split (applied to the remainder) uses frac_validate.
frac_train = frac_validate = 0.5

# Sampling sizes handed to TripletCandidate.
num_sample_train     = 100
num_sample_per_label = 15

# Number of dataset items per DataLoader batch.
batch_size = 10

In [6]:
# [[[ DATASET ]]]
# Where the pickled dataset for this experiment lives (directory + file name).
drc_dataset  = 'fastdata.h5'
fl_dataset   = 'mini.sq.train.relabel.pickle'    # Raw, just give it a try
path_dataset = os.path.join(drc_dataset, fl_dataset)

# Read the raw data back from disk.
# NOTE(review): pickle.load can execute arbitrary code stored in the file,
# so only point this at files from a trusted source.
with open(path_dataset, mode = 'rb') as fh:
    dataset_list = pickle.load(fh)

In [7]:
# Split data...
# Stage 1: carve the training portion off the full list.
# Stage 2: divide what is left into validation and test portions.
# NOTE(review): both calls pass seed = None although a global `seed` exists —
# confirm split_dataset still gives a repeatable split in that case.
data_train, data_val_and_test = split_dataset(
    dataset_list,
    frac_train,
    seed = None,
)
data_validate, data_test = split_dataset(
    data_val_and_test,
    frac_validate,
    seed = None,
)

In [8]:
# Define the training set
# Built from the training split only; no transform is attached yet
# (dataset_train.trans is assigned in the preprocessing cell).
dataset_train = TripletCandidate(
    dataset_list         = data_train,
    num_sample           = num_sample_train,
    num_sample_per_label = num_sample_per_label,
    trans                = None,
)

In [9]:
# Preprocess dataset...
# Data preprocessing can be lengthy; it is defined in the imported module
# image_preprocess_faulty_sq (class DatasetPreprocess).
# Order matters here: img_orig is fetched while dataset_train.trans is still None,
# img_trans is fetched with the same lookup after the transform has been attached.
img_orig            = dataset_train[0][1][0][0]   # item 0 -> element 1 -> entry 0 -> entry 0 (presumably the first candidate's first channel — TODO confirm)
dataset_preproc     = DatasetPreprocess(img_orig)
trans               = dataset_preproc.config_trans()
dataset_train.trans = trans
img_trans           = dataset_train[0][1][0][0]

12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - ___/ Preprocess Settings \___
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - Apply Poisson noise. 
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - Apply Gaussian noise. sigma = 0.15.
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply random shift. frac_y_shift_max = 0.1, frac_x_shift_max = 0.1.
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply cropping.
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply downsampling. resize_y = 2, resize_x = 2.
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply random rotation. angle = None, center = (24, 24).
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply random patching. size_patch_y = 7, size_patch_x = 7.
12/14/2022 21:16:37 INFO image_preprocess_faulty_sq          - TRANS : Apply random zoom. max_zoom_percent = 0.4.


In [10]:
from torch.utils.data.dataloader import DataLoader

In [11]:
# Batch the training set; shuffle = False keeps the items in dataset order.
loader_train = DataLoader(dataset_train, shuffle = False, batch_size = batch_size)

In [12]:
# Explicit iterator so batches can be pulled one at a time with next().
loader_train_iter = iter(loader_train)

In [13]:
# Pull one batch from the iterator.
batch = next(loader_train_iter)

In [14]:
# A batch unpacks into three parts: the encode values, the candidate images and the metadata.
batch_encode, batch_candidate_nplist, batch_metadata_list = batch

In [15]:
# Transpose the nested metadata: entry [i][j] becomes entry [j][i].
# NOTE(review): re-running this cell transposes back, so it is not idempotent.
batch_metadata_list = [ list(column) for column in zip(*batch_metadata_list) ]

#### Pass the batch through our model

In [16]:
# Pick the compute device: the index of the current CUDA device when CUDA is
# available, otherwise the string 'cpu'.
if torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = 'cpu'

In [17]:
# [[[ IMAGE ENCODER ]]]
# The encoder input size is read off the last two axes of a transformed sample image.
dim_emb        = 128
size_y, size_x = img_trans.shape[-2:]
config_encoder = ConfigEncoder(
    dim_emb = dim_emb,
    size_y  = size_y,
    size_x  = size_x,
    isbias  = True,
)
encoder = Hirotaka0122(config_encoder)


# [[[ MODEL ]]]
# Wrap the encoder in the online triplet Siamese model.
# timestamp_prev = None is forwarded to init_params as from_timestamp
# (presumably "do not restore from an earlier run" — TODO confirm).
alpha          = 0.05
timestamp_prev = None
config_siamese = ConfigSiameseModel(alpha = alpha, encoder = encoder)
model          = OnlineTripletSiameseModel(config_siamese)
model.init_params(from_timestamp = timestamp_prev)

# Move the parameters to the chosen device as 32-bit floats; left as the last
# expression so the notebook displays the model.
model.to(device, dtype = torch.float)

12/14/2022 21:16:39 INFO deepprojection.encoders.convnet     - ___/ Configure Encoder \___
12/14/2022 21:16:39 INFO deepprojection.encoders.convnet     - KV - dim_emb          : 128
12/14/2022 21:16:39 INFO deepprojection.encoders.convnet     - KV - size_y           : 48
12/14/2022 21:16:39 INFO deepprojection.encoders.convnet     - KV - size_x           : 48
12/14/2022 21:16:39 INFO deepprojection.encoders.convnet     - KV - isbias           : True
12/14/2022 21:16:39 INFO deepprojection.model                - ___/ Configure Siamese Model \___
12/14/2022 21:16:39 INFO deepprojection.model                - KV - alpha            : 0.05
12/14/2022 21:16:39 INFO deepprojection.model                - KV - encoder          : Hirotaka0122(
  (conv): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): PReLU(num_parameters=1)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(32, 

OnlineTripletSiameseModel(
  (encoder): Hirotaka0122(
    (conv): Sequential(
      (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
      (1): PReLU(num_parameters=1)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.1, inplace=False)
      (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
      (5): PReLU(num_parameters=1)
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): Dropout(p=0.1, inplace=False)
    )
    (embed): Sequential(
      (0): Linear(in_features=5184, out_features=512, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=512, out_features=128, bias=True)
    )
  )
)

#### Select semi-hard triplets

In [None]:
# Pull the next batch from the same iterator.
# NOTE(review): this advances loader_train_iter, so `batch` no longer refers to the batch fetched earlier.
batch = next(loader_train_iter)

In [None]:
# Unpack the current batch into encode values, candidate images and metadata
# (rebinds the three names used by the cells that follow).
batch_encode, batch_candidate_nplist, batch_metadata_list = batch

In [None]:
# Inspect the encode values of the current batch.
batch_encode

In [None]:
# Look up what encode value 120 maps to.
# NOTE(review): 120 is hard-coded; presumably it was read off batch_encode — the lookup fails if that key is absent.
dataset_train.encode_to_label_dict[120]

In [None]:
# Transpose the nested metadata: entry [i][j] becomes entry [j][i].
# NOTE(review): re-running this cell transposes back, so it is not idempotent.
batch_metadata_list = [ list(column) for column in zip(*batch_metadata_list) ]

In [18]:
# Move the encode values onto the same device as the model.
batch_encode = batch_encode.to(device = device)

In [19]:
# Move the candidate images onto the same device as the model.
batch_candidate_nplist = batch_candidate_nplist.to(device = device)

In [20]:
# Ask the model to pick semi-hard triplets from the current batch.
# logs_triplets = True makes the model log one line per selected triplet.
triplet_list, dist_list = model.select_semi_hard(
    batch_encode,
    batch_candidate_nplist,
    dataset_train.encode_to_label_dict,
    batch_metadata_list,
    logs_triplets = True,
)

12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 1BR1 2(2) 75, 1BR1 2(4) 58, 6WJJ 1(1) 36; semi-hard 4.023236e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 6RAO 1(1) 45, 6RAO 1(1) 60, 1BR1 2(3) 95; semi-hard 3.301352e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 7AR9 2(3) 88, 7AR9 2(3) 22, 6VM1 1(1) 39; semi-hard 4.185802e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 7DQD 1(1) 48, 7DQD 1(1) 12, 6VM1 2(3) 82; semi-hard 3.464109e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 7KDV 1(1) 49, 7KDV 1(1) 54, 6VM1 2(3) 40; semi-hard 4.469410e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 6WJJ 1(1) 99, 6WJJ 1(1) 36, 6VM1 2(4) 66; semi-hard 4.386210e-02
12/14/2022 21:16:41 INFO deepprojection.model                - DATA - 7A5P 1(1) 67, 7A5P 1(1) 32, 1BR1 2(2) 20; semi-hard 2.317768e-02
12/14/2022 21:16:41 INFO deepprojection.model          

In [21]:
# Inspect the selected triplets: each entry holds three (idx_encode, idx_candidate) pairs,
# used further down as anchor, positive and negative.
triplet_list

[((0, 2), (0, 11), (5, 11)),
 ((1, 6), (1, 11), (0, 4)),
 ((2, 10), (2, 9), (7, 4)),
 ((3, 1), (3, 6), (9, 13)),
 ((4, 11), (4, 14), (9, 0)),
 ((5, 7), (5, 11), (9, 5)),
 ((6, 1), (6, 11), (0, 6)),
 ((7, 9), (7, 1), (0, 3)),
 ((8, 11), (8, 3), (0, 10)),
 ((9, 1), (9, 8), (5, 0))]

In [None]:
# Scratch check: character count of a sample string (presumably one metadata entry — TODO confirm).
len("4 7NP3 5")

In [None]:
# Inspect the selected triplets again (three (idx_encode, idx_candidate) pairs per entry).
triplet_list

In [None]:
# Inspect the candidate images belonging to the first entry of the batch.
batch_candidate_nplist[0]

In [None]:
# Inspect the encode values of the current batch (now on `device`).
batch_encode

In [None]:
# First index pair (the anchor) of every selected triplet.
[ anchor for anchor, _, _ in triplet_list ]

In [None]:
# Inspect the full candidate tensor of the batch.
# NOTE(review): this prints every image; prefer .shape or a small slice when sharing the notebook.
batch_candidate_nplist

In [None]:
# Shape of the candidate tensor; the gather cells below flatten everything ahead of its last three axes.
batch_candidate_nplist.shape

In [None]:
# Gather the anchor image of every selected triplet.
# Each triplet entry is (idx_encode, idx_candidate).  Once the leading axes are
# flattened by view(-1, ...), entry (i, j) sits at row i * num_candidate + j,
# where num_candidate is the size of axis 1 (candidates per batch entry).
# Using shape[0] (the batch size) as the stride picks the wrong rows whenever
# the two sizes differ.  Assumes a 5-D (batch, candidate, C, H, W) tensor.
num_candidate    = batch_candidate_nplist.shape[1]
batch_candidates = batch_candidate_nplist.view(-1, *batch_candidate_nplist.shape[-3:])
batch_a = batch_candidates[ [ triplet[0][0] * num_candidate + triplet[0][1] for triplet in triplet_list ] ]

In [None]:
# Gather the positive image of every selected triplet.
# Each triplet entry is (idx_encode, idx_candidate).  Once the leading axes are
# flattened by view(-1, ...), entry (i, j) sits at row i * num_candidate + j,
# where num_candidate is the size of axis 1 (candidates per batch entry).
# Using shape[0] (the batch size) as the stride picks the wrong rows whenever
# the two sizes differ.  Assumes a 5-D (batch, candidate, C, H, W) tensor.
num_candidate    = batch_candidate_nplist.shape[1]
batch_candidates = batch_candidate_nplist.view(-1, *batch_candidate_nplist.shape[-3:])
batch_p = batch_candidates[ [ triplet[1][0] * num_candidate + triplet[1][1] for triplet in triplet_list ] ]

In [None]:
# Gather the negative image of every selected triplet.
# Each triplet entry is (idx_encode, idx_candidate).  Once the leading axes are
# flattened by view(-1, ...), entry (i, j) sits at row i * num_candidate + j,
# where num_candidate is the size of axis 1 (candidates per batch entry).
# Using shape[0] (the batch size) as the stride picks the wrong rows whenever
# the two sizes differ.  Assumes a 5-D (batch, candidate, C, H, W) tensor.
num_candidate    = batch_candidate_nplist.shape[1]
batch_candidates = batch_candidate_nplist.view(-1, *batch_candidate_nplist.shape[-3:])
batch_n = batch_candidates[ [ triplet[2][0] * num_candidate + triplet[2][1] for triplet in triplet_list ] ]

In [None]:
# Run the anchor / positive / negative batches through the model.
# NOTE(review): calling model(...) instead of model.forward(...) is the usual torch convention because it also runs registered hooks.
model.forward(batch_a, batch_p, batch_n)

In [None]:
# Same anchor gather, written with tuple unpacking instead of positional indexing.
# The row of entry (idx_encode, idx_a) in the flattened view is
# idx_encode * (candidates per entry) + idx_a, so the stride must be shape[1];
# shape[0] (the batch size) would address the wrong rows.
# Assumes a 5-D (batch, candidate, C, H, W) tensor.
batch_a2 = batch_candidate_nplist.view(-1, *batch_candidate_nplist.shape[-3:])[ [ idx_encode * batch_candidate_nplist.shape[1] + idx_a for (idx_encode, idx_a), _, _ in triplet_list ] ]

In [None]:
# Element-wise comparison of the two anchor gathers; every element should be True if both formulations agree.
batch_a == batch_a2

In [None]:
# Scratch data: a 10 x 20 boolean mask on `device`, True where a uniform random draw exceeds 0.5.
test_data = torch.rand((10,20), device = device) > 0.5

In [None]:
# Reduce over the last axis: one boolean per row, True if the row contains any True.
test_data.any(dim = -1)

In [None]:
# Same per-row reduction as the previous cell (duplicate scratch cell).
test_data.any(dim = -1)

In [None]:
# Print each row index next to its per-row "any True" flag.
for i, data in enumerate(test_data.any(dim = -1)):
    print(i, data)

In [None]:
# Indices of the rows that contain no True value at all.
[ idx for idx, has_true in enumerate(test_data.any(dim = -1)) if not has_true ]

In [None]:
import numpy as np
# Two short strings packed into a NumPy unicode array (scratch input for the torch.tensor probe).
test_ary = np.array(('asdf', 'sadf'))

In [None]:
# Probe whether a NumPy string array can be turned into a torch tensor.
# torch.tensor only accepts numeric/bool ndarrays and raises TypeError for
# string dtypes; catch it so the notebook still runs top to bottom and the
# outcome is shown as text instead of a traceback.
try:
    test_tensor = torch.tensor(test_ary)
except TypeError as err:
    test_tensor = None
    print(f"torch.tensor cannot convert the string array: {err}")

# Displays the tensor if the conversion worked; shows nothing when it is None.
test_tensor