# Package and module setup

In [None]:
import torch
import sys
import time
import yaml
import torchvision
from torchvision import transforms, datasets

import numpy as np
import os
from torch import cuda
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

In [None]:
sys.path.append('../')
from mymodels.resnet_base_network import ResNet18
from mydata.imageloader import MyDataset, psnrDataUnit

In [None]:
# Number of image pairs per batch, used by both data loaders.
batch_size = 32
# The only preprocessing step is ToTensor; no other transform is composed in.
data_transforms = torchvision.transforms.Compose([transforms.ToTensor()])

# Read the experiment configuration. The context manager closes the file
# handle, which the previous bare open() call left dangling.
# NOTE(review): FullLoader can build more than plain YAML types; if the config
# only contains plain mappings/scalars, yaml.safe_load is the safer choice.
with open("../config/config.yaml", "r") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Use the GPU when one is available; assign device = 'cpu' to force CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
print(f"Training with: {device}")
if device == 'cuda':
    # Release cached GPU memory left over from earlier notebook runs.
    torch.cuda.empty_cache()


# Train and test data
- Shuffle the training data
- Build the datasets
- Build the data loaders

In [None]:
# Parse the pair file into psnrDataUnit records, one record per line.
pair_list = []
file_path = "/home/hong/dir1/PyTorch-BYOL/user_1.txt"

# The context manager guarantees the file is closed even if a line fails to
# parse (the manual open()/close() pair leaked the handle in that case).
with open(file_path, 'r') as f:
    for line in f:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # with an IndexError on the field lookups below.
        if not line.strip():
            continue
        # Fields are separated by single spaces. The fourth field still
        # carries the line's trailing newline, exactly as before.
        # NOTE(review): psnrDataUnit is assumed to tolerate that — confirm.
        listed = line.split(" ")
        pair_list.append(psnrDataUnit(listed[0], listed[1], listed[2], listed[3]))


In [None]:
train_data_path = '/mnt/URP_DS/HR'  # without last slash
test_data_path = '/mnt/URP_DS/HR'  # without last slash

# Index of the example entry printed at the end of this cell.
img1_idx = 0

# Each entry is (path of image 1, path of image 2, target value). Image names
# from the pair file are used verbatim; no file extension is appended.
train_imgs_psnr_list = [
    (train_data_path + '/' + p_unit.getimg1(),
     train_data_path + '/' + p_unit.getimg2(),
     p_unit.getsrsum())
    for p_unit in pair_list
]

# NOTE(review): the test list is built from the same pair_list and the same
# directory as the training list, so both sets contain identical pairs.
test_imgs_psnr_list = [
    (test_data_path + '/' + p_unit.getimg1(),
     test_data_path + '/' + p_unit.getimg2(),
     p_unit.getsrsum())
    for p_unit in pair_list
]

# Optional split of each list into two parts, e.g. 0.8 -> (80, 20).
# With set_ratio1 = 1 (the current setting) no split is performed.
set_ratio1 = 1
set_ratio2 = 1 - set_ratio1  # share of the second part; kept for reference

if set_ratio1 < 1:
    # Both halves use the SAME split index so they are disjoint. The previous
    # code started the second half at int(set_ratio2 * len), which makes the
    # halves overlap (0.8 gave [:0.8n] and [0.2n:]).
    train_split = int(set_ratio1 * len(train_imgs_psnr_list))
    test_split = int(set_ratio1 * len(test_imgs_psnr_list))
    train_imgs_psnr_list, train2_imgs_psnr_list = (
        train_imgs_psnr_list[:train_split],
        train_imgs_psnr_list[train_split:],
    )
    test_imgs_psnr_list, test2_imgs_psnr_list = (
        test_imgs_psnr_list[:test_split],
        test_imgs_psnr_list[test_split:],
    )

# shuffle the pairs for training (unseeded, so the order differs per run)
import random
random.shuffle(train_imgs_psnr_list)

print('test_image_path example: ', test_imgs_psnr_list[img1_idx])
print('train_image_path example: ', train_imgs_psnr_list[img1_idx])
print(len(train_imgs_psnr_list), len(test_imgs_psnr_list))

In [None]:
# Wrap the train and test pair lists in the project dataset class; both use
# the same transform pipeline.
train_dataset, test_dataset = (
    MyDataset(pairs, transform=data_transforms)
    for pairs in (train_imgs_psnr_list, test_imgs_psnr_list)
)

In [None]:
# Sanity check: show the shape of the first element of the first training sample.
print("Input shape:", train_dataset[0][0].shape)

In [None]:
# Loader over the training pairs; batch order is reshuffled on each pass.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,  # keep the final, possibly smaller, batch
    num_workers=0,    # load data in the main process
    pin_memory=True,
)

# Loader over the test pairs; order is kept fixed.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,  # keep the final, possibly smaller, batch
    num_workers=0,    # load data in the main process
    pin_memory=True,
)

# Encoder loading
Load the encoder that is applied to both images of each pair.

In [None]:
# Build the encoder from the 'network' section of the YAML config.
encoder = ResNet18(**config['network'])
# Number of input features of the first layer in the encoder's `projetion.net`.
# NOTE(review): `projetion` (sic) is presumably the attribute name as spelled
# inside ResNet18 — confirm before "correcting" the spelling here.
output_feature_dim = encoder.projetion.net[0].in_features
print(output_feature_dim)

In [None]:
# Load the pre-trained parameters, mapping all tensors onto the selected device.
# NOTE(review): torch.load unpickles the file — only load checkpoints that come
# from a trusted source.
load_params = torch.load(
    '/home/hong/dir1/PyTorch-BYOL/runs/Sep26_15-10-29_mango2/checkpoints/model.pth',
    map_location=torch.device(device),
)

if 'online_network_state_dict' in load_params:
    encoder.load_state_dict(load_params['online_network_state_dict'])
    print("Parameters successfully loaded.")
else:
    # Previously this case was skipped silently, so features would be extracted
    # from an encoder without the checkpoint weights and nothing would say so.
    print("WARNING: 'online_network_state_dict' not found in checkpoint; "
          "the encoder keeps the weights it was constructed with.")

# remove the projection head
# (drops the last child module; assumes that child is the projection head — TODO confirm)
encoder = torch.nn.Sequential(*list(encoder.children())[:-1])
encoder = encoder.to(device)

# Encoding

In [None]:
def get_features_from_encoder(encoder, loader, device=None):
    """Encode every image pair yielded by *loader* and collect the results.

    Args:
        encoder: Callable module applied separately to each image batch.
        loader: Iterable yielding ``(x1, x2, y)`` batches.
        device: Device the input batches are moved to before encoding.
            Defaults to the device of the encoder's first parameter (CPU if
            the encoder has no parameters), so the function no longer relies
            on a module-level ``device`` variable.

    Returns:
        Tuple ``(x1_features, x2_features, targets)``; each is the
        concatenation over all batches along dim 0, held on the CPU.

    Raises:
        RuntimeError: If *loader* yields no batches.
    """
    if device is None:
        first_param = next(encoder.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    x1_batches = []
    x2_batches = []
    y_batches = []

    # Gradients are never needed for feature extraction.
    with torch.no_grad():
        for x1, x2, y in tqdm(loader):
            f_vector1 = encoder(x1.to(device))
            f_vector2 = encoder(x2.to(device))

            # Move results to the CPU right away so features do not pile up
            # in device memory across batches.
            x1_batches.append(f_vector1.to('cpu'))
            x2_batches.append(f_vector2.to('cpu'))
            y_batches.append(torch.as_tensor(y))

    if not x1_batches:
        raise RuntimeError("loader yielded no batches; nothing to encode")

    # One concatenation per output replaces the former row-by-row
    # accumulation followed by torch.stack; the resulting tensors are the same.
    x1_train = torch.cat(x1_batches)
    x2_train = torch.cat(x2_batches)
    y_train = torch.cat(y_batches)
    return x1_train, x2_train, y_train

In [None]:
# Put the encoder in evaluation mode, then encode both data splits.
encoder.eval()
x1_train, x2_train, y_train = get_features_from_encoder(encoder, train_loader)
x1_test, x2_test, y_test = get_features_from_encoder(encoder, test_loader)

# If the encoder output has more than two dims, average over dims 2 and 3 so
# each sample is left with a flat feature vector.
# (assumes those are the spatial dims of a (N, C, H, W) output — TODO confirm)
if x1_train.dim() > 2:
    print(x1_train.shape)
    x1_train, x2_train, x1_test, x2_test = (
        features.mean(dim=[2, 3])
        for features in (x1_train, x2_train, x1_test, x2_test)
    )

print("Training data shape:", x1_train.shape, y_train.shape)
print("Testing data shape:", x1_test.shape, y_test.shape)

In [None]:
# save tensor array
now = time.localtime()
# Bundle the encoded features and targets of each split into one dict.
m1 = {'x1': x1_train, 'x2': x2_train, 'y': y_train}
m2 = {'x1': x1_test, 'x2': x2_test, 'y': y_test}

# Output directory is named after the current local month/day and hour/minute.
date_dir = "./tensors/run%02d%02d_%02d%02d/" % (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min)
# makedirs also creates a missing ./tensors parent and tolerates an existing
# directory; os.mkdir crashed in both cases, after the expensive encoding had
# already finished. A rerun within the same minute overwrites the earlier files.
os.makedirs(date_dir, exist_ok=True)
train_tensor_path = os.path.join(date_dir, "train.pt")
test_tensor_path = os.path.join(date_dir, "test.pt")
torch.save(m1, train_tensor_path)
torch.save(m2, test_tensor_path)
