In [1]:
import os 
import torch 
import numpy as np
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from torchvision import transforms, utils
import torch.nn as nn
import matplotlib.pyplot as plt
# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
import sys

ModuleNotFoundError: No module named 'torchvision'

Documentation is [here](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html), plus an example of what to do when there is too much data to fit into RAM. In that case, the `__getitem__` method uses the index to find the right file, i.e., each image is stored in a separate .npy file. Probably the best option is to have the following file structure:  

-------------
scripts  
- data    
    - sdo  
        - obsid 
            - image .npy files 
    - iris
        - obsid
            - image .npy files 
-------------


Do this and make two directories, one for IRIS images and the other for AIA (SDO) images. Also, if the images are at least roughly aligned rather than randomly paired, store corresponding images in the sdo and iris directories under the same index. 

In [2]:
# Root folder that contains the 'sdo' and 'iris' sub-directories.
# NOTE: this is a machine-specific absolute path; change it here when running elsewhere.
DATA_ROOT = 'D:/Example CR IRIS/GAN_data'
# Fixed seed so that the train/test partition is the same on every run.
SPLIT_SEED = 42

# Later cells refer to 'sdo' and 'iris' relative to the working directory.
os.chdir(DATA_ROOT)

# os.listdir returns entries in arbitrary order; sort so the split input is deterministic.
obs_list = sorted(os.listdir('iris/'))
print("Total: ", len(obs_list))

# Split by observation ID (not by image) so no observation appears in both sets.
x_train_obs, x_test_obs = train_test_split(obs_list, test_size=0.2, random_state=SPLIT_SEED)
print("Train obsid: ", len(x_train_obs))
print("Test obsid: ", len(x_test_obs))

Total:  19
Train obsid:  15
Test obsid:  4


In [3]:
# Define personal dataset
#from torch.utils.dataset import Dataset,Dataloader

class AIAIRISdataset(Dataset):
    """In-memory dataset of (AIA, IRIS) image pairs loaded from .npy files.

    Files are read from ``<root_dir>/<obs>/<sub-directory>/<file>`` for every
    entry of ``obs``, once under ``aia_root_dir`` and once under
    ``iris_root_dir``. Every file is loaded with ``np.load`` and converted to
    ``float32``. Directory listings are sorted, so the order of the loaded
    images (and therefore the index-based pairing of AIA and IRIS images) is
    deterministic.

    Args:
        obs: Iterable of observation directory names present under both roots.
        aia_root_dir: Root directory of the AIA images.
        iris_root_dir: Root directory of the IRIS images.
        aia_transform: Optional callable applied to each AIA tensor.
        iris_transform: Optional callable applied to each IRIS tensor.

    Attributes:
        aia_test_data: NumPy array holding all AIA images (an object array when
            the images do not share one shape).
        iris_test_data: Same as above for the IRIS images.
    """

    def __init__(self,obs,aia_root_dir,iris_root_dir,aia_transform = None, iris_transform = None):
        # All images are loaded eagerly. If there is too much data to keep in RAM,
        # store file paths here instead and load the images one at a time in __getitem__.
        super(AIAIRISdataset, self).__init__()

        # Progress/debug output kept from the original implementation.
        print(aia_transform)

        aia_arr = []
        iris_arr = []
        for i in obs:
            print(i)
            aia_arr.extend(self._load_obs_images(aia_root_dir, i))
            iris_arr.extend(self._load_obs_images(iris_root_dir, i))

        self.aia_test_data = self._stack_images(aia_arr)
        print(len(self.aia_test_data))
        self.aia_transform = aia_transform

        self.iris_test_data = self._stack_images(iris_arr)
        self.iris_transform = iris_transform

    @staticmethod
    def _load_obs_images(root_dir, obs_name):
        """Load every file below ``root_dir/obs_name/<sub-directory>/`` as a float32 array."""
        obs_dir = os.path.join(root_dir, obs_name)
        images = []
        # os.listdir gives no ordering guarantee; sort for a reproducible image order.
        for band in sorted(os.listdir(obs_dir)):
            band_dir = os.path.join(obs_dir, band)
            for file_name in sorted(os.listdir(band_dir)):
                ar = np.load(os.path.join(band_dir, file_name))
                images.append(ar.astype(np.float32))
        return images

    @staticmethod
    def _stack_images(images):
        """Pack a list of arrays into one array whose first axis indexes the images."""
        shapes = {im.shape for im in images}
        if len(shapes) <= 1:
            # Uniform shapes (or no images): a regular stacked array, as before.
            return np.array(images)
        # Mixed shapes: np.array() raises on recent NumPy versions, so build a
        # 1-D object array explicitly; each element stays a float32 array.
        stacked = np.empty(len(images), dtype=object)
        for k, im in enumerate(images):
            stacked[k] = im
        return stacked

    def __len__(self):
        # So the loader knows when an epoch is reached. Use the smaller of the two
        # counts so that every valid index exists in both arrays.
        return min(self.aia_test_data.shape[0], self.iris_test_data.shape[0])

    def __getitem__(self, index):
        """Return the ``(aia_im, iris_im)`` pair at ``index`` after the optional transforms."""
        # Select single images and turn into PyTorch tensors
        aia_im = torch.from_numpy(self.aia_test_data[index])
        iris_im = torch.from_numpy(self.iris_test_data[index])

        if self.aia_transform:
            aia_im = self.aia_transform(aia_im)

        if self.iris_transform:
            iris_im = self.iris_transform(iris_im)

        return aia_im, iris_im 

In [4]:
# Preprocessing/augmentation pipeline for AIA images:
# tensor -> PIL image -> resize to 512x512 -> random rotation within +/-90 degrees -> tensor.
aia_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(512, 512)),
        transforms.RandomRotation(degrees=90),
        # transforms.RandomCrop(224),
        transforms.ToTensor(),
    ]
)

In [5]:
# Preprocessing/augmentation pipeline for IRIS images:
# tensor -> PIL image -> resize to 1024x1024 -> random rotation within +/-90 degrees -> tensor.
iris_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(1024, 1024)),
        transforms.RandomRotation(degrees=90),
        # transforms.RandomCrop(224),
        transforms.ToTensor(),
    ]
)

In [None]:
# Create the training dataset from the training observation IDs ...
train_dataset = AIAIRISdataset(
    x_train_obs,
    'sdo',
    'iris',
    aia_transform=aia_transform,
    iris_transform=iris_transform,
)
# ... and wrap it in a loader that shuffles and yields batches of 64 in the main process.
train_data_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)

Compose(
    ToPILImage()
    Resize(size=(512, 512), interpolation=PIL.Image.BILINEAR)
    RandomRotation(degrees=(-90, 90), resample=False, expand=False)
    ToTensor()
)
20180830_135414_3620110404
20160212_122145_3690113103
20131208_230830_3880262154
20160212_135911_3690113103
20220923_103842_3690133103
20131206_023345_3800260154
20131119_171530_3893012103


In [None]:
# Create the test dataset from the held-out observation IDs.
# NOTE(review): no iris_transform is passed, so IRIS test images are returned
# untransformed (not resized), unlike in the training set -- confirm this is intended.
test_dataset = AIAIRISdataset(
    x_test_obs,
    'sdo',
    'iris',
    aia_transform=aia_transform,
)
# Wrap it in a loader that shuffles and yields one pair at a time in the main process.
test_data_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Draw a single batch from the training loader and report the batch shapes.
(x_train, y_train) = next(iter(train_data_loader))
print(f"{x_train.shape} {y_train.shape}")

In [None]:
# Draw a single batch from the test loader and report the batch shapes.
(x_test, y_test) = next(iter(test_data_loader))
print(f"{x_test.shape} {y_test.shape}")

In [None]:
# Show up to four randomly chosen (AIA, IRIS) pairs from the training batch, one pair per row.
n_show = min(4, len(x_train))
# squeeze=False keeps `ax` two-dimensional even when only one row is drawn.
fig, ax = plt.subplots(n_show, 2, figsize=(10, 15), squeeze=False)
indices = torch.randperm(len(x_train))[:n_show]
print(indices)
for i, (aia_sample, iris_sample) in enumerate(zip(x_train[indices], y_train[indices])):
    # Plot the first channel as-is: casting the float images to uint8 would
    # truncate/wrap their values, while imshow scales float data automatically.
    aia = aia_sample[0].numpy()
    iris = iris_sample[0].numpy()
    ax[i, 0].imshow(aia)
    ax[i, 0].set_title(f"AIA (batch index {int(indices[i])})")
    ax[i, 1].imshow(iris)
    ax[i, 1].set_title(f"IRIS (batch index {int(indices[i])})")
plt.show()