# Packages

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import netCDF4
from skimage import io, transform, exposure, filters
import numpy as np
from scipy.fft import fft2, ifft2, fftshift
import fnmatch, os, glob
import json
import gc
import h5py
from scipy.ndimage import zoom
from scipy.stats import entropy
from sklearn.decomposition import PCA

import scipy.io
from scipy.io import savemat

from PIL import Image
import imageio

from matplotlib import cm
from scipy.ndimage import convolve

# Data set locations and band names

In [None]:
# Directory that every data-set folder below is joined onto.
base_path = os.path.expanduser("/data/omar/data_sets/model_based_15Nov/")

# Data-set names; each one is used both as a folder name under base_path and
# as the prefix of the files written/read by the functions in this notebook.
data_names = [
    'africa',
    'Canda',
    'east_US',
    'Gulf',
    'France',
    'India',
    'Ireland',
    'Iceland',
    'Madag',
    'Malaz&Singa',
    'Mex',
    'Norway&Swedeen',
    'Newzaland',
    'UK',
    'west_US',
]

# data-set name -> full path of its folder.
data_paths = dict(
    (name, os.path.join(base_path, name)) for name in data_names
)

# Band identifiers that the processing functions iterate over.
band_names = [
    'BAND2_RADIANCE',
    'BAND3_RADIANCE',
    'BAND4_RADIANCE',
    'BAND5_RADIANCE',
    'BAND6_RADIANCE',
    'BAND7_RADIANCE',
    'BAND8_RADIANCE',
]



# Plotting

In [None]:
def plot_hyperspectral_images_regions(lr_imgs, hr_imgs, band_name, bands=(30, 50, 70), cmap='terrain'):
    """Display one min-max normalised composite per high-resolution region.

    Args:
        lr_imgs: Sequence of low-resolution images. Only its length is used;
            it decides how many panels are drawn.
        hr_imgs: Sequence of arrays that can be indexed as ``img[:, :, bands]``.
            The first ``len(lr_imgs)`` entries are plotted.
        band_name: Not used by this function; kept so existing calls still work.
        bands: Indices selected along the last axis of each HR image.
        cmap: Passed to ``imshow``. NOTE(review): matplotlib documents that
            ``cmap`` is ignored for RGB input, so it should only matter when
            ``bands`` selects a single channel -- confirm intent.
    """
    num_regions = len(lr_imgs)
    if num_regions == 0:
        # plt.subplots cannot build a grid with zero columns.
        print("No regions to plot.")
        return

    # A list triggers NumPy fancy indexing; other index types pass through.
    channel_idx = list(bands) if isinstance(bands, (list, tuple)) else bands

    # squeeze=False always yields a 2-D array of Axes, so a single region
    # no longer breaks the indexing below.
    fig, axes = plt.subplots(1, num_regions, figsize=(15, 5), squeeze=False)

    for i, ax in enumerate(axes[0]):
        hr_color = hr_imgs[i][:, :, channel_idx]
        lowest = hr_color.min()
        span = hr_color.max() - lowest
        shifted = hr_color - lowest
        # A constant image has span 0; keep the all-zero image instead of 0/0.
        hr_color = shifted / span if span != 0 else shifted
        ax.imshow(hr_color, cmap=cmap)
        ax.set_title(f"HR {i+1}")
        ax.axis("off")

    plt.tight_layout()
    plt.show()
    plt.close(fig)


In [None]:
def read_images(data_name, path):
    """Load and plot the cropped HR/LR region pairs of every band.

    For each entry of ``band_names`` the region files are read from ``path``
    starting at region 1 and stopping at the first index for which either the
    HR or the LR ``.mat`` file is missing. The collected images are then handed
    to ``plot_hyperspectral_images_regions``.

    Args:
        data_name: Prefix used in the region file names.
        path: Directory containing the ``.mat`` files.

    Returns:
        The list of HR images of the last band processed (the lists are reset
        at the start of every band).
    """
    for band_name in band_names:
        print(band_name)
        hr_imgs, lr_imgs = [], []

        region_index = 1
        while True:
            hr_path = os.path.join(path, f"{data_name}_{band_name}_region_{region_index}_cropped_hyper.mat")
            lr_path = os.path.join(path, f"{data_name}_{band_name}_region_{region_index}_cropped_hyper_LR4.mat")

            pair_available = os.path.exists(hr_path) and os.path.exists(lr_path)
            if not pair_available:
                # First gap in the numbering ends this band.
                break

            hr_imgs.append(scipy.io.loadmat(hr_path)['radiance'])
            lr_imgs.append(scipy.io.loadmat(lr_path)['radiance'])
            region_index += 1

        # region_index is the first index that was NOT found.
        print("region_index", region_index)
        plot_hyperspectral_images_regions(lr_imgs, hr_imgs, band_name)

    return hr_imgs


# Data

In [None]:
def _search_radiance(group):
    """Depth-first search for a variable named 'radiance'; returns None if absent."""
    if 'radiance' in group.variables:
        return group.variables['radiance'][:]
    for subgroup in group.groups.values():
        found = _search_radiance(subgroup)
        if found is not None:
            return found
    return None


def find_radiance_variable(group):  # recursively find radiance
    """Return the contents of the first 'radiance' variable under ``group``.

    The group's own variables are checked first, then each sub-group is
    searched recursively in iteration order.

    Args:
        group: Object exposing ``variables`` and ``groups`` mappings.

    Returns:
        ``group.variables['radiance'][:]`` of the first match, or ``None`` when
        no such variable exists anywhere below ``group``. The "not found"
        message is printed once, only when the whole search fails, rather than
        for every sub-group that happens not to hold the variable.
    """
    result = _search_radiance(group)
    if result is None:
        print('Radiance variable not found')
    return result

In [None]:
def save_to_hdf5(file_name, data, band_name):
    """Store ``data`` as a dataset called ``band_name`` in an HDF5 file.

    The file is opened in append mode ('a'), so an existing file is kept and
    the dataset is added to it.
    """
    with h5py.File(file_name, 'a') as h5_file:
        h5_file.create_dataset(band_name, data=data)


In [None]:
def load_from_hdf5(file_name, band_name):
    """Read the whole dataset ``band_name`` from an HDF5 file and return it.

    The ``[:]`` slice is evaluated while the file is still open, so the
    returned value does not depend on the file handle afterwards.
    """
    with h5py.File(file_name, 'r') as h5_file:
        return h5_file[band_name][:]

In [None]:
def save_images_mat(img, file_name):
    """Write ``img`` to a MATLAB ``.mat`` file under the key 'radiance'.

    Args:
        img: Array-like image; converted with ``np.array`` before saving.
        file_name: Destination path passed to ``scipy.io.savemat``.
    """
    payload = {'radiance': np.array(img)}
    scipy.io.savemat(file_name, payload)

In [None]:
def process_files_toHDF5(data_name, data_path):
    """Convert every ``BAND*.nc`` file in ``data_path`` to a per-band HDF5 file.

    For each matching NetCDF file the band name is derived from the file name,
    the 'radiance' variable is located inside the group of that name, and the
    data are written to ``<data_name>_<band_name>.h5`` in the same directory.
    Bands whose output file already exists are skipped.

    Args:
        data_name: Prefix for the generated ``.h5`` file names.
        data_path: Directory that is searched for input files and that also
            receives the output files.
    """
    output_dir = data_path
    nc_files = glob.glob(os.path.join(data_path, 'BAND*.nc'))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for file_path in nc_files:
        # Last character of the first "_"-separated token, e.g. "BAND2" -> "2".
        band_digit = os.path.basename(file_path).split("_")[0][-1]
        band_name = f"BAND{band_digit}_RADIANCE"

        if band_name not in band_names:
            print(f"WARNING: No matching band name found for file {band_name}")
            continue

        hdf5_file = os.path.join(output_dir, f"{data_name}_{band_name}.h5")
        if os.path.exists(hdf5_file):
            print(f"Skipping {band_name} as it already exists.")
            continue

        try:
            # The context manager closes the NetCDF file even when the group
            # lookup fails; the data are already read into memory by then.
            with netCDF4.Dataset(file_path) as dataset:
                radiance_data = find_radiance_variable(dataset.groups[band_name])

            if radiance_data is None:
                # Avoid an uncaught AttributeError on `.shape` below.
                print(f"WARNING: No radiance variable found in {file_path}")
                continue

            print(f'Original shape for {band_name}:', radiance_data.shape)
            save_to_hdf5(hdf5_file, radiance_data, band_name)
        except (KeyError, OSError) as e:
            print(f"WARNING: Error processing {band_name}: {e}")

    print("Data has been saved.")


In [None]:
def channel_crop_all_save(data_name, data_path):
    """Tile each band's data into fixed-size crops and save every crop as .mat.

    For every entry of ``band_names`` the array stored in
    ``<data_name>_<band_name>.h5`` is loaded, its first element along axis 0 is
    cut into non-overlapping tiles of 512 rows by 256 columns (215 columns for
    BAND7 and BAND8), and each complete tile is written with
    ``save_images_mat``. Tiles that would run past the array edge are not
    produced. Region numbering restarts at 1 for every band.

    Args:
        data_name: Prefix of the input ``.h5`` and output ``.mat`` file names.
        data_path: Directory holding the inputs and receiving the outputs.
    """
    crop_height = 512
    narrow_bands = ('BAND8_RADIANCE', 'BAND7_RADIANCE')

    for band_name in band_names:
        input_file_path = os.path.join(data_path, f"{data_name}_{band_name}.h5")
        data = load_from_hdf5(input_file_path, band_name)

        print(band_name)
        crop_width = 215 if band_name in narrow_bands else 256

        # The data must be 4-D; only axes 1 and 2 are tiled.
        _, height, width, _ = data.shape

        # Start offsets are limited so that every tile fits completely.
        row_starts = range(0, height - crop_height + 1, crop_height)
        col_starts = range(0, width - crop_width + 1, crop_width)

        region_index = 0
        for start_h in row_starts:
            end_h = start_h + crop_height
            for start_w in col_starts:
                end_w = start_w + crop_width
                region_index += 1

                tile = data[0, start_h:end_h, start_w:end_w, :]
                file_name = os.path.join(data_path, f"{data_name}_{band_name}_region_{region_index}_cropped_hyper.mat")
                save_images_mat(tile, file_name)

                print(f"Data has been saved for region {region_index} starting at ({start_h}, {start_w}).")
