# 🌁 Image normalization statistics

# 📚 Libraries

In [26]:
import os
import sys
import rasterio
import numpy as np
from tqdm.notebook import tqdm
from glob import glob
import torch
import json
import torchvision.transforms as T

sys.path.append(os.pardir)

# 🛩️ Mean & Std for Aerial images

In [5]:
def read_tif(path_file):
    """Load a raster file with rasterio and return it as a ``torch.uint8`` tensor.

    Args:
        path_file: Path of the raster file, passed to ``rasterio.open``.

    Returns:
        The array returned by the dataset's ``read()`` converted to a torch
        tensor and cast to ``torch.uint8``. The values are only cast, no
        rescaling is applied. The layout is presumably (bands, rows, cols),
        as described by the rasterio documentation — confirm if it matters.
    """
    with rasterio.open(path_file) as dataset:
        raster = dataset.read()

    # The array is fully loaded at this point, so the conversion can happen
    # once the dataset has been closed.
    return torch.from_numpy(raster).type(torch.uint8)  # from 0 to 255

In [19]:
def compute_stats(template_path_image, filename, num_channels, open_file, max_channels,
                  channel_dim=-3):
    """Compute the per-channel mean and standard deviation of a set of images.

    The statistics are computed in two passes over the files (the mean first,
    then the squared deviations from that mean) on values divided by
    ``max_channels``. The result is written to ``filename`` as a JSON object
    with the keys ``'mean'`` and ``'std'``, each a list of ``num_channels``
    floats. The standard deviation is the population one (divided by the
    number of pixels, not by ``n - 1``).

    Args:
        template_path_image: Glob pattern of the images (``**`` is expanded
            recursively).
        filename: Path of the JSON file to write.
        num_channels: Expected size of the channel axis of every image.
        open_file: Callable taking a path and returning the image as a torch
            tensor or as anything ``torch.as_tensor`` accepts (e.g. a numpy
            array). It is used for both passes.
        max_channels: Value every pixel is divided by before accumulating.
        channel_dim: Axis of the loaded array that indexes the channels. The
            default ``-3`` fits channel-first images such as (C, H, W) and
            stacks such as (T, C, H, W).

    Raises:
        ValueError: If no file matches the pattern, or if an image does not
            have ``num_channels`` entries along ``channel_dim``.
    """
    list_path_image = glob(template_path_image, recursive=True)
    if not list_path_image:
        # Without this guard the mean would be 0 / 0 and NaN would be written.
        raise ValueError(f'No file matches the pattern {template_path_image!r}')

    def load_channels(path_image):
        # Return the rescaled image as a float64 (num_channels, n_pixels) matrix.
        image = torch.as_tensor(open_file(path_image))
        if image.shape[channel_dim] != num_channels:
            raise ValueError(
                f'{path_image}: expected {num_channels} channels on axis '
                f'{channel_dim}, got shape {tuple(image.shape)}'
            )
        image = image.to(torch.float64) / max_channels
        # Bring the channel axis first, then flatten everything else. A plain
        # view(-1, num_channels) on a channel-first array would mix values
        # coming from different channels.
        return image.transpose(0, channel_dim).reshape(num_channels, -1)

    # float64 accumulators: the sums run over a very large number of pixels and
    # float32 would lose precision.
    channel_sum = torch.zeros(num_channels, dtype=torch.float64)
    channel_squared_diff_sum = torch.zeros(num_channels, dtype=torch.float64)
    total_pixels = 0

    # First pass: per-channel sum and pixel count.
    for path_image in tqdm(list_path_image, desc='Compute sum by channel'):
        pixels = load_channels(path_image)
        channel_sum += pixels.sum(dim=1)
        total_pixels += pixels.shape[1]

    channel_mean = channel_sum / total_pixels

    # Second pass: squared deviations from the mean computed above.
    for path_image in tqdm(list_path_image, desc='Compute squared diff sum by channel'):
        pixels = load_channels(path_image)
        diff = pixels - channel_mean.unsqueeze(1)
        channel_squared_diff_sum += (diff * diff).sum(dim=1)

    channel_std = torch.sqrt(channel_squared_diff_sum / total_pixels)

    dataset_dict = {
        'mean': channel_mean.tolist(),
        'std': channel_std.tolist()
    }

    with open(filename, 'w', encoding='UTF8') as f:
        json.dump(dataset_dict, f)

In [20]:
# Every .tif file found recursively under data/raw/train/aerial.
template_path_image = os.path.join(
    os.pardir, 'data', 'raw', 'train', 'aerial', '**', '*.tif'
)
# JSON file that receives the computed mean / std.
filename = os.path.join(os.pardir, 'data', 'raw', 'aerial_pixels_metadata.json')
# 5 channels, files opened with read_tif, pixel values divided by 255.
compute_stats(
    template_path_image,
    filename,
    num_channels=5,
    open_file=read_tif,
    max_channels=255,
)

Compute sum by channel:   0%|          | 0/61712 [00:00<?, ?it/s]

Compute squared diff sum by channel:   0%|          | 0/61712 [00:00<?, ?it/s]

In [None]:
def compute_stats(template_path_image, filename, num_channels, open_file, max_channels,
                  channel_dim=-3):
    """Compute the per-channel mean and standard deviation of a set of images.

    The statistics are computed in two passes over the files (the mean first,
    then the squared deviations from that mean) on values divided by
    ``max_channels``. The result is written to ``filename`` as a JSON object
    with the keys ``'mean'`` and ``'std'``, each a list of ``num_channels``
    floats. The standard deviation is the population one (divided by the
    number of pixels, not by ``n - 1``).

    Args:
        template_path_image: Glob pattern of the images (``**`` is expanded
            recursively).
        filename: Path of the JSON file to write.
        num_channels: Expected size of the channel axis of every image.
        open_file: Callable taking a path and returning the image as a torch
            tensor or as anything ``torch.as_tensor`` accepts (e.g. a numpy
            array). It is used for both passes.
        max_channels: Value every pixel is divided by before accumulating.
        channel_dim: Axis of the loaded array that indexes the channels. The
            default ``-3`` fits channel-first images such as (C, H, W) and
            stacks such as (T, C, H, W).

    Raises:
        ValueError: If no file matches the pattern, or if an image does not
            have ``num_channels`` entries along ``channel_dim``.
    """
    list_path_image = glob(template_path_image, recursive=True)
    if not list_path_image:
        # Without this guard the mean would be 0 / 0 and NaN would be written.
        raise ValueError(f'No file matches the pattern {template_path_image!r}')

    def load_channels(path_image):
        # Return the rescaled image as a float64 (num_channels, n_pixels) matrix.
        image = torch.as_tensor(open_file(path_image))
        if image.shape[channel_dim] != num_channels:
            raise ValueError(
                f'{path_image}: expected {num_channels} channels on axis '
                f'{channel_dim}, got shape {tuple(image.shape)}'
            )
        image = image.to(torch.float64) / max_channels
        # Bring the channel axis first, then flatten everything else. A plain
        # view(-1, num_channels) on a channel-first array would mix values
        # coming from different channels.
        return image.transpose(0, channel_dim).reshape(num_channels, -1)

    # float64 accumulators: the sums run over a very large number of pixels and
    # float32 would lose precision.
    channel_sum = torch.zeros(num_channels, dtype=torch.float64)
    channel_squared_diff_sum = torch.zeros(num_channels, dtype=torch.float64)
    total_pixels = 0

    # First pass: per-channel sum and pixel count.
    for path_image in tqdm(list_path_image, desc='Compute sum by channel'):
        pixels = load_channels(path_image)
        channel_sum += pixels.sum(dim=1)
        total_pixels += pixels.shape[1]

    channel_mean = channel_sum / total_pixels

    # Second pass: squared deviations from the mean computed above.
    for path_image in tqdm(list_path_image, desc='Compute squared diff sum by channel'):
        pixels = load_channels(path_image)
        diff = pixels - channel_mean.unsqueeze(1)
        channel_squared_diff_sum += (diff * diff).sum(dim=1)

    channel_std = torch.sqrt(channel_squared_diff_sum / total_pixels)

    dataset_dict = {
        'mean': channel_mean.tolist(),
        'std': channel_std.tolist()
    }

    with open(filename, 'w', encoding='UTF8') as f:
        json.dump(dataset_dict, f)

In [46]:
# Inspect the dimensions of one Sentinel array. The recorded output of this
# cell is a shape tuple, which is what `.shape` returns; `.reshape()` without
# a target shape does not produce it.
np.load('../data/raw/train/sen/D004_2021/Z1_NN/sen/SEN2_sp_D004_2021-Z1_NN_data.npy').shape

(42, 10, 207, 207)

In [None]:
# Compute the same statistics for the Sentinel .npy files found recursively
# under data/raw/train/sen, writing them to data/raw/sen_pixels_metadata.json.
template_path_image = os.path.join(os.pardir, 'data', 'raw', 'train', 'sen', '**', '*.npy')
filename = os.path.join(os.pardir, 'data', 'raw', 'sen_pixels_metadata.json')
# NOTE(review): compute_stats as defined in this notebook takes a fifth
# positional argument `max_channels`; it is missing here, so this call raises
# TypeError as written. The right scaling value for these arrays is not
# visible in this notebook — TODO confirm and pass it.
# NOTE(review): the array inspected in the cell above has shape
# (42, 10, 207, 207) while num_channels=5 is passed — presumably this should
# match the channel axis of the .npy files; verify.
compute_stats(template_path_image, filename, 5, np.load)

In [25]:
# Sanity check: normalize one aerial tile with the stored statistics.
# The tile is divided by 255, the same scaling passed to compute_stats for
# the aerial images.
image = read_tif('../data/raw/train/aerial/D004_2021/Z1_NN/img/IMG_000001.tif') / 255.00

# NOTE(review): the statistics are read from data/aerial_pixels_metadata.json,
# whereas the aerial compute_stats cell writes data/raw/aerial_pixels_metadata.json
# — confirm which location is intended.
filename = os.path.join(os.pardir, 'data', 'aerial_pixels_metadata.json')
with open(filename) as f:
    stats = json.loads(f.read())

# Last expression of the cell, so the normalized tensor is displayed.
T.Normalize(
    mean=stats['mean'],
    std=stats['std'],
)(image)

tensor([[[-0.5043, -0.6432, -0.7127,  ..., -0.2785,  0.0167, -0.1570],
         [-0.6259, -0.6953, -0.6259,  ..., -0.0701, -0.2438, -0.3306],
         [-0.7127, -0.7648, -0.5217,  ..., -0.1222, -0.5043, -0.4696],
         ...,
         [-1.2684, -1.2684, -1.2858,  ...,  0.6245, -0.0528, -0.1222],
         [-1.2858, -1.2684, -1.2684,  ...,  0.4161,  0.0514, -0.0180],
         [-1.2337, -1.2684, -1.2684,  ...,  0.3293,  0.3293, -0.1222]],

        [[-0.4348, -0.5738, -0.6432,  ..., -0.1917,  0.1383, -0.0701],
         [-0.5738, -0.6259, -0.5564,  ...,  0.0167, -0.1570, -0.2438],
         [-0.6606, -0.6606, -0.4696,  ..., -0.0180, -0.4175, -0.4175],
         ...,
         [-1.0600, -1.0948, -1.1121,  ...,  0.7461,  0.0167, -0.1049],
         [-1.0948, -1.0600, -1.0948,  ...,  0.5724,  0.0862,  0.0514],
         [-1.0600, -1.0948, -1.0948,  ...,  0.4856,  0.4682, -0.0701]],

        [[-0.5217, -0.6606, -0.6953,  ..., -0.1570,  0.2251,  0.0167],
         [-0.6606, -0.7127, -0.6432,  ...,  0