In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

from PIL import Image, ImageEnhance
import cv2

import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import torch
import math
import _pickle as pickle
import pandas as pd
import copy
import networkx as nx

from os import listdir
from os.path import isfile, join
from natsort import natsorted
from skimage.segmentation import mark_boundaries

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,15)

# Implemented imports
import sys
sys.path.append('../')
from proposed_method_lib.graph import segment_images, plot_nx_graph, save_graphs_to_file, save_item_with_pickle, load_item_with_pickle, get_indices_and_labels_for_time_frame_without_labels
from proposed_method_lib.graph_node import create_time_series_graphs, get_nx_graph, get_nx_graph_fast, get_all_possible_graphs, get_all_possible_graphs_fast, join_graphs_if_joint_nodes, is_segment_classified_with_only_one_class, get_segment_raw_image, get_features, add_spatial_connections_to_graphs, update_nx_graph_features

from numba.typed import List, Dict
from numba import njit, jit

import json

from networkx.algorithms.operators.all import compose_all
from scipy.stats import mode

In [2]:
def improve_path(path):
    r"""Return ``path`` as an absolute Windows extended-length path.

    The path is made absolute with ``os.path.abspath`` and then prefixed with
    ``\\?\`` (or ``\\?\UNC\`` when the absolute path starts with two
    backslashes, i.e. is a UNC share path).

    Args:
        path: A string or ``os.PathLike`` object.

    Returns:
        str: The prefixed absolute path. A path that already starts with
        ``\\?\`` is returned unchanged, so applying the function repeatedly
        never stacks prefixes.
    """
    extended_prefix = "\\\\?\\"
    path = os.fspath(path)

    # An already-prefixed path also starts with two backslashes; without this
    # guard it would be mistaken for a UNC share and turned into
    # "\\?\UNC\?\...".
    if path.startswith(extended_prefix):
        return path

    path = os.path.abspath(path)
    if path.startswith("\\\\"):
        # UNC share: "\\server\share\..." -> "\\?\UNC\server\share\..."
        return extended_prefix + "UNC\\" + path[2:]
    return extended_prefix + path

def load_graph(file_path):
    """Read a pickled graph back from disk.

    Args:
        file_path: Location of the pickle file to read.

    Returns:
        The object stored in the file.
    """
    # Unpickling can execute arbitrary code, so only open trusted files.
    with open(file_path, mode='rb') as handle:
        return pickle.load(handle)

def save_graph(graph, file_path):
    """Save a graph to a pickle file.

    Args:
        graph: The object to serialize.
        file_path: Destination file; it is created or overwritten.

    Note:
        The pickler runs in "fast" mode, which disables the memo table. Per
        the Python documentation this mode is deprecated and must not be used
        with self-referential objects, as the pickler would recurse forever.
    """
    # The context manager guarantees the file is flushed and closed even if
    # pickling fails; previously the handle was never closed explicitly.
    with open(file_path, "wb") as handle:
        pickler = pickle.Pickler(handle)
        pickler.fast = True
        pickler.dump(graph)

def number_of_subfolders(path):
    """Count all directories nested anywhere below ``path``.

    Args:
        path: Root directory to walk.

    Returns:
        int: Total number of directories found at every depth under ``path``
        (``path`` itself is not counted).
    """
    return sum(len(dir_names) for _root, dir_names, _files in os.walk(path))

def calculate_mean_std(dataloader, num_channels, batch_size=32):
    """
    Calculate channel-wise mean and standard deviation over a whole dataloader.

    The statistics are pooled over every value of every batch (running sum and
    sum of squares), so the result does not depend on how the data is split
    into batches. Averaging per-batch standard deviations, as done previously,
    ignores the variation between batches and underestimates the true value.

    Args:
        dataloader (Iterable): Yields ``(images, label)`` pairs where
            ``images`` is a tensor whose first two dimensions are
            (batch, channel); all remaining dimensions are flattened.
        num_channels (int): Number of channels; sizes the accumulators.
        batch_size (int): Unused; kept for backward compatibility.
    Returns:
        tuple: ``(mean, std)``, each a list of ``num_channels`` floats. The
        standard deviation uses Bessel's correction, matching the default of
        ``torch.Tensor.std``; it is 0.0 if only one value per channel exists.
    Raises:
        ValueError: If the dataloader yields no data.
    """
    # float64 accumulators keep the sum of squares accurate on large datasets.
    channel_sum = torch.zeros(num_channels, dtype=torch.float64)
    channel_sq_sum = torch.zeros(num_channels, dtype=torch.float64)
    n_values = 0  # number of values accumulated per channel

    for images, _ in dataloader:  # Assuming dataset returns (image, label)
        # Flatten to (batch, channels, -1); reshape also accepts
        # non-contiguous batches, which view would reject.
        flat = images.reshape(images.size(0), images.size(1), -1)
        flat = flat.to(dtype=torch.float64, device="cpu")
        channel_sum += flat.sum(dim=(0, 2))
        channel_sq_sum += (flat * flat).sum(dim=(0, 2))
        n_values += flat.size(0) * flat.size(2)

    if n_values == 0:
        raise ValueError("calculate_mean_std received no data from the dataloader")

    mean = channel_sum / n_values
    # Sum of squared deviations; clamp guards against tiny negative values
    # caused by floating-point rounding.
    centered_sq_sum = (channel_sq_sum - n_values * mean * mean).clamp(min=0)
    std = torch.sqrt(centered_sq_sum / max(n_values - 1, 1))

    return mean.tolist(), std.tolist()

def process_data_folders(
    base_path,
    original_data_root=r"E:\PhD\prepared_dynamic_earth_net_data/train",
):
    """Compute per-channel normalization statistics for every prepared folder.

    For each directory directly under ``base_path`` the immediate sub-folders
    of its ``train`` directory are listed. For every such sub-folder ``name``
    all files in ``<original_data_root>/<name>/image`` are loaded with
    ``np.load``. Mean and standard deviation per channel are pooled over all
    pixels of all loaded images and written to
    ``<folder>/train_set_normalization_data.json``.

    The standard deviation is the population standard deviation of all pixels.
    Averaging per-image standard deviations, as done previously, ignores the
    variation between images and underestimates the true value.

    Args:
        base_path: Directory containing one folder per prepared configuration.
        original_data_root: Directory holding the raw data, one sub-folder per
            ``train`` sub-folder name, each with an ``image`` directory.
            Defaults to the location that used to be hard-coded.

    Raises:
        ValueError: If a loaded array is not of shape (height, width, 4).

    Entries of ``base_path`` that are not directories are ignored. Folders for
    which no image is found are reported and skipped instead of writing NaN.
    """
    channel_names = ("R", "G", "B", "infra")
    n_channels = len(channel_names)

    for folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder)
        if not os.path.isdir(folder_path):
            # A stray file cannot hold a train folder nor receive the JSON.
            continue
        print("Working:", folder_path)

        # float64 accumulators keep the sum of squares accurate.
        channel_sum = np.zeros(n_channels, dtype=np.float64)
        channel_sq_sum = np.zeros(n_channels, dtype=np.float64)
        n_pixels = 0

        train_folder = os.path.join(folder_path, "train")
        if os.path.isdir(train_folder):
            for tile_name in os.listdir(train_folder):
                tile_path = os.path.join(train_folder, tile_name)
                if not os.path.isdir(tile_path):
                    continue
                print("-- Working on subdirectory:", tile_path)

                # The directory name is used directly, so no platform-specific
                # separator splitting is needed.
                image_dir = os.path.join(original_data_root, tile_name, "image")
                for file_name in os.listdir(image_dir):
                    file_path = os.path.join(image_dir, file_name)
                    if not os.path.isfile(file_path):
                        continue

                    raw_image = np.load(file_path)
                    if raw_image.ndim != 3 or raw_image.shape[2] != n_channels:
                        raise ValueError(
                            f"Expected an array of shape (H, W, {n_channels}) "
                            f"in {file_path}, got {raw_image.shape}"
                        )

                    pixels = raw_image.reshape(-1, n_channels).astype(np.float64)
                    channel_sum += pixels.sum(axis=0)
                    channel_sq_sum += np.square(pixels).sum(axis=0)
                    n_pixels += pixels.shape[0]

        if n_pixels == 0:
            # Dividing by zero would store NaN statistics.
            print(f"No training images found for: {folder_path} (skipped)")
            continue

        # Pooled statistics; the maximum guards against tiny negative
        # variances caused by floating-point rounding.
        mean = channel_sum / n_pixels
        variance = np.maximum(channel_sq_sum / n_pixels - mean ** 2, 0.0)
        std = np.sqrt(variance)

        # Stored as float32 strings, the format this file has always used.
        mean = mean.astype(np.float32)
        std = std.astype(np.float32)

        json_file_path = os.path.join(folder_path, "train_set_normalization_data.json")
        normalization_data = {
            "means": {name: str(mean[i]) for i, name in enumerate(channel_names)},
            "stds": {name: str(std[i]) for i, name in enumerate(channel_names)},
        }

        with open(json_file_path, "w") as json_file:
            json.dump(normalization_data, json_file, indent=4)

        print(f"JSON file saved at: {json_file_path}")

In [3]:
# Main function call: compute the training-set normalization statistics for
# every folder under base_path and write one JSON file into each of them.
base_path = "./prepared_data/"
process_data_folders(base_path)

Working: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train\1311_3077_13
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train\1417_3281_13
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train\1487_3335_13
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train\1700_3100_13
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train\2006_3280_13
-- Working on subdirectory: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_