In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

from PIL import Image, ImageEnhance
import cv2

import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import torch
import math
import _pickle as pickle
import pandas as pd
import copy
import networkx as nx

from os import listdir
from os.path import isfile, join
from natsort import natsorted
from skimage.segmentation import mark_boundaries

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,15)

# Implemented imports
import sys
sys.path.append('../')
from proposed_method_lib.graph import segment_images, plot_nx_graph, save_graphs_to_file, save_item_with_pickle, load_item_with_pickle, get_indices_and_labels_for_time_frame_without_labels
from proposed_method_lib.graph_node import create_time_series_graphs, get_nx_graph, get_nx_graph_fast, get_all_possible_graphs, get_all_possible_graphs_fast, join_graphs_if_joint_nodes, is_segment_classified_with_only_one_class, get_segment_raw_image, get_features, add_spatial_connections_to_graphs, update_nx_graph_features

from numba.typed import List, Dict
from numba import njit, jit

import json

from networkx.algorithms.operators.all import compose_all
from scipy.stats import mode

In [3]:
def load_graph(file_path):
    """Read a pickled graph object from ``file_path`` and return it.

    Args:
        file_path: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object was stored in the pickle file.

    Note:
        Unpickling can execute arbitrary code, so only point this at files
        produced by this project.
    """
    with open(file_path, mode="rb") as handle:
        return pickle.load(handle)

def save_graph(graph, file_path):
    """Save a graph to a pickle file.

    Args:
        graph: Object to serialize (the combined graph in this notebook).
        file_path: Destination path; an existing file is overwritten.

    The file is opened in a ``with`` block so it is flushed and closed as soon
    as the dump finishes (or fails), instead of whenever the pickler object
    happens to be garbage collected.
    """
    with open(file_path, "wb") as f:
        p = pickle.Pickler(f)
        # NOTE(review): fast mode disables the pickle memo. It is documented as
        # deprecated, recurses forever on self-referential objects, and writes
        # shared sub-objects once per reference, so object identity between
        # them is not restored on load. Kept as in the original; confirm that
        # graphs reloaded from these files behave as expected.
        p.fast = True
        p.dump(graph)

def number_of_subfolders(path):
    """Count every directory nested anywhere below ``path``.

    ``path`` itself is not counted. A path that ``os.walk`` cannot traverse
    (for example one that does not exist) yields a count of 0.
    """
    return sum(len(child_dirs) for _root, child_dirs, _files in os.walk(path))

def combine_graphs_in_folder(folder_path):
    """Combine all graphs in the folder and its subfolders, ensuring unique node ids.

    Every file named ``graph.pickle`` found under ``folder_path`` is loaded,
    its node ids are shifted by a running offset, and its nodes, edges and
    graph-level attributes are merged into a single ``nx.DiGraph``.

    Args:
        folder_path: Root directory that is searched recursively.

    Returns:
        The merged ``nx.DiGraph`` (empty when no ``graph.pickle`` is found).

    Raises:
        nx.NetworkXError: If a loaded graph is a multigraph, which cannot be
            merged into a plain ``DiGraph`` without losing parallel edges.

    Notes:
        * Node ids are assumed to be non-negative integers (they are shifted
          with ``x + offset``) - TODO confirm against the graph builder.
        * Subdirectories are visited in sorted order so the id assigned to
          each node does not depend on filesystem enumeration order.
    """
    combined_graph = nx.DiGraph()
    node_offset = 0  # Smallest node id that is guaranteed to be unused so far

    # os.walk also yields folder_path itself, so the number of visited
    # directories is the number of subfolders plus one.
    total_dirs = number_of_subfolders(folder_path) + 1

    for dir_index, (subdir, dirs, files) in enumerate(os.walk(folder_path), start=1):
        # Sorting in place steers os.walk (top-down) into a deterministic order.
        dirs.sort()

        # Progress log only; this rescans all node labels of the combined graph.
        current_unique_node_labels = np.unique(list(nx.get_node_attributes(combined_graph, "label").values()))
        print("-- Working:", "...", subdir[-17:], "(", dir_index, "/", total_dirs, ")", "Num. of nodes/edges in combined graph:", combined_graph.number_of_nodes(), combined_graph.number_of_edges(), "Unique labels in combined graph:", current_unique_node_labels)

        if "graph.pickle" not in files:
            continue

        file_path = os.path.join(subdir, "graph.pickle")

        # Load the graph
        graph = load_graph(file_path)

        print("--- Number of nodes in loaded graph:", graph.number_of_nodes(), "(", file_path[-30:], ")")

        if graph.is_multigraph():
            raise nx.NetworkXError(
                "Cannot merge a multigraph into the combined DiGraph: " + file_path
            )

        # Relabel the graph to avoid node id conflicts
        graph_relabel = nx.relabel_nodes(graph, lambda x, offset=node_offset: x + offset)

        # Advance the offset past the largest id actually used. Adding the
        # node count (as before) is only collision-free when the ids are
        # exactly 0..n-1; for such graphs both rules give the same offset.
        if graph_relabel.number_of_nodes() > 0:
            node_offset = max(graph_relabel.nodes) + 1

        # Merge in place. nx.compose would build a brand-new graph and re-copy
        # everything accumulated so far for each file (quadratic overall).
        combined_graph.graph.update(graph_relabel.graph)
        combined_graph.add_nodes_from(graph_relabel.nodes(data=True))
        combined_graph.add_edges_from(graph_relabel.edges(data=True))

    return combined_graph

def process_data_folders(base_path):
    """Process all folders under the base directory.

    For every directory directly inside ``base_path``, the graphs found under
    its ``train`` and ``val`` subdirectories (when present) are combined with
    ``combine_graphs_in_folder`` and written to ``combined_graph.pickle``
    inside that same subdirectory.

    Args:
        base_path: Directory whose immediate child directories are processed.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # (split name, log prefix) pairs; the prefixes are kept verbatim so the
    # printed progress log stays unchanged.
    split_log_prefixes = (("train", "- Working:"), ("val", "- Working on:"))

    # Sorted so datasets are processed in a reproducible order.
    for folder in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder)
        print("Working:", folder_path)

        if not os.path.isdir(folder_path):
            continue

        for split, log_prefix in split_log_prefixes:
            split_folder = os.path.join(folder_path, split)

            # isdir rather than exists: a regular file that happens to be
            # named like the split cannot hold graphs or the output file.
            if not os.path.isdir(split_folder):
                continue

            print(log_prefix, split_folder)
            combined_graph = combine_graphs_in_folder(split_folder)
            save_graph(combined_graph, os.path.join(split_folder, "combined_graph.pickle"))
            print(f"Saved combined graph for '{split}' folder: {split_folder}")

            # Release the (potentially very large) graph before the next split
            # is built instead of keeping both alive at once.
            del combined_graph

In [4]:
# Entry point: combine the per-folder graphs of every dataset found under base_path.
base_path = "./prepared_data/"
process_data_folders(base_path=base_path)

Working: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50
- Working: ./prepared_data/interval_monthly_padding_32_bbox_size_32_layers_rgb_plus_infrared_sc_10_sigma_1.0_min_size_50\train
-- Working: ... min_size_50\train ( 1 / 55 ) Num. of nodes/edges in combined graph: 0 0 Unique labels in combined graph: []
-- Working: ... rain\1311_3077_13 ( 2 / 55 ) Num. of nodes/edges in combined graph: 0 0 Unique labels in combined graph: []
--- Number of nodes in loaded graph: 113142 ( rain\1311_3077_13\graph.pickle )
-- Working: ... rain\1417_3281_13 ( 3 / 55 ) Num. of nodes/edges in combined graph: 113142 1210313 Unique labels in combined graph: [0 2 4 5]
--- Number of nodes in loaded graph: 110233 ( rain\1417_3281_13\graph.pickle )
-- Working: ... rain\1487_3335_13 ( 4 / 55 ) Num. of nodes/edges in combined graph: 223375 2348419 Unique labels in combined graph: [0 1 2 4 5]
--- Number of nodes in loaded graph: 122026 ( rain\1487_3335_1