# Create the train datastracture for 3 edges

In [1]:
import pandas as pd

# Specify the features to keep
features_to_keep = [
    'Timestamp', 'Src IP', 'Src Port', 'Dst IP', 'Protocol', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts',
    'Fwd Pkt Len Max', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Mean',
    'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow IAT Std', 'Flow IAT Min',
    'Fwd IAT Std', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
    'Bwd IAT Min', 'Fwd Header Len', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std',
    'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt',
    'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Subflow Fwd Byts',
    'Subflow Bwd Byts', 'Init Fwd Win Byts', 'Fwd Act Data Pkts', 'Fwd Seg Size Min',
    'Idle Mean', 'Idle Max', 'Idle Min', 'Label'
]

# Load your dataset
data = pd.read_csv('train_data.csv')  # Replace 'your_data.csv' with your actual file name

# Keep only the specified features
filtered_data = data[features_to_keep]

# Convert 'Timestamp' to datetime
filtered_data['Timestamp'] = pd.to_datetime(filtered_data['Timestamp'])

# Order the data by 'Timestamp'
filtered_data = filtered_data.sort_values(by='Timestamp')

# Save the temporally ordered data to a new file
filtered_data.to_csv('filtered_train_3edge.csv', index=False)

print("Filtered and temporally ordered data saved to 'filtered_train_3edge.csv'.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['Timestamp'] = pd.to_datetime(filtered_data['Timestamp'])


Filtered and temporally ordered data saved to 'filtered_train_3edge.csv'.


In [2]:
#check for inside of csv (just for test, no need for run)
import pandas as pd

# Load the CSV file
file_path = "filtered_train_3edge.csv"  # Replace with your actual file path
df = pd.read_csv(file_path)

# Check if the label column contains '1'
label_column = 'Label'  # Replace with the actual label column name if different
if label_column in df.columns:
    label_distribution = df[label_column].value_counts()
    print("Label Distribution:")
    print(label_distribution)

    if 1 in label_distribution.index:
        print("The CSV contains label '1'.")
    else:
        print("The CSV does NOT contain label '1'.")
else:
    print(f"'{label_column}' column not found in the CSV.")

Label Distribution:
Label
1    1978039
0    1208711
Name: count, dtype: int64
The CSV contains label '1'.


# created hourly graph with 3 edges from train dataset

In [3]:
import pandas as pd
import networkx as nx
import os

def create_test_graphs_edge_labels(df, output_dir):
    """
    Split the DataFrame into hourly slices and create graphs for each slice.
    Each edge gets a valid label (e.g., 0 or 1) read from the DataFrame.
    
    Parameters:
        df (pd.DataFrame): The input DataFrame with temporal data.
        output_dir (str): Directory to save the graphs.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)
    
    # Group the DataFrame into hourly slices using the datetime index.
    # (Assumes the DataFrame index is already a DateTimeIndex)
    time_slices = [g for _, g in df.groupby(pd.Grouper(freq='H'))]
    
    for slice_index, slice_df in enumerate(time_slices):
        if slice_df.empty:
            continue

        # Print value counts of the 'Label' column in this time-slice.
        print(f"Hour {slice_index}:")
        print(slice_df['Label'].value_counts())
        
        # Create a MultiDiGraph for this time-slice.
        G = nx.MultiDiGraph()

        for _, row in slice_df.iterrows():
            src_ip = row['Src IP']
            dst_ip = row['Dst IP']
            
            # Convert the label to an int (if missing or invalid, you can decide a fallback; here we assume it is valid)
            try:
                label = int(row['Label'])
            except Exception as e:
                print(f"Skipping row due to invalid label: {row['Label']}; error: {e}")
                continue

            if pd.isna(src_ip) or pd.isna(dst_ip):
                continue

            # Add nodes if not already present.
            if not G.has_node(src_ip):
                G.add_node(src_ip)
            if not G.has_node(dst_ip):
                G.add_node(dst_ip)

            # Add edges for different interactions.
            # 1. Network Edge
            G.add_edge(src_ip, dst_ip, key='network',
                       label=label,
                       src_port=row.get('Src Port', 0),
                       protocol=row.get('Protocol', 0),
                       fwd_header_len=row.get('Fwd Header Len', 0),
                       syn_flag_cnt=row.get('SYN Flag Cnt', 0),
                       psh_flag_cnt=row.get('PSH Flag Cnt', 0),
                       ack_flag_cnt=row.get('ACK Flag Cnt', 0),
                       fin_flag_cnt=row.get('FIN Flag Cnt', 0),
                       subflow_fwd_byts=row.get('Subflow Fwd Byts', 0),
                       subflow_bwd_byts=row.get('Subflow Bwd Byts', 0),
                       init_fwd_win_byts=row.get('Init Fwd Win Byts', 0),
                       interaction='network_communication')

            # 2. Context Edge
            G.add_edge(src_ip, dst_ip, key='context',
                       label=label,
                       flow_iat_std=row.get('Flow IAT Std', 0),
                       flow_iat_min=row.get('Flow IAT Min', 0),
                       fwd_iat_std=row.get('Fwd IAT Std', 0),
                       bwd_iat_tot=row.get('Bwd IAT Tot', 0),
                       bwd_iat_mean=row.get('Bwd IAT Mean', 0),
                       bwd_iat_std=row.get('Bwd IAT Std', 0),
                       bwd_iat_max=row.get('Bwd IAT Max', 0),
                       bwd_iat_min=row.get('Bwd IAT Min', 0),
                       idle_mean=row.get('Idle Mean', 0),
                       idle_max=row.get('Idle Max', 0),
                       idle_min=row.get('Idle Min', 0),
                       interaction='context')

            # 3. Knowledge Edge
            G.add_edge(src_ip, dst_ip, key='knowledge',
                       label=label,
                       totlen_fwd_pkts=row.get('TotLen Fwd Pkts', 0),
                       totlen_bwd_pkts=row.get('TotLen Bwd Pkts', 0),
                       fwd_pkt_len_max=row.get('Fwd Pkt Len Max', 0),
                       fwd_pkt_len_mean=row.get('Fwd Pkt Len Mean', 0),
                       fwd_pkt_len_std=row.get('Fwd Pkt Len Std', 0),
                       bwd_pkt_len_mean=row.get('Bwd Pkt Len Mean', 0),
                       bwd_pkt_len_std=row.get('Bwd Pkt Len Std', 0),
                       flow_byts_per_sec=row.get('Flow Byts/s', 0),
                       pkt_len_max=row.get('Pkt Len Max', 0),
                       pkt_len_mean=row.get('Pkt Len Mean', 0),
                       pkt_len_std=row.get('Pkt Len Std', 0),
                       pkt_len_var=row.get('Pkt Len Var', 0),
                       pkt_size_avg=row.get('Pkt Size Avg', 0),
                       fwd_seg_size_avg=row.get('Fwd Seg Size Avg', 0),
                       bwd_seg_size_avg=row.get('Bwd Seg Size Avg', 0),
                       fwd_act_data_pkts=row.get('Fwd Act Data Pkts', 0),
                       fwd_seg_size_min=row.get('Fwd Seg Size Min', 0),
                       interaction='knowledge')

        # Save the graph as a .gpickle file.
        graph_path = os.path.join(output_dir, f"test_graph_hour_{slice_index}.gpickle")
        nx.write_gpickle(G, graph_path)
        print(f"Test graph for hour {slice_index} saved to {graph_path}")

# Usage Example for graph creation
if __name__ == "__main__":
    # Read CSV and prepare DataFrame.
    df_test = pd.read_csv('filtered_train_3edge.csv')
    df_test['Timestamp'] = pd.to_datetime(df_test['Timestamp'])
    # Set Timestamp as index and sort (required for grouping by hour)
    df_test = df_test.set_index('Timestamp').sort_index()

    output_test_dir = "3ed_trai_h_graphs"
    create_test_graphs_edge_labels(df_test, output_test_dir)


Hour 0:
Label
0    558
Name: count, dtype: int64
Test graph for hour 0 saved to 3ed_trai_h_graphs/test_graph_hour_0.gpickle
Hour 1:
Label
0    475
Name: count, dtype: int64
Test graph for hour 1 saved to 3ed_trai_h_graphs/test_graph_hour_1.gpickle
Hour 2:
Label
0    551
Name: count, dtype: int64
Test graph for hour 2 saved to 3ed_trai_h_graphs/test_graph_hour_2.gpickle
Hour 3:
Label
0    1251
Name: count, dtype: int64
Test graph for hour 3 saved to 3ed_trai_h_graphs/test_graph_hour_3.gpickle
Hour 4:
Label
0    1513
Name: count, dtype: int64
Test graph for hour 4 saved to 3ed_trai_h_graphs/test_graph_hour_4.gpickle
Hour 5:
Label
0    1397
Name: count, dtype: int64
Test graph for hour 5 saved to 3ed_trai_h_graphs/test_graph_hour_5.gpickle
Hour 6:
Label
0    1468
Name: count, dtype: int64
Test graph for hour 6 saved to 3ed_trai_h_graphs/test_graph_hour_6.gpickle
Hour 7:
Label
0    1382
Name: count, dtype: int64
Test graph for hour 7 saved to 3ed_trai_h_graphs/test_graph_hour_7.gpickle
Hou

# Community detection for graphs and then update the graph with the label of community for each node

In [4]:
import networkx as nx
import os

def detect_and_label_communities_lpa(graph):
    """
    Perform community detection using the Label Propagation Algorithm (LPA) and label nodes with community IDs.
    Adds 'x' attribute based on the 'community' label.

    Parameters:
        graph (nx.MultiDiGraph): Input graph.

    Returns:
        graph (nx.MultiDiGraph): Updated graph with community labels and 'x' attributes.
    """
    # Convert MultiDiGraph to Graph (undirected graph for LPA)
    undirected_graph = nx.Graph(graph)

    # Perform community detection using LPA
    communities = nx.community.label_propagation_communities(undirected_graph)

    # Assign community labels to nodes and add 'x' attribute
    for community_id, community in enumerate(communities):
        for node in community:
            graph.nodes[node]['community'] = community_id
            graph.nodes[node]['x'] = [community_id]  # 'x' is a feature; wrap in a list for PyTorch Geometric compatibility

    return graph


def process_graphs_with_lpa(input_dir, output_dir):
    """
    Detect communities using LPA, update graphs with community labels, and add 'x' attribute.
    
    Parameters:
        input_dir (str): Directory containing input graphs.
        output_dir (str): Directory to save updated graphs.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Process each graph file in the input directory
    for graph_file in os.listdir(input_dir):
        if not graph_file.endswith('.gpickle'):
            continue
        
        # Load the graph
        graph_path = os.path.join(input_dir, graph_file)
        G = nx.read_gpickle(graph_path)

        # Detect communities using LPA and label nodes
        G = detect_and_label_communities_lpa(G)

        # Save the updated graph
        updated_graph_path = os.path.join(output_dir, graph_file)
        nx.write_gpickle(G, updated_graph_path)
        print(f"Updated graph with LPA communities and 'x' attribute saved to {updated_graph_path}")


# Example usage
if __name__ == "__main__":
    # Input directory containing graphs
    input_graph_dir = "3ed_trai_h_graphs"

    # Output directory for updated graphs
    output_graph_dir = "3ed_trai_h_graphs_commun"

    # Process graphs and add community labels using LPA
    process_graphs_with_lpa(input_graph_dir, output_graph_dir)



Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_14.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_17.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_13.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_18.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_1.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_12.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_11.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/test_graph_hour_6.gpickle
Updated graph with LPA communities and 'x' attribute saved to 3ed_trai_h_graphs_commun/tes

# convert Multigraph to hetrodata

In [5]:
import torch
from torch_geometric.data import HeteroData
import networkx as nx
import os

def multiDiGraph_to_hetero_with_label(G: nx.MultiDiGraph) -> HeteroData:
    """
    Converts a MultiDiGraph with multiple edge types to a HeteroData object.
    Preserves the 'label' field in data[rel_type].edge_label.
    """
    data = HeteroData()
    node_mapping = {node: i for i, node in enumerate(G.nodes())}
    data['ip'].num_nodes = G.number_of_nodes()

    # Add node-level features
    x = []
    community_labels = []
    for node in G.nodes():
        community = G.nodes[node].get('community', -1)
        community_labels.append(community)
        x.append([community])
    data['ip'].community = torch.tensor(community_labels, dtype=torch.long)
    data['ip'].x = torch.tensor(x, dtype=torch.float)

    # Process each edge from G.
    for u, v, key, edge_attrs in G.edges(data=True, keys=True):
        src = node_mapping[u]
        dst = node_mapping[v]
        rel_type = ('ip', key, 'ip')
        if rel_type not in data.edge_types:
            data[rel_type].edge_index = []
            data[rel_type].edge_attr = []
            data[rel_type].edge_label = []  # Container for the label

        data[rel_type].edge_index.append([src, dst])
        feature_vec = []
        if key == 'network':
            for attr_name in ['Src Port', 'Protocol', 'Fwd Header Len', 'SYN Flag Cnt',
                              'PSH Flag Cnt', 'ACK Flag Cnt', 'FIN Flag Cnt',
                              'Subflow Fwd Byts', 'Subflow Bwd Byts', 'Init Fwd Win Byts']:
                feature_vec.append(edge_attrs.get(attr_name, 0))
        elif key == 'context':
            for attr_name in ['Flow IAT Std', 'Flow IAT Min', 'Fwd IAT Std', 'Bwd IAT Tot',
                              'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min',
                              'Idle Mean', 'Idle Max', 'Idle Min']:
                feature_vec.append(edge_attrs.get(attr_name, 0))
        elif key == 'knowledge':
            for attr_name in ['TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max',
                              'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Mean',
                              'Bwd Pkt Len Std', 'Flow Byts/s', 'Pkt Len Max', 'Pkt Len Mean',
                              'Pkt Len Std', 'Pkt Len Var', 'Pkt Size Avg', 'Fwd Seg Size Avg',
                              'Bwd Seg Size Avg', 'Fwd Act Data Pkts', 'Fwd Seg Size Min']:
                feature_vec.append(edge_attrs.get(attr_name, 0))
        data[rel_type].edge_attr.append(feature_vec)
        # Save the label—this should now be valid (0 or 1)
        data[rel_type].edge_label.append(edge_attrs.get('label', -1))

    # Convert lists to tensors.
    for rel_type in data.edge_types:
        data[rel_type].edge_index = torch.tensor(data[rel_type].edge_index, dtype=torch.long).t().contiguous()
        if data[rel_type].edge_attr:
            data[rel_type].edge_attr = torch.tensor(data[rel_type].edge_attr, dtype=torch.float)
        if data[rel_type].edge_label:
            data[rel_type].edge_label = torch.tensor(data[rel_type].edge_label, dtype=torch.long)
    return data

def process_and_save_hetero_graphs_with_label(input_dir, output_dir):
    """
    Converts all .gpickle graphs in a directory to HeteroData objects and saves them as .pt,
    preserving the 'label' field in data[rel_type].edge_label.
    """
    os.makedirs(output_dir, exist_ok=True)
    for graph_file in os.listdir(input_dir):
        if not graph_file.endswith('.gpickle'):
            continue
        graph_path = os.path.join(input_dir, graph_file)
        G = nx.read_gpickle(graph_path)
        hetero_data = multiDiGraph_to_hetero_with_label(G)
        hetero_path = os.path.join(output_dir, graph_file.replace('.gpickle', '.pt'))
        torch.save(hetero_data, hetero_path)
        print(f"Saved HeteroData with labels to {hetero_path}")

if __name__ == "__main__":
    input_test_dir = "3ed_trai_h_graphs_commun"         # Input .gpickle files (with communities added)
    output_test_pt_dir = "3ed_trai_h_graphs_hetero_graphs" # Output .pt files
    process_and_save_hetero_graphs_with_label(input_test_dir, output_test_pt_dir)


Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_14.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_17.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_13.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_18.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_1.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_12.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_11.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_6.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_5.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_4.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_graphs/test_graph_hour_2.pt
Saved HeteroData with labels to 3ed_trai_h_graphs_hetero_gr

# Test for inside of graph, no need to run it

In [17]:
#was test for inside of .pt ( no need to run)
import torch
import os

def inspect_pt_file(file_path):
    """
    Inspects the contents of a .pt file and prints its structure.

    Parameters:
        file_path (str): Path to the .pt file.
    """
    data = torch.load(file_path)
    print(f"Inspecting file: {file_path}")
    print("-" * 40)

    # Check if it's a PyTorch Geometric HeteroData object
    if isinstance(data, dict):
        print("File contains a dictionary. Keys:")
        for key, value in data.items():
            print(f"  {key}: {type(value)}")
            if isinstance(value, torch.Tensor):
                print(f"    Tensor shape: {value.shape}")
    elif hasattr(data, 'keys') and hasattr(data, 'edge_index_dict'):
        print("File contains a HeteroData object.")
        print(f"Node types: {data.node_types}")
        for node_type in data.node_types:
            print(f"  Node type '{node_type}':")
            if 'x' in data[node_type]:
                print(f"    Node features 'x': shape {data[node_type].x.shape}")
            else:
                print("    No node features ('x') found.")
            if 'num_nodes' in data[node_type]:
                print(f"    Number of nodes: {data[node_type].num_nodes}")
        
        print(f"Edge types: {data.edge_types}")
        for edge_type in data.edge_types:
            print(f"  Edge type {edge_type}:")
            if 'edge_index' in data[edge_type]:
                print(f"    Edge index: shape {data[edge_type].edge_index.shape}")
            if 'edge_attr' in data[edge_type]:
                print(f"    Edge attributes: shape {data[edge_type].edge_attr.shape}")
    else:
        print("Unknown data format.")
    print("-" * 40)

def inspect_all_pt_files(directory):
    """
    Inspects all .pt files in a given directory.

    Parameters:
        directory (str): Path to the directory containing .pt files.
    """
    print(f"Inspecting .pt files in directory: {directory}")
    for file in os.listdir(directory):
        if file.endswith(".pt"):
            inspect_pt_file(os.path.join(directory, file))

# Directory containing your .pt files
input_graph_dir = "3ed_trai_h_graphs_hetero_graphs"

# Inspect all files in the directory
inspect_all_pt_files(input_graph_dir)


Inspecting .pt files in directory: 3ed_trai_h_graphs_hetero_graphs
Inspecting file: 3ed_trai_h_graphs_hetero_graphs/graph_hour_1876.pt
----------------------------------------
File contains a HeteroData object.
Node types: ['ip']
  Node type 'ip':
    Node features 'x': shape torch.Size([17604, 1])
    Number of nodes: 17604
Edge types: [('ip', 'network', 'ip'), ('ip', 'context', 'ip'), ('ip', 'knowledge', 'ip')]
  Edge type ('ip', 'network', 'ip'):
    Edge index: shape torch.Size([2, 19038])
    Edge attributes: shape torch.Size([19038, 10])
  Edge type ('ip', 'context', 'ip'):
    Edge index: shape torch.Size([2, 19038])
    Edge attributes: shape torch.Size([19038, 11])
  Edge type ('ip', 'knowledge', 'ip'):
    Edge index: shape torch.Size([2, 19038])
    Edge attributes: shape torch.Size([19038, 17])
----------------------------------------
Inspecting file: 3ed_trai_h_graphs_hetero_graphs/graph_hour_1967.pt
----------------------------------------
File contains a HeteroData objec

  data = torch.load(file_path)


Inspecting file: 3ed_trai_h_graphs_hetero_graphs/graph_hour_1885.pt
----------------------------------------
File contains a HeteroData object.
Node types: ['ip']
  Node type 'ip':
    Node features 'x': shape torch.Size([163, 1])
    Number of nodes: 163
Edge types: [('ip', 'network', 'ip'), ('ip', 'context', 'ip'), ('ip', 'knowledge', 'ip')]
  Edge type ('ip', 'network', 'ip'):
    Edge index: shape torch.Size([2, 147])
    Edge attributes: shape torch.Size([147, 10])
  Edge type ('ip', 'context', 'ip'):
    Edge index: shape torch.Size([2, 147])
    Edge attributes: shape torch.Size([147, 11])
  Edge type ('ip', 'knowledge', 'ip'):
    Edge index: shape torch.Size([2, 147])
    Edge attributes: shape torch.Size([147, 17])
----------------------------------------
Inspecting file: 3ed_trai_h_graphs_hetero_graphs/graph_hour_1906.pt
----------------------------------------
File contains a HeteroData object.
Node types: ['ip']
  Node type 'ip':
    Node features 'x': shape torch.Size([18

In [13]:
#was test for inside of graph ( no need to run)
import os
import networkx as nx

def inspect_community_in_gpickle(file_path):
    """
    Inspects the presence of the 'community' attribute in a .gpickle file.

    Parameters:
        file_path (str): Path to the .gpickle file.
    """
    print(f"Inspecting file: {file_path}")
    print("-" * 40)

    # Load the graph
    G = nx.read_gpickle(file_path)

    # Check for 'community' attribute in nodes
    if all('community' in G.nodes[node] for node in G.nodes()):
        print(f"All nodes have a 'community' attribute.")
        print("Sample 'community' values:")
        sample_communities = {node: G.nodes[node]['community'] for node in list(G.nodes)[:10]}
        print(sample_communities)
    else:
        missing = [node for node in G.nodes() if 'community' not in G.nodes[node]]
        print(f"Some nodes are missing the 'community' attribute. Missing nodes: {missing[:10]} (only showing first 10)")

    print(f"Total nodes: {len(G.nodes())}")
    print("-" * 40)


def inspect_all_gpickle_files(directory):
    """
    Inspects the 'community' attribute in all .gpickle files in a given directory.

    Parameters:
        directory (str): Path to the directory containing .gpickle files.
    """
    print(f"Inspecting .gpickle files in directory: {directory}")
    for file in os.listdir(directory):
        if file.endswith(".gpickle"):
            inspect_community_in_gpickle(os.path.join(directory, file))


# Directory containing your .gpickle files
input_graph_dir = "3ed_trai_h_graphs_commun"

# Inspect all files in the directory for the 'community' attribute
inspect_all_gpickle_files(input_graph_dir)


Inspecting .gpickle files in directory: 3ed_trai_h_graphs_commun
Inspecting file: 3ed_trai_h_graphs_commun/graph_hour_1886.gpickle
----------------------------------------
All nodes have a 'community' attribute.
Sample 'community' values:
{'41.24.115.254': 0, '192.168.1.190': 0, '183.69.192.168': 1, '1.1.192.168': 1, '252.229.192.168': 2, '1.190.172.17': 2, '41.72.66.76': 3, '192.168.1.32': 3, '41.184.177.116': 4, '192.168.1.31': 4}
Total nodes: 198
----------------------------------------
Inspecting file: 3ed_trai_h_graphs_commun/graph_hour_2.gpickle
----------------------------------------
All nodes have a 'community' attribute.
Sample 'community' values:
{'228.243.192.168': 0, '1.152.192.168': 0, '217.97.192.168': 0, '177.30.87.144': 1, '192.168.1.1': 1, '120.187.192.168': 2, '1.152.3.122': 2, '93.53.192.168': 0, '244.15.103.115': 3, '192.168.1.79': 3}
Total nodes: 60
----------------------------------------
Inspecting file: 3ed_trai_h_graphs_commun/graph_hour_1408.gpickle
---------