In [None]:
import pandas as pd

# Lift pandas' column-display limit first so the preview below shows every
# column of the (wide) flow table instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# low_memory=False makes pandas parse the file in a single pass rather than
# in chunks, so each column's dtype is inferred from the whole column.
df = pd.read_csv('test_mix.csv', low_memory=False)

# Quick sanity check: print the first five rows of the loaded frame.
print(df.head(5))

In [None]:
import pandas as pd
import networkx as nx
from collections import defaultdict
import community as community_louvain

# Build a multi-layer flow graph from `df` and partition it with Louvain.
#
# Every row of `df` contributes three parallel edges between its source and
# destination IP (one per "interaction" layer), which is why a MultiGraph is
# used. The cell leaves two names behind for later cells: `G` (the graph) and
# `partition` (node -> community id).

# Fixed seed so the Louvain partition is reproducible across kernel restarts.
# (`random_state` is a documented keyword of python-louvain's best_partition.)
LOUVAIN_SEED = 42

# Columns every flow record must provide. 'Label' is listed because each edge
# below stores it; leaving it out would let the presence check pass and then
# fail with a KeyError inside the loop.
required_columns = [
    'Src IP', 'Dst IP', 'Src Port', 'Dst Port', 'Protocol',
    'Flow Duration', 'TotLen Fwd Pkts', 'Init Fwd Win Byts', 'Init Bwd Win Byts',
    'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkt Len Mean', 'Bwd Pkt Len Mean',
    'Idle Min', 'Idle Max', 'Idle Mean', 'Pkt Len Var', 'Pkt Size Avg',
    'Label'
]

# One entry per edge layer: interaction name -> {edge attribute: source column}.
# Dict order is the order in which the parallel edges are added for each row.
edge_layers = {
    # Network Communication Edge
    'network_communication': {
        'src_port': 'Src Port', 'dst_port': 'Dst Port', 'protocol': 'Protocol',
        'flow_duration': 'Flow Duration', 'totlen_fwd_pkts': 'TotLen Fwd Pkts',
    },
    # Context of Device and Environment Edge
    'context': {
        'init_fwd_win_byts': 'Init Fwd Win Byts', 'init_bwd_win_byts': 'Init Bwd Win Byts',
        'fwd_header_len': 'Fwd Header Len', 'bwd_header_len': 'Bwd Header Len',
        'fwd_pkt_len_mean': 'Fwd Pkt Len Mean', 'bwd_pkt_len_mean': 'Bwd Pkt Len Mean',
    },
    # Knowledge Graph Edge
    'knowledge': {
        'idle_min': 'Idle Min', 'idle_max': 'Idle Max', 'idle_mean': 'Idle Mean',
        'pkt_len_var': 'Pkt Len Var', 'pkt_size_avg': 'Pkt Size Avg',
    },
}

# Initialize a MultiGraph to accommodate multiple edges between the same nodes.
G = nx.MultiGraph()

# Reset the result up front so a re-run with missing columns cannot leave a
# stale partition from an earlier execution (or no `partition` name at all)
# for the cells that follow.
partition = {}

# Check for missing columns in the DataFrame
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    print(f"The following required columns are missing from the DataFrame: {missing_columns}")
else:
    print("All required columns are present in the DataFrame.")

    # itertuples over just the needed columns avoids building a pandas Series
    # for every row (which iterrows does) and skips unrelated columns.
    for values in df[required_columns].itertuples(index=False, name=None):
        row = dict(zip(required_columns, values))
        edge_id = (row['Src IP'], row['Dst IP'])

        for interaction, attr_columns in edge_layers.items():
            attrs = {attr: row[col] for attr, col in attr_columns.items()}
            G.add_edge(*edge_id, interaction=interaction, label=row['Label'], **attrs)

    # Detect communities using the Louvain method. G is already undirected
    # (nx.MultiGraph), so it is passed as-is instead of via a to_undirected() copy.
    # NOTE(review): python-louvain may not weight parallel edges consistently on
    # MultiGraph input; consider collapsing to a weighted nx.Graph -- confirm.
    partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

    # Persist basic size statistics of the graph that was just built.
    with open("graph_output.txt", "w") as f:
        f.write(f"Number of nodes: {G.number_of_nodes()}\n")
        f.write(f"Number of edges: {G.number_of_edges()}\n\n")





In [None]:
from collections import defaultdict
import networkx as nx
import community as community_louvain



# Invert the node -> community mapping produced by Louvain into
# community -> [nodes], keeping nodes in the order `partition` lists them.
communities = defaultdict(list)
for node in partition:
    communities[partition[node]].append(node)

# Report how many distinct communities were found.
print(f"Total communities: {len(communities)}\n")

# Write one line per community (id, size, member list) followed by a
# trailing blank line.
with open("communities_output_test.txt", "w") as report:
    report.writelines(
        f"Community {community} has {len(nodes)} nodes: {nodes}\n"
        for community, nodes in communities.items()
    )
    report.write("\n")

# Take the subgraph of G induced by each community's nodes and print its size.
for community, nodes in communities.items():
    subgraph = G.subgraph(nodes)
    print(f"Community {community} subgraph has {subgraph.number_of_nodes()} nodes and {subgraph.number_of_edges()} edges")

