In [18]:
import os
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Root folder scanned by the loop at the bottom of this cell. Each
# sub-directory is treated as one play and is searched for a "<name>_edges.csv"
# and a "<name>_nodes.csv" file. The path is relative, so it is resolved
# against the current working directory.
tables_folder = "tables"

# Function to derive per-node marker sizes from degree centrality
def calculate_node_sizes(G):
    """Return one marker size per node, proportional to its degree centrality.

    Args:
        G: A networkx graph.

    Returns:
        A list of sizes (3000 * degree centrality), in the same order in which
        ``nx.degree_centrality`` reports the nodes of ``G``.
    """
    size_scale = 3000
    centrality_by_node = nx.degree_centrality(G)
    # Iterating the mapping keeps the order returned by networkx, so the list
    # lines up with the graph's node order when it is passed to the drawing
    # routine.
    return [size_scale * centrality_by_node[node] for node in centrality_by_node]

# Function to visualize the network
def visualize_network(G, node_sizes, save_folder):
    """Draw ``G`` with a spring layout and save it as a PNG.

    Args:
        G: A networkx graph. Edge thickness is taken from each edge's
            ``weight`` attribute; edges without one are drawn with width 1.
        node_sizes: Marker sizes, one per node, in the graph's node order.
        save_folder: Directory in which ``network_visualization.png`` is
            written.

    Side effects:
        Writes the PNG file and prints the path it was saved to.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        pos = nx.spring_layout(G, k=0.2, iterations=100)
        # Draw nodes with adjusted sizes
        nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='skyblue', alpha=0.7)
        # Draw edges with varying thickness based on weight. Asking networkx
        # for the attribute with a default avoids a KeyError on edges that
        # were added without a weight.
        weights = [weight for _, _, weight in G.edges(data='weight', default=1)]
        nx.draw_networkx_edges(G, pos, width=weights, alpha=0.5, edge_color='gray')
        # Draw labels
        nx.draw_networkx_labels(G, pos, font_size=10, font_color='black')
        plt.title("Network Visualization")
        plt.axis('off')
        # Save network visualization as PNG
        save_path = os.path.join(save_folder, "network_visualization.png")
        plt.savefig(save_path)
    finally:
        # Always release this figure, even if layout/drawing/saving failed;
        # the function is called once per play, so leaked figures add up.
        plt.close(fig)
    print(f"Network visualization saved at: {save_path}")

# Function to compute degree centrality and export it as a CSV table
def calculate_and_save_centrality(G, save_folder):
    """Write the degree centrality of every node in ``G`` to ``centrality.csv``.

    Args:
        G: A networkx graph.
        save_folder: Directory in which ``centrality.csv`` is written.

    Side effects:
        Writes the CSV (columns ``Node`` and ``Degree Centrality``, highest
        centrality first, no index column) and prints the path it was saved to.
    """
    scores = nx.degree_centrality(G)
    column_names = ['Node', 'Degree Centrality']
    # One row per (node, score) pair, ranked from most to least central.
    ranked = pd.DataFrame(scores.items(), columns=column_names).sort_values(
        by='Degree Centrality', ascending=False
    )
    save_path = os.path.join(save_folder, "centrality.csv")
    ranked.to_csv(save_path, index=False)
    print(f"Centrality saved at: {save_path}")

# Iterate through each play folder in the tables folder. For every play that
# provides both an edges table and a nodes table, build an undirected weighted
# graph, export its degree centrality and save a picture of the network.
for play_folder in os.listdir(tables_folder):
    play_folder_path = os.path.join(tables_folder, play_folder)

    # Only directories hold play tables; skip stray files
    if not os.path.isdir(play_folder_path):
        continue

    # Create empty graph
    G = nx.Graph()

    # Look for "<name>_edges.csv" and "<name>_nodes.csv" inside the play folder
    edges_df = None
    nodes_df = None
    for file in os.listdir(play_folder_path):
        file_name, file_extension = os.path.splitext(file)
        if file_extension != ".csv":
            continue
        file_parts = file_name.split("_")
        if len(file_parts) != 2:
            continue

        if file_parts[1] == "edges":
            edges_file = os.path.join(play_folder_path, file)
            # Read edges data
            edges_df = pd.read_csv(edges_file)
            print(f"Edges loaded from: {edges_file}")
            # Add edges with weights. nx.Graph is undirected, so a single
            # add_edge call already covers both directions.
            for _, edge_data in edges_df.iterrows():
                source = str(edge_data['Source']).lstrip('#')
                target = str(edge_data['Target']).lstrip('#')
                weight = edge_data['Weight']
                G.add_edge(source, target, weight=weight)
        elif file_parts[1] == "nodes":
            nodes_file = os.path.join(play_folder_path, file)
            # Read nodes data
            nodes_df = pd.read_csv(nodes_file)
            print(f"Nodes loaded from: {nodes_file}")

    # Both tables are required to build the network for this play
    if edges_df is None or nodes_df is None:
        continue

    # Add nodes with labels. Ids are normalised exactly like the edge
    # endpoints above (string, leading '#' removed); otherwise an Id such as
    # "#ana" would become a separate isolated node next to the "ana" created
    # by the edges, duplicating characters and distorting degree centrality.
    for _, node_data in nodes_df.iterrows():
        node_id = str(node_data['Id']).lstrip('#')
        G.add_node(node_id, label=node_data['Label'])

    # Check if the graph is not empty
    if G.number_of_nodes() == 0:
        print(f"No nodes found in folder: {play_folder_path}")
        continue

    # Calculate node sizes based on degree centrality
    node_sizes = calculate_node_sizes(G)
    # Save centrality to CSV
    calculate_and_save_centrality(G, play_folder_path)
    # Visualize network and save as PNG
    visualize_network(G, node_sizes, play_folder_path)


Nodes loaded from: tables/el-ano-santo-de-madrid/el-ano-santo-de-madrid_nodes.csv
Edges loaded from: tables/el-ano-santo-de-madrid/el-ano-santo-de-madrid_edges.csv
Centrality saved at: tables/el-ano-santo-de-madrid/centrality.csv
Network visualization saved at: tables/el-ano-santo-de-madrid/network_visualization.png
Nodes loaded from: tables/el-santo-rey-fernando-segunda-parte/el-santo-rey-fernando-segunda-parte_nodes.csv
Edges loaded from: tables/el-santo-rey-fernando-segunda-parte/el-santo-rey-fernando-segunda-parte_edges.csv
Centrality saved at: tables/el-santo-rey-fernando-segunda-parte/centrality.csv
Network visualization saved at: tables/el-santo-rey-fernando-segunda-parte/network_visualization.png
Edges loaded from: tables/la-cena-del-rey-baltasar-auto/la-cena-del-rey-baltasar-auto_edges.csv
Nodes loaded from: tables/la-cena-del-rey-baltasar-auto/la-cena-del-rey-baltasar-auto_nodes.csv
Centrality saved at: tables/la-cena-del-rey-baltasar-auto/centrality.csv
Network visualization