In [12]:
import os
import networkx as nx
from collections import defaultdict
import pandas as pd

In [13]:
# Folder whose entries are listed with os.listdir and handed to readGraph further down.
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
# CSV loaded into dfScore; its 'Score' and 'Difference' columns drive the score filter.
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [14]:
# Load the metadata CSV into dfScore.
# The following cells read dfScore['Score'] and dfScore['Difference'], so a failed
# load must stop the notebook here: swallowing the error would leave dfScore
# undefined (or stale from an earlier kernel run) and fail later with a less
# helpful message.
try:
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
    raise
except Exception as e:
    print("An error occurred:", e)
    raise

In [15]:
# Make 'Difference' numeric; values that cannot be parsed become NaN and therefore
# fail both numeric comparisons below.
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Number of rows per 'Score' value.
score_counts = dfScore.groupby('Score').size()

# Scores that occur in more than one row.
repeated_scores = score_counts[score_counts > 1].index

# A row is kept when ANY of the following holds:
#   1. 'Difference' < 3 and its score occurs more than once,
#   2. 'Difference' <= 0,
#   3. its score is "3_2" or "3_0" (always kept, regardless of the other rules).
small_difference_and_repeated = (dfScore['Difference'] < 3) & dfScore['Score'].isin(repeated_scores)
non_positive_difference = dfScore['Difference'] <= 0
always_kept_score = (dfScore["Score"] == "3_2") | (dfScore["Score"] == "3_0")

filtered_df_score = dfScore[
    small_difference_and_repeated | non_positive_difference | always_kept_score
]

In [16]:
def getScore(fileName):
    """Extract the score label from a graph file name.

    The name is split on "_". Only names with exactly five fields carry a
    score: the third and fourth fields joined by "_" (e.g. "1_0").
    Presumably these are the two sides of a match result -- confirm against
    the file-naming scheme.

    Parameters
    ----------
    fileName : str
        File name to inspect.

    Returns
    -------
    str or None
        "<field3>_<field4>" for five-field names, otherwise None.
    """
    tokens = fileName.split("_")
    if len(tokens) != 5:
        # Four-field names (and any other shape) have no score.
        return None
    return "_".join(tokens[2:4])

In [17]:
def readGraph(dataFolder, fileName):
    """Load a GEXF graph from ``dataFolder/fileName``.

    Parameters
    ----------
    dataFolder : str
        Directory containing the file.
    fileName : str
        Name of the GEXF file inside ``dataFolder``.

    Returns
    -------
    graph or None
        Whatever ``nx.read_gexf`` returns, or None when the file is missing
        or networkx raises ``NetworkXError`` (a message is printed in both
        cases). Other exceptions propagate to the caller.
    """
    filePath = os.path.join(dataFolder, fileName)
    try:
        return nx.read_gexf(filePath)
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
    # Reached only after one of the handled failures above.
    return None

In [18]:
# Accumulator for the graph metrics: metric name -> {score -> list of values}.
# The inner dictionaries start empty and are filled by the loop over graph files.
network_metrics = {
    metric_name: {}
    for metric_name in ('average_clustering', 'density', 'diameter')
}

In [19]:

# Collect per-graph metrics, grouped by the score encoded in each file name.

# Scores kept by the filtering step, computed once instead of on every iteration.
selected_scores = set(filtered_df_score["Score"].unique())

for fileName in os.listdir(path):
    # Check the score first so that entries we are not interested in
    # (other scores, names without a score) are never parsed.
    score = getScore(fileName)
    if score is None or score not in selected_scores:
        continue

    graph = readGraph(path, fileName)
    if graph is None:
        # readGraph already printed why the file could not be loaded.
        continue

    # Average clustering coefficient of the graph.
    average_clustering = nx.average_clustering(graph)
    network_metrics['average_clustering'].setdefault(score, []).append(average_clustering)

    # Graph density.
    density = nx.density(graph)
    network_metrics['density'].setdefault(score, []).append(density)

    # The diameter is only recorded for strongly connected graphs, so a score
    # can have fewer diameter values than clustering/density values.
    if nx.is_strongly_connected(graph):
        diameter = nx.diameter(graph)
        network_metrics["diameter"].setdefault(score, []).append(diameter)
            
            

In [20]:
# Display the collected metric lists (metric name -> score -> one value per graph).
network_metrics

{'average_clustering': {'0_1': [0.45436117936117937,
   0.691912022045527,
   0.7639105584223681,
   0.632833387191292,
   0.7332883554256678,
   0.5366863021210097,
   0.45849424916702314,
   0.4772548093809091,
   0.6911668130878258],
  '0_2': [0.6543267215553988, 0.10476190476190476, 0.3957571430337648],
  '1_2': [0.7203986929421033, 0.41704423084345554, 0.6349333122113149],
  '1_0': [0.5424826880147292,
   0.5326840378010442,
   0.8289962195394007,
   0.5225904543836233,
   0.5933505871493133,
   0.1596153846153846,
   0.7509075735719534,
   0.49963369963369964,
   0.7936241155490794,
   0.8384977662801755,
   0.6657303699520561,
   0.8189530939897509,
   0.8636779322947937,
   0.5985762784846099,
   0.6427392301102021,
   0.8460093060835949,
   0.7654290572483805,
   0.6909234524693549,
   0.62415641992989,
   0.6810186474012775,
   0.7222663234143618,
   0.40371753387624554,
   0.5878039624324125,
   0.575563833278374,
   0.5273678112200851,
   0.5609748841493768,
   0.8020844300

In [21]:
# Build the summary table: for every metric in network_metrics (in dictionary
# order) add three columns -- Mean, Std and Count of its values per score.
# The column names repeat for each metric, exactly as in the previous CSV layout.
summary_columns = []

for element in network_metrics:
    # One row per score, one column per graph; scores with fewer graphs are
    # padded with NaN, which mean/std/count skip.
    df = pd.DataFrame.from_dict(network_metrics[element], orient='index')

    mean_values = df.mean(axis=1).round(2)
    std_values = df.std(axis=1).round(2)  # NaN when a score has a single value
    count_values = df.count(axis=1)

    summary_columns.extend([
        mean_values.rename('Mean'),
        std_values.rename('Std'),
        count_values.rename('Count'),
    ])

# Concatenate once instead of growing the frame inside the loop.
metricsTable = pd.concat(summary_columns, axis=1) if summary_columns else pd.DataFrame()

# Make sure the output folder exists before writing.
outputPath = "Barcelona/Global/globalMetrics.csv"
os.makedirs(os.path.dirname(outputPath), exist_ok=True)
metricsTable.to_csv(outputPath, index=True)



In [22]:
# Display the summary table; each metric contributes a Mean/Std/Count column triple, in network_metrics order.
metricsTable

Unnamed: 0,Mean,Std,Count,Mean.1,Std.1,Count.1,Mean.2,Std.2,Count.2
0_1,0.6,0.12,9,0.53,0.16,9,2.71,0.49,7.0
0_2,0.38,0.27,3,0.35,0.15,3,5.0,,1.0
1_2,0.59,0.16,3,0.51,0.17,3,2.5,0.71,2.0
1_0,0.64,0.16,28,0.57,0.17,28,2.96,0.66,26.0
2_0,0.58,0.22,18,0.55,0.15,18,3.0,1.1,16.0
3_0,0.6,0.12,14,0.54,0.12,14,3.0,0.41,13.0
1_1,0.44,0.19,13,0.4,0.17,13,3.12,0.64,8.0
2_1,0.5,0.18,12,0.45,0.16,12,3.82,1.17,11.0
2_2,0.3,0.05,2,0.26,0.07,2,4.0,,1.0
3_2,0.69,,1,0.65,,1,3.0,,1.0
