In [109]:
import os
import networkx as nx
from collections import defaultdict
import pandas as pd

In [110]:
# Folder holding the graph files that are listed and read in the file loop further down.
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
# CSV loaded into dfScore; later cells use its 'Score' and 'Difference' columns.
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [111]:
try:
    # Load the match metadata; later cells use its 'Score' and 'Difference' columns.
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
    # Re-raise: without dfScore every following cell would fail with an
    # unrelated NameError, or silently reuse a stale dfScore left in the kernel.
    raise
except Exception as e:
    print("An error occurred:", e)
    # Same reasoning as above: report the problem, but do not hide it.
    raise

In [112]:
# Make 'Difference' numeric; values that cannot be parsed become NaN.
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Number of rows for each distinct 'Score'.
score_counts = dfScore.groupby('Score').size()

# A row is kept when ANY of the following holds:
#   * 'Difference' < 3 and its score occurs more than once in dfScore
#   * 'Difference' <= 0
#   * its score is exactly "3_2" or "3_0"
repeated_scores = score_counts[score_counts > 1].index
small_margin_and_repeated = (dfScore['Difference'] < 3) & dfScore['Score'].isin(repeated_scores)
non_positive_difference = dfScore['Difference'] <= 0
forced_scores = (dfScore["Score"] == "3_2") | (dfScore["Score"] == "3_0")

filtered_df_score = dfScore[small_margin_and_repeated | non_positive_difference | forced_scores]



In [113]:
# Number of rows remaining for each score after the filter above.
filteredMatches = filtered_df_score.groupby('Score').size()
print(filteredMatches)

Score
0_1     9
0_2     3
0_3     1
1_0    28
1_1    13
1_2     3
1_3     1
1_4     1
2_0    18
2_1    12
2_2     2
3_0    14
3_1     7
3_2     1
dtype: int64


In [114]:
# Series.keys() returns the index, i.e. the distinct score labels that survived the filter.
print(filteredMatches.keys())

Index(['0_1', '0_2', '0_3', '1_0', '1_1', '1_2', '1_3', '1_4', '2_0', '2_1',
       '2_2', '3_0', '3_1', '3_2'],
      dtype='object', name='Score')


In [115]:
def getScore(fileName):
    """Extract the score label from a graph file name.

    The name is split on "_". Only names made of exactly five fields yield a
    score: the third and fourth fields joined by "_" (e.g. "3_0"). Any other
    field count returns None.
    """
    tokens = fileName.split("_")
    if len(tokens) != 5:
        return None
    return "_".join(tokens[2:4])


In [116]:
def readGraph(dataFolder, fileName):
    """Read the GEXF file `fileName` located in `dataFolder`.

    Returns the graph produced by nx.read_gexf. When the file is missing or
    networkx raises NetworkXError, a message is printed and None is returned.
    """
    filePath = os.path.join(dataFolder, fileName)
    try:
        return nx.read_gexf(filePath)
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
    except nx.NetworkXError as err:
        print(f"Error reading graph from '{filePath}': {err}")
    # Reached only after one of the handled errors above.
    return None

In [117]:
# One empty container per metric; the file loop fills each one as
# network_metrics[metric][score][node].
network_metrics = {
    metric_name: {}
    for metric_name in (
        'inDegree_players',
        'outDegree_players',
        'clustering_coefficients',
        'betweenness_centralities',
        'closeness_centralities',
        'eigenvector_centrality',
        'eccentricity',
    )
}

In [118]:

# Iterate through all the files in the folder and accumulate per-node metrics,
# grouped by the score encoded in each file name.
#
# Every metric is stored as network_metrics[metric][score][node] -> list with
# one entry per graph, so the summary cell can compute mean/std/count per node.
# NOTE: this cell appends to network_metrics; re-run the cell that initialises
# network_metrics before re-running this one, otherwise graphs are counted twice.

# Scores kept by the filter; computed once instead of once per file.
selected_scores = set(filtered_df_score["Score"].unique())

for fileName in os.listdir(path):
    score = getScore(fileName)
    if score not in selected_scores:
        # Covers both unselected scores and names getScore cannot parse (None).
        continue

    graph = readGraph(path, fileName)
    if graph is None or graph.number_of_nodes() == 0:
        # readGraph already printed the reason for None; an empty graph has
        # nothing to measure and would make is_strongly_connected raise.
        continue

    # Each networkx call below runs once per graph and returns values for all
    # nodes, instead of recomputing the whole-graph result for every node.
    per_graph_metrics = {
        'inDegree_players': dict(graph.in_degree()),
        'outDegree_players': dict(graph.out_degree()),
        'clustering_coefficients': nx.clustering(graph),
        'betweenness_centralities': nx.betweenness_centrality(graph),
        'closeness_centralities': nx.closeness_centrality(graph),
    }
    # Eccentricity and eigenvector centrality are only collected for strongly
    # connected graphs, as in the original analysis.
    if nx.is_strongly_connected(graph):
        per_graph_metrics['eccentricity'] = nx.eccentricity(graph)
        per_graph_metrics['eigenvector_centrality'] = nx.eigenvector_centrality(graph)

    for metric_name, values_by_node in per_graph_metrics.items():
        score_bucket = network_metrics[metric_name].setdefault(score, {})
        for node, value in values_by_node.items():
            # Append (never overwrite) so every graph with this score contributes.
            score_bucket.setdefault(node, []).append(value)
                
                

In [119]:
# Write one CSV per selected score. For every metric present for that score the
# table gets three columns: the per-node mean, standard deviation and number of
# observations (rounded to 2 decimals for mean/std).
#
# NOTE(review): every metric contributes columns literally named
# 'Mean'/'Std'/'Count', so the header repeats them in network_metrics key order
# and simply omits metrics that are missing for a score. The names are left
# unchanged here in case downstream readers rely on the existing layout.
output_dir = "Barcelona/Individual"
# to_csv does not create missing folders; make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

for score in filtered_df_score["Score"].unique():
    summary_columns = []

    for element in network_metrics:
        if score not in network_metrics[element]:
            continue
        # Rows are nodes, columns are observations; shorter rows are padded
        # with NaN, which mean/std/count ignore by default.
        df = pd.DataFrame.from_dict(network_metrics[element][score], orient='index')
        summary_columns.extend([
            df.mean(axis=1).round(2).rename('Mean'),
            df.std(axis=1).round(2).rename('Std'),
            df.count(axis=1).rename('Count'),
        ])

    # Build the table in one concat; fall back to an empty frame when no metric
    # was collected for this score (pd.concat rejects an empty list).
    metricsTable = pd.concat(summary_columns, axis=1) if summary_columns else pd.DataFrame()
    metricsTable.to_csv(f"{output_dir}/{score}_individualMetrics.csv", index = True)

