In [2]:
import os
import networkx as nx
from collections import defaultdict
import pandas as pd

In [3]:
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [4]:
try:
    # Open the CSV file
    # Create a CSV reader object
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
except Exception as e:
    print("An error occurred:", e)

In [5]:
# Convert 'Difference' column to numeric
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Group by the 'Score' column and calculate the size of each group
score_counts = dfScore.groupby('Score').size()

# Filter the DataFrame by the conditions: 'Difference' <= 3, size of 'Score' > 1, and 'Difference' < 0
filtered_df_score = dfScore[((dfScore['Difference'] < 3) & dfScore['Score'].isin(score_counts[score_counts > 1].index)) | (dfScore['Difference'] <= 0) | (dfScore["Score"] == "3_2") | ((dfScore["Score"] == "3_0"))]



In [6]:
filteredMatches = filtered_df_score.groupby('Score').size()
print(filteredMatches)

Score
0_1     9
0_2     3
0_3     1
1_0    28
1_1    11
1_2     3
1_3     1
1_4     1
2_0    20
2_1    12
2_2     2
3_0    14
3_1     6
dtype: int64


In [7]:
print(filteredMatches.keys())

Index(['0_1', '0_2', '0_3', '1_0', '1_1', '1_2', '1_3', '1_4', '2_0', '2_1',
       '2_2', '3_0', '3_1'],
      dtype='object', name='Score')


In [8]:
def getScore(fileName):
    parts = fileName.split("_")
    if len(parts) == 4:
        pass
    elif len(parts) == 5 :
        return f"{parts[2]}_{parts[3]}"


In [9]:
def readGraph(dataFolder, fileName):
    try:
        filePath = os.path.join(dataFolder, fileName)
        graph = nx.read_gexf(filePath)
        return graph
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
        return None
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
        return None

In [10]:
# Initialize dictionaries to store metrics for each score
network_metrics = {
    'inDegree_players': {},
    'outDegree_players': {},
    'clustering_coefficients': {},
    'betweenness_centralities': {},
    'closeness_centralities': {},
    'eigenvector_centrality': {},
    'eccentricity': {}
}

In [11]:

# Iterate through all the files in the folder
for fileName in os.listdir(path):
    # Join the folder path with the file name
    graph = readGraph(path, fileName)
    score = getScore(fileName)
    if score in filtered_df_score["Score"].unique():
        # Initialize dictionaries for metrics if they don't exist
        if score not in network_metrics['inDegree_players']:
            network_metrics['inDegree_players'][score] = {}
        if score not in network_metrics['outDegree_players']:
            network_metrics['outDegree_players'][score] = {}
        if score not in network_metrics['clustering_coefficients']:
            network_metrics['clustering_coefficients'][score] = {}
        if score not in network_metrics['betweenness_centralities']:
            network_metrics['betweenness_centralities'][score] = {}
        if score not in network_metrics['closeness_centralities']:
            network_metrics['closeness_centralities'][score] = {}
        if nx.is_strongly_connected(graph):   
            if score not in network_metrics['eigenvector_centrality']:
                network_metrics['eigenvector_centrality'][score] = {}
            if score not in network_metrics['eccentricity']:
                network_metrics['eccentricity'][score] = {}

        # Iterate over nodes in the graph
        for node in graph.nodes():
            # Calculate in-degree and append to inDegree_players dictionary
            in_degree = graph.in_degree(node)
            if node not in network_metrics['inDegree_players'][score]:
                network_metrics['inDegree_players'][score][node] = []
            network_metrics['inDegree_players'][score][node].append(in_degree)
            
            # Calculate out-degree and append to outDegree_players dictionary
            out_degree = graph.out_degree(node)
            if node not in network_metrics['outDegree_players'][score]:
                network_metrics['outDegree_players'][score][node] = []
            network_metrics['outDegree_players'][score][node].append(out_degree)
            
            # Calculate clustering coefficient and store
            clustering_coefficient = nx.clustering(graph, node)
            network_metrics['clustering_coefficients'][score][node] = clustering_coefficient
            
            # Calculate betweenness centrality and store
            betweenness_centrality = nx.betweenness_centrality(graph)[node]
            network_metrics['betweenness_centralities'][score][node] = betweenness_centrality
            
            # Calculate closeness centrality and store
            closeness_centrality = nx.closeness_centrality(graph)[node]
            network_metrics['closeness_centralities'][score][node] = closeness_centrality
            
            if nx.is_strongly_connected(graph):   
                #Calcualte eccentricity and store
                eccentricity = nx.eccentricity(graph)[node]
                network_metrics["eccentricity"][score][node] = eccentricity
                
                #Calcualte eccentricity and store
                eigenvector = nx.eigenvector_centrality(graph)[node]
                network_metrics["eigenvector_centrality"][score][node] = eigenvector
                
                

In [12]:
for score in filtered_df_score["Score"].unique():
       for element in network_metrics:
           if score in network_metrics[element]:
               for keys in network_metrics[element][score]:
                    print(keys)
                    print(network_metrics[element][score][keys])

6616
[1, 8]
5503
[3, 8, 6, 2, 6]
5203
[7, 6, 9, 6, 9, 5, 4, 4, 7]
6379
[3, 6, 8, 7, 7, 3, 5]
5506
[3, 6, 10, 4, 6, 4, 4]
4691
[2, 6, 4, 3, 2, 2, 0]
4324
[0, 6, 6, 4, 6]
4320
[3, 7, 9, 6, 9, 6, 4, 4, 7]
5213
[3, 8, 5, 8, 4, 3, 6, 7]
11094
[3, 5]
6998
[2, 8]
5470
[6, 7, 8, 6, 3, 4]
5211
[8, 8, 5, 5, 5, 4, 6]
5216
[6, 11, 7, 2]
5246
[7, 8, 8, 9, 5, 5, 1, 6]
20055
[5, 2]
6332
[8]
10609
[5, 4, 5, 4, 6]
3508
[8, 6]
7068
[5]
6400
[7]
6616
[3, 5]
5503
[1, 8, 6, 3, 6]
5203
[6, 9, 10, 5, 8, 6, 3, 4, 5]
6379
[5, 7, 8, 6, 7, 4, 5]
5506
[2, 8, 11, 3, 8, 3, 3]
4691
[3, 5, 8, 4, 2, 3, 2]
4324
[1, 5, 8, 6, 6]
4320
[3, 5, 8, 6, 7, 6, 6, 3, 8]
5213
[3, 10, 8, 8, 4, 5, 3, 8]
11094
[2, 5]
6998
[1, 7]
5470
[7, 10, 9, 6, 5, 4]
5211
[7, 9, 9, 6, 5, 6, 6]
5216
[5, 9, 7, 3]
5246
[3, 5, 6, 4, 2, 3, 1, 3]
20055
[8, 5]
6332
[5]
10609
[6, 4, 5, 5, 7]
3508
[4, 3]
7068
[4]
6400
[7]
6616
0.8082191780821918
5503
0.6229508196721312
5203
0.7540983606557377
6379
0.7682926829268293
5506
0.4444444444444444
4691
1.0
4324
0.

INDIVIDUAL TABLES FOR EACH SCORE

In [13]:

for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                    df = pd.DataFrame(network_metrics[element][score])        
                    df_no_nan = df.fillna(" ")
                    df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
                    #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

                    #df_string = df_filled_int.to_string(header=False)

                    # Calculate mean, standard deviation, and count
                    mean_values = df_no_blank.mean(axis=1).round(2)
                    std_values = df_no_blank.std(axis=1).round(2)
                    count_values = df_no_blank.count(axis=1)
                    metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index = True)



ValueError: All arrays must be of the same length

In [None]:
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                data = network_metrics[element][score][keys]  # Get the list data
                df = pd.DataFrame(data, columns=['Values'])  # Create DataFrame from list
                metricsTable = pd.concat([metricsTable, df], axis=1)
                
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index=True)


ValueError: DataFrame constructor not properly called!

INDIVIDUAL TABLE FOR EACH PLAYER

In [None]:
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            df = pd.DataFrame.from_dict(network_metrics[element][score], orient='index')        
            df_no_nan = df.fillna(" ")
            df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
            #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

            #df_string = df_filled_int.to_string(header=False)

            # Calculate mean, standard deviation, and count
            mean_values = df_no_blank.mean(axis=1).round(2)
            std_values = df_no_blank.std(axis=1).round(2)
            count_values = df_no_blank.count(axis=1)
            metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
    metricsTable.to_csv(f"Barcelona/Individual/{score}_individualMetrics.csv", index = True)

In [None]:
import pandas as pd

# Example dictionary of dictionaries
network_metrics = {
    '6616': {'metric1': [10, 20, 30], 'metric2': [25, 35, 45]},
    '5503': {'metric1': [15, 25, 35], 'metric2': [30, 40, 50]},
    '5203': {'metric1': [12, 22, 32], 'metric2': [27, 37, 47]}
}

# Iterate over inner dictionary to group values by metric
for metric, values_dict in network_metrics['6616'].items():
    # Collect all values across different keys for the same metric
    metric_values = [values_dict[metric] for key, values_dict in network_metrics.items() if metric in values_dict]
    # Flatten the list of lists
    metric_values_flat = [value for sublist in metric_values for value in sublist]
    # Create DataFrame
    df = pd.DataFrame({metric: metric_values_flat})
    # Save DataFrame to CSV
    df.to_csv(f'{metric}_values.csv', index=False)
