In [434]:
import os
import networkx as nx
from collections import defaultdict
import numpy as np
import pandas as pd
import csv


In [435]:
# Directory whose entries are listed by the metrics loop; each entry is read with nx.read_gexf.
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
# CSV file loaded into the dfScore DataFrame.
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [436]:
# Degree accumulators: looking up a missing key yields a fresh empty list.
# NOTE: the metrics cell further down rebinds both names to plain dicts.
inDegree_players, outDegree_players = defaultdict(list), defaultdict(list)

In [437]:
# Load the match metadata CSV into dfScore; the cells below read this frame.
try:
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
    # Re-raise so the notebook stops here instead of failing later with an
    # unrelated NameError on the undefined dfScore.
    raise
except Exception as e:
    print("An error occurred:", e)
    # Same reasoning: report the problem, but do not continue without data.
    raise

In [438]:
# Make 'Difference' numeric in place; unparseable values become NaN, and NaN
# fails every comparison used below.
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Number of rows per distinct 'Score', and the scores that occur more than once.
score_counts = dfScore.groupby('Score').size()
repeated_scores = score_counts[score_counts > 1].index

# A row is kept when ANY of the following holds:
#   - 'Difference' < 3 AND its 'Score' occurs more than once
#   - 'Difference' <= 0
#   - 'Score' is exactly "3_2"
#   - 'Score' is exactly "3_0"
keep_small_diff_repeated = (dfScore['Difference'] < 3) & dfScore['Score'].isin(repeated_scores)
keep_non_positive_diff = dfScore['Difference'] <= 0
keep_score_3_2 = dfScore["Score"] == "3_2"
keep_score_3_0 = dfScore["Score"] == "3_0"

filtered_df_score = dfScore[
    keep_small_diff_repeated | keep_non_positive_diff | keep_score_3_2 | keep_score_3_0
]



In [439]:
# Count the rows that passed the filter for each 'Score' value and print the resulting Series.
filteredMatches = filtered_df_score.groupby('Score').size()
print(filteredMatches)

Score
0_1     9
0_2     3
0_3     1
1_0    28
1_1    13
1_2     3
1_3     1
1_4     1
2_0    18
2_1    12
2_2     2
3_0    14
3_1     7
3_2     1
dtype: int64


In [440]:
def getScore(fileName):
    """Extract the score key from an underscore-separated file name.

    Args:
        fileName: Name to split on "_".

    Returns:
        The third and fourth fields joined by "_" when the name splits into
        exactly five fields; None for any other number of fields.
    """
    fields = fileName.split("_")
    if len(fields) != 5:
        return None
    return "_".join(fields[2:4])


In [441]:
def readGraph(dataFolder, fileName):
    """Read one GEXF file from a folder with networkx.

    Args:
        dataFolder: Directory that holds the file.
        fileName: Name of the file inside dataFolder.

    Returns:
        The object returned by nx.read_gexf, or None when the file is missing
        or networkx raises NetworkXError (a message is printed in both cases).
    """
    filePath = os.path.join(dataFolder, fileName)
    try:
        return nx.read_gexf(filePath)
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
    return None

In [442]:
# Per-score metric stores, shaped score -> node -> value(s).
# The degree stores collect a list with one entry per processed graph; the
# other three hold a single value per node.
inDegree_players = {}
outDegree_players = {}
clustering_coefficients = {}
betweenness_centralities = {}
closeness_centralities = {}

# Build the set of wanted scores once instead of calling .unique() per file.
selected_scores = set(filtered_df_score["Score"].unique())

# os.listdir returns entries in arbitrary order; sorting makes the order of the
# per-graph degree lists (and which graph is processed last) reproducible.
for fileName in sorted(os.listdir(path)):
    # Check the score first so files that are not needed are never parsed.
    score = getScore(fileName)
    if score not in selected_scores:
        continue

    graph = readGraph(path, fileName)
    if graph is None:
        # readGraph already printed why the file could not be loaded.
        continue

    in_by_node = inDegree_players.setdefault(score, {})
    out_by_node = outDegree_players.setdefault(score, {})
    clustering_by_node = clustering_coefficients.setdefault(score, {})
    betweenness_by_node = betweenness_centralities.setdefault(score, {})
    closeness_by_node = closeness_centralities.setdefault(score, {})

    # Whole-graph metrics are computed once per graph rather than once per node.
    graph_clustering = nx.clustering(graph)
    graph_betweenness = nx.betweenness_centrality(graph)
    graph_closeness = nx.closeness_centrality(graph)

    for node in graph.nodes():
        in_by_node.setdefault(node, []).append(graph.in_degree(node))
        out_by_node.setdefault(node, []).append(graph.out_degree(node))

        # NOTE(review): unlike the degree lists, these three are overwritten, so
        # only the value from the last processed graph of a score survives for
        # each node. Confirm whether they should accumulate per graph as well.
        clustering_by_node[node] = graph_clustering[node]
        betweenness_by_node[node] = graph_betweenness[node]
        closeness_by_node[node] = graph_closeness[node]


In [446]:
# Tabulate the recorded out-degrees of every node for one score.
score = '0_1'
node_outdegrees = outDegree_players[score]

# One row per node, one column per position in that node's out-degree list;
# pandas pads the shorter lists with NaN.
df = pd.DataFrame.from_dict(node_outdegrees, orient='index')

# Nullable Int64 keeps the padding as <NA> while showing the degrees as
# integers. The frame already holds NaN for the gaps, so no blank-string
# fill/replace round trip is needed before the cast.
df_filled_int = df.astype(float).astype('Int64')

df_string = df_filled_int.to_string(header=False)

print(df_string)


6616   3     5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
5503   1     8     6     3     6  <NA>  <NA>  <NA>  <NA>
5203   6     9    10     5     8     6     3     4     5
6379   5     7     8     6     7     4     5  <NA>  <NA>
5506   2     8    11     3     8     3     3  <NA>  <NA>
4691   3     5     8     4     2     3     2  <NA>  <NA>
4324   1     5     8     6     6  <NA>  <NA>  <NA>  <NA>
4320   3     5     8     6     7     6     6     3     8
5213   3    10     8     8     4     5     3     8  <NA>
11094  2     5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
6998   1     7  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
5470   7    10     9     6     5     4  <NA>  <NA>  <NA>
5211   7     9     9     6     5     6     6  <NA>  <NA>
5216   5     9     7     3  <NA>  <NA>  <NA>  <NA>  <NA>
5246   3     5     6     4     2     3     1     3  <NA>
20055  8     5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
6332   5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
10609  6     4     5     5     

In [449]:
# Row-wise mean, standard deviation and sum of each node's recorded values
# (<NA> entries are skipped by pandas' default skipna behaviour).
mean_values = df_filled_int.mean(axis=1).round(2)
std_values = df_filled_int.std(axis=1).round(2)
sum_values = df_filled_int.sum(axis=1)

# Append the statistics as extra columns. The last column is a sum, so it is
# labelled 'Sum' rather than 'Count'.
df_with_stats = pd.concat(
    [
        df_filled_int,
        mean_values.rename('Mean'),
        std_values.rename('Std'),
        sum_values.rename('Sum'),
    ],
    axis=1,
)

# Print the DataFrame
print(df_with_stats.to_string(header=False))

6616   3     5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   4.0  1.41   8
5503   1     8     6     3     6  <NA>  <NA>  <NA>  <NA>   4.8  2.77  24
5203   6     9    10     5     8     6     3     4     5  6.22  2.33  56
6379   5     7     8     6     7     4     5  <NA>  <NA>   6.0  1.41  42
5506   2     8    11     3     8     3     3  <NA>  <NA>  5.43  3.51  38
4691   3     5     8     4     2     3     2  <NA>  <NA>  3.86  2.12  27
4324   1     5     8     6     6  <NA>  <NA>  <NA>  <NA>   5.2  2.59  26
4320   3     5     8     6     7     6     6     3     8  5.78  1.86  52
5213   3    10     8     8     4     5     3     8  <NA>  6.12   2.7  49
11094  2     5  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   3.5  2.12   7
6998   1     7  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   4.0  4.24   8
5470   7    10     9     6     5     4  <NA>  <NA>  <NA>  6.83  2.32  41
5211   7     9     9     6     5     6     6  <NA>  <NA>  6.86  1.57  48
5216   5     9     7     3  <NA>  <NA>  <NA>  <NA> 