In [875]:
import os
import networkx as nx
from collections import defaultdict
import pandas as pd
import pickle

In [876]:
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [877]:
try:
    # Open the CSV file
    # Create a CSV reader object
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
except Exception as e:
    print("An error occurred:", e)

In [878]:
# Convert 'Difference' column to numeric
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Group by the 'Score' column and calculate the size of each group
score_counts = dfScore.groupby('Score').size()

# Filter the DataFrame by the conditions: 'Difference' <= 3, size of 'Score' > 1, and 'Difference' < 0
filtered_df_score = dfScore[((dfScore['Difference'] < 3) & dfScore['Score'].isin(score_counts[score_counts > 1].index)) | (dfScore['Difference'] <= 0) | (dfScore["Score"] == "3_2") | ((dfScore["Score"] == "3_0"))]



In [879]:
filteredMatches = filtered_df_score.groupby('Score').size()
print(filteredMatches)

Score
0_1     9
0_2     3
0_3     1
1_0    28
1_1    11
1_2     3
1_3     1
1_4     1
2_0    20
2_1    12
2_2     2
3_0    14
3_1     6
dtype: int64


In [880]:
print(filteredMatches.keys())

Index(['0_1', '0_2', '0_3', '1_0', '1_1', '1_2', '1_3', '1_4', '2_0', '2_1',
       '2_2', '3_0', '3_1'],
      dtype='object', name='Score')


In [881]:
def getScore(fileName):
    parts = fileName.split("_")
    if len(parts) == 4:
        pass
    elif len(parts) == 5 :
        return f"{parts[2]}_{parts[3]}"


In [882]:
def readGraph(dataFolder, fileName):
    try:
        filePath = os.path.join(dataFolder, fileName)
        graph = nx.read_gexf(filePath)
        return graph
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
        return None
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
        return None

In [883]:
# Initialize dictionaries to store metrics for each score
network_metrics = {
    'inDegree_players': {},
    'outDegree_players': {},
    'clustering_coefficients': {},
    'betweenness_centralities': {},
    'closeness_centralities': {},
    'eigenvector_centrality': {},
    'eccentricity': {}
}

In [884]:
Barcelona = "../Data/Barcelona/04Stage/playersList.pkl"
with open(Barcelona, 'rb') as file:
    # Load data from the file
    dataBarcelona = pickle.load(file)

In [885]:
BarcelonaMetrics = "../Data/Barcelona/04Stage/Metrics/Individual/IndividualnetworkMetrics.pkl"

with open(BarcelonaMetrics, 'rb') as file:
    # Load data from the file
    network_metrics = pickle.load(file)

In [886]:
print(dataBarcelona)

{4320: 'Neymar da Silva Santos Junior', 5506: 'Javier Alejandro Mascherano', 4324: 'Daniel Alves da Silva', 6379: 'Sergi Roberto Carnicer', 5203: 'Sergio Busquets i Burgos', 4691: 'Claudio Andrés Bravo Muñoz', 11094: 'Arda Turan', 6998: 'Rafael Alcântara do Nascimento', 6616: 'Munir El Haddadi Mohamed', 5213: 'Gerard Piqué Bernabéu', 5503: 'Lionel Andrés Messi Cuccittini', 5216: 'Andrés Iniesta Luján', 5211: 'Jordi Alba Ramos', 5470: 'Ivan Rakitić', 5246: 'Luis Alberto Suárez Díaz', 7068: 'Marc Bartra Aregall', 22102: 'Adriano Correia Claro', 20055: 'Marc-André ter Stegen', 10609: 'Jérémy Mathieu', 6400: 'Aleix Vidal Parreu', 6332: 'Thomas Vermaelen', 3508: 'Sandro Ramírez Castillo', 6834: 'Gerard Gumbau Garriga', 27318: 'Douglas Pereira dos Santos', 21229: 'Sergi Samper Montaña'}


In [887]:
import numpy as np

scoreMetrics = {}

for key in dataBarcelona.keys():
    previousMetricsTable = pd.DataFrame()
    metricsTable = pd.DataFrame()
    for element in network_metrics:
        for score in network_metrics[element]:
            #print(f"Resultat: {score}")
            allValues = []
            if str(key) in network_metrics[element][score].keys():
                values = network_metrics[element][score][str(key)]
                if isinstance(values, list):
                    allValues.extend(values)
                else:
                    allValues.append(values)
            meanValue = np.mean(values)
            stdValue = np.std(values)
            if isinstance(values, list):
                countValue = len(values)
            else:
                countValue = 1
            # Round the calculated values
            meanValue = round(meanValue, 2)
            stdValue = round(stdValue, 2)
            # Create or update dictionary entry for the score
            if score not in scoreMetrics:
                scoreMetrics[score] = {'Mean': meanValue, 'Std': stdValue, 'Count': countValue}
            else:
                scoreMetrics[score]['Mean'] = meanValue
                scoreMetrics[score]['Std'] = stdValue
                scoreMetrics[score]['Count'] = countValue
            # Concatenate horizontally with the main DataFrame
        previousMetricsTable = pd.DataFrame.from_dict(scoreMetrics, orient='index')
        metricsTable = pd.concat([metricsTable, previousMetricsTable], axis = 1)
    metricsTable.to_csv(f"Barcelona/{key}_individualMetrics.csv", index = True)


4320
inDegree_players
{'0_1': {'Mean': 6.11, 'Std': 2.02, 'Count': 9}, '0_2': {'Mean': 3.0, 'Std': 1.41, 'Count': 3}, '1_2': {'Mean': 6.33, 'Std': 1.25, 'Count': 3}, '1_0': {'Mean': 6.79, 'Std': 2.06, 'Count': 24}, '2_0': {'Mean': 5.44, 'Std': 2.22, 'Count': 18}, '3_0': {'Mean': 6.15, 'Std': 2.21, 'Count': 13}, '4_0': {'Mean': 6.22, 'Std': 2.9, 'Count': 9}, '5_0': {'Mean': 4.4, 'Std': 0.8, 'Count': 5}, '6_0': {'Mean': 3.33, 'Std': 1.25, 'Count': 3}, '7_0': {'Mean': 4.0, 'Std': 0.0, 'Count': 1}, '8_0': {'Mean': 1.0, 'Std': 0.0, 'Count': 1}, '1_1': {'Mean': 4.2, 'Std': 2.23, 'Count': 10}, '2_1': {'Mean': 5.7, 'Std': 2.28, 'Count': 10}, '3_1': {'Mean': 5.33, 'Std': 2.81, 'Count': 6}, '4_1': {'Mean': 4.0, 'Std': 2.12, 'Count': 4}, '0_3': {'Mean': 4.0, 'Std': 0.0, 'Count': 1}, '1_3': {'Mean': 8.0, 'Std': 0.0, 'Count': 1}, '1_4': {'Mean': 8.0, 'Std': 0.0, 'Count': 1}, '5_1': {'Mean': 3.0, 'Std': 1.41, 'Count': 3}, '5_2': {'Mean': 1.0, 'Std': 0.0, 'Count': 1}, '2_2': {'Mean': 4.0, 'Std': 0.0,

{'0_1': {'Mean': 5.43, 'Std': 3.25, 'Count': 7}, '0_2': {'Mean': 6.0, 'Std': 1.0, 'Count': 2}, '1_2': {'Mean': 7.0, 'Std': 0.0, 'Count': 3}, '1_0': {'Mean': 6.52, 'Std': 2.26, 'Count': 23}, '2_0': {'Mean': 5.69, 'Std': 2.57, 'Count': 16}, '3_0': {'Mean': 6.3, 'Std': 2.0, 'Count': 10}, '4_0': {'Mean': 4.57, 'Std': 2.06, 'Count': 7}, '5_0': {'Mean': 3.75, 'Std': 0.83, 'Count': 4}, '6_0': {'Mean': 3.33, 'Std': 1.7, 'Count': 3}, '7_0': {'Mean': 2.0, 'Std': 0.0, 'Count': 1}, '8_0': {'Mean': 1.0, 'Std': 0.0, 'Count': 1}, '1_1': {'Mean': 5.25, 'Std': 1.79, 'Count': 8}, '2_1': {'Mean': 6.44, 'Std': 2.06, 'Count': 9}, '3_1': {'Mean': 4.75, 'Std': 2.95, 'Count': 4}, '4_1': {'Mean': 7.0, 'Std': 0.82, 'Count': 3}, '0_3': {'Mean': 7.0, 'Std': 0.0, 'Count': 1}, '1_3': {'Mean': 8.0, 'Std': 0.0, 'Count': 1}, '1_4': {'Mean': 1.0, 'Std': 0.0, 'Count': 1}, '5_1': {'Mean': 3.5, 'Std': 2.5, 'Count': 2}, '5_2': {'Mean': 3.5, 'Std': 2.5, 'Count': 2}, '2_2': {'Mean': 2.0, 'Std': 0.0, 'Count': 1}, '6_1': {'Mea

INDIVIDUAL TABLES FOR EACH SCORE

In [888]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                    df = pd.DataFrame(network_metrics[element][score])        
                    df_no_nan = df.fillna(" ")
                    df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
                    #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

                    #df_string = df_filled_int.to_string(header=False)

                    # Calculate mean, standard deviation, and count
                    mean_values = df_no_blank.mean(axis=1).round(2)
                    std_values = df_no_blank.std(axis=1).round(2)
                    count_values = df_no_blank.count(axis=1)
                    metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index = True)



SyntaxError: incomplete input (450294690.py, line 1)

In [None]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                data = network_metrics[element][score][keys]  # Get the list data
                df = pd.DataFrame(data, columns=['Values'])  # Create DataFrame from list
                metricsTable = pd.concat([metricsTable, df], axis=1)
                
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index=True)


INDIVIDUAL TABLE FOR EACH PLAYER

In [None]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            df = pd.DataFrame.from_dict(network_metrics[element][score], orient='index')        
            df_no_nan = df.fillna(" ")
            df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
            #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

            #df_string = df_filled_int.to_string(header=False)

            # Calculate mean, standard deviation, and count
            mean_values = df_no_blank.mean(axis=1).round(2)
            std_values = df_no_blank.std(axis=1).round(2)
            count_values = df_no_blank.count(axis=1)
            metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
    metricsTable.to_csv(f"Barcelona/Individual/{score}_individualMetrics.csv", index = True)