In [779]:
import os
import networkx as nx
from collections import defaultdict
import pandas as pd
import pickle

In [780]:
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"
pathCSV = "../Data/Barcelona/03Stage/finalMetadataBarcelona.csv"

In [781]:
try:
    # Open the CSV file
    # Create a CSV reader object
    dfScore = pd.read_csv(pathCSV)
except FileNotFoundError:
    print("The file does not exist.")
except Exception as e:
    print("An error occurred:", e)

In [782]:
# Convert 'Difference' column to numeric
dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

# Group by the 'Score' column and calculate the size of each group
score_counts = dfScore.groupby('Score').size()

# Filter the DataFrame by the conditions: 'Difference' <= 3, size of 'Score' > 1, and 'Difference' < 0
filtered_df_score = dfScore[((dfScore['Difference'] < 3) & dfScore['Score'].isin(score_counts[score_counts > 1].index)) | (dfScore['Difference'] <= 0) | (dfScore["Score"] == "3_2") | ((dfScore["Score"] == "3_0"))]



In [783]:
filteredMatches = filtered_df_score.groupby('Score').size()
print(filteredMatches)

Score
0_1     9
0_2     3
0_3     1
1_0    28
1_1    11
1_2     3
1_3     1
1_4     1
2_0    20
2_1    12
2_2     2
3_0    14
3_1     6
dtype: int64


In [784]:
print(filteredMatches.keys())

Index(['0_1', '0_2', '0_3', '1_0', '1_1', '1_2', '1_3', '1_4', '2_0', '2_1',
       '2_2', '3_0', '3_1'],
      dtype='object', name='Score')


In [785]:
def getScore(fileName):
    parts = fileName.split("_")
    if len(parts) == 4:
        pass
    elif len(parts) == 5 :
        return f"{parts[2]}_{parts[3]}"


In [786]:
def readGraph(dataFolder, fileName):
    try:
        filePath = os.path.join(dataFolder, fileName)
        graph = nx.read_gexf(filePath)
        return graph
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
        return None
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
        return None

In [787]:
# Initialize dictionaries to store metrics for each score
network_metrics = {
    'inDegree_players': {},
    'outDegree_players': {},
    'clustering_coefficients': {},
    'betweenness_centralities': {},
    'closeness_centralities': {},
    'eigenvector_centrality': {},
    'eccentricity': {}
}

In [788]:
Barcelona = "../Data/Barcelona/04Stage/playersList.pkl"
with open(Barcelona, 'rb') as file:
    # Load data from the file
    dataBarcelona = pickle.load(file)

In [789]:
BarcelonaMetrics = "../Data/Barcelona/04Stage/Metrics/Individual/IndividualnetworkMetrics.pkl"

with open(BarcelonaMetrics, 'rb') as file:
    # Load data from the file
    network_metrics = pickle.load(file)

In [790]:
print(dataBarcelona)

{4320: 'Neymar da Silva Santos Junior', 5506: 'Javier Alejandro Mascherano', 4324: 'Daniel Alves da Silva', 6379: 'Sergi Roberto Carnicer', 5203: 'Sergio Busquets i Burgos', 4691: 'Claudio Andrés Bravo Muñoz', 11094: 'Arda Turan', 6998: 'Rafael Alcântara do Nascimento', 6616: 'Munir El Haddadi Mohamed', 5213: 'Gerard Piqué Bernabéu', 5503: 'Lionel Andrés Messi Cuccittini', 5216: 'Andrés Iniesta Luján', 5211: 'Jordi Alba Ramos', 5470: 'Ivan Rakitić', 5246: 'Luis Alberto Suárez Díaz', 7068: 'Marc Bartra Aregall', 22102: 'Adriano Correia Claro', 20055: 'Marc-André ter Stegen', 10609: 'Jérémy Mathieu', 6400: 'Aleix Vidal Parreu', 6332: 'Thomas Vermaelen', 3508: 'Sandro Ramírez Castillo', 6834: 'Gerard Gumbau Garriga', 27318: 'Douglas Pereira dos Santos', 21229: 'Sergi Samper Montaña'}


In [791]:
import numpy as np

scoreMetrics = {}

for key in dataBarcelona.keys():
    previousMetricsTable = pd.DataFrame()
    metricsTable = pd.DataFrame()
    for element in network_metrics:
        for score in network_metrics[element]:
            #print(f"Resultat: {score}")
            allValues = []
            if str(key) in network_metrics[element][score].keys():
                values = network_metrics[element][score][str(key)]
                if isinstance(values, list):
                    allValues.extend(values)
                else:
                    allValues.append(values)
            meanValue = np.mean(values)
            stdValue = np.std(values)
            if isinstance(values, list):
                countValue = len(values)
            else:
                countValue = 1
            # Round the calculated values
            meanValue = round(meanValue, 2)
            stdValue = round(stdValue, 2)
            # Create or update dictionary entry for the score
            if score not in scoreMetrics:
                scoreMetrics[score] = {'Mean': meanValue, 'Std': stdValue, 'Count': countValue}
            else:
                scoreMetrics[score]['Mean'] = meanValue
                scoreMetrics[score]['Std'] = stdValue
                scoreMetrics[score]['Count'] = countValue
            # Concatenate horizontally with the main DataFrame
        previousMetricsTable = pd.DataFrame.from_dict(scoreMetrics, orient='index')
        metricsTable = pd.concat([metricsTable, previousMetricsTable], axis = 1)
    #metricsTable.to_csv(f"Barcelona/{key}_individualMetrics.csv", index = True)
    print(metricsTable)


     Mean   Std  Count  Mean   Std  Count  Mean  Std  Count  Mean  ...  Count  \
0_1  6.11  2.02      9  5.78  1.75      9  0.55  0.0      1  0.11  ...      1   
0_2  3.00  1.41      3  3.00  2.16      3  0.25  0.0      1  0.20  ...      1   
1_2  6.33  1.25      3  5.67  2.05      3  0.62  0.0      1  0.01  ...      1   
1_0  6.79  2.06     24  6.00  2.04     24  0.53  0.0      1  0.04  ...      1   
2_0  5.44  2.22     18  5.00  2.47     18  0.39  0.0      1  0.13  ...      1   
3_0  6.15  2.21     13  5.23  1.31     13  0.76  0.0      1  0.02  ...      1   
4_0  6.22  2.90      9  5.22  2.66      9  0.61  0.0      1  0.09  ...      1   
5_0  4.40  0.80      5  3.80  0.75      5  0.46  0.0      1  0.07  ...      1   
6_0  3.33  1.25      3  3.67  0.47      3  0.33  0.0      1  0.17  ...      1   
7_0  4.00  0.00      1  2.00  0.00      1  0.27  0.0      1  0.21  ...      1   
8_0  1.00  0.00      1  0.00  0.00      1  0.00  0.0      1  0.00  ...      1   
1_1  4.20  2.23     10  3.40

In [792]:
element_values = ['inDegree', 'outDegree', 'clustering_coefficients', 'betweenness_centralities', 'closeness_centralities', 'eigenvector_centrality', 'eccentricity']

# Create MultiIndex for the new header row with element values repeated three times
new_header_values = [(element,) for element in element_values]
print(new_header_values)
# Flatten the list of tuples
flat_new_header_values = [item for sublist in new_header_values for item in sublist]

# Create MultiIndex with 'Mean', 'Std', and 'Count' for each element value
multi_index = pd.MultiIndex.from_product([element_values, ['Mean', 'Std', 'Count']], names=['Metric', None])

# Generate sample data
data = np.random.randn(5, len(multi_index))

# Create DataFrame with the MultiIndex
df = pd.DataFrame(data, columns=multi_index)
df


[('inDegree',), ('outDegree',), ('clustering_coefficients',), ('betweenness_centralities',), ('closeness_centralities',), ('eigenvector_centrality',), ('eccentricity',)]


Metric,inDegree,inDegree,inDegree,outDegree,outDegree,outDegree,clustering_coefficients,clustering_coefficients,clustering_coefficients,betweenness_centralities,betweenness_centralities,betweenness_centralities,closeness_centralities,closeness_centralities,closeness_centralities,eigenvector_centrality,eigenvector_centrality,eigenvector_centrality,eccentricity,eccentricity,eccentricity
Unnamed: 0_level_1,Mean,Std,Count,Mean,Std,Count,Mean,Std,Count,Mean,...,Count,Mean,Std,Count,Mean,Std,Count,Mean,Std,Count
0,-0.425307,-0.734374,-0.892557,0.206462,0.31368,-1.065685,0.821651,1.261451,-0.810837,-2.057395,...,-0.08462,0.225023,-0.024972,-0.338088,1.283175,-0.707977,0.514615,-1.593838,1.786856,0.119418
1,-0.052184,0.081494,1.298063,-0.792786,-1.78998,-0.782843,-1.064573,-0.873929,-1.762235,1.10704,...,0.421398,-1.391193,-0.885195,-0.465277,0.895666,0.508084,-0.158126,0.666946,0.55214,-0.492885
2,-0.358642,2.256,2.483574,1.964914,0.580723,-0.049174,0.328767,0.263645,-0.07072,0.529777,...,0.559359,0.348177,0.710133,0.338614,-0.443111,-0.130637,1.281949,-0.265168,-0.112328,-1.347967
3,-1.373301,0.847936,-0.157679,-0.645278,-1.443104,-0.088246,2.79084,0.277531,1.994385,-0.016312,...,-0.102246,0.386789,0.841471,1.164801,-0.766839,-1.998021,0.912404,1.872872,0.537779,1.388329
4,-0.768025,0.824084,0.01521,0.956858,1.974321,-0.569701,0.115995,0.966216,0.772636,-1.129547,...,-0.932145,0.782547,-0.41818,0.696771,-1.904065,0.718,-0.135674,0.218745,-1.715128,-1.187993


In [799]:
scoreMetrics = {}
element_values = []

for key in dataBarcelona.keys():
    previousMetricsTable = pd.DataFrame()
    metricsTable = pd.DataFrame()
    for element in network_metrics:
        if not element in element_values:
            element_values.append(element)
        for score in network_metrics[element]:
            allValues = []
            if str(key) in network_metrics[element][score].keys():
                values = network_metrics[element][score][str(key)]
                if isinstance(values, list):
                    allValues.extend(values)
                else:
                    allValues.append(values)
            meanValue = np.mean(values)
            stdValue = np.std(values)
            if isinstance(values, list):
                countValue = len(values)
            else:
                countValue = 1
            meanValue = round(meanValue, 2)
            stdValue = round(stdValue, 2)
            if score not in scoreMetrics:
                scoreMetrics[score] = {'Mean': meanValue, 'Std': stdValue, 'Count': countValue}
            else:
                scoreMetrics[score]['Mean'] = meanValue
                scoreMetrics[score]['Std'] = stdValue
                scoreMetrics[score]['Count'] = countValue
        previousMetricsTable = pd.DataFrame.from_dict(scoreMetrics, orient='index')
        metricsTable = pd.concat([metricsTable, previousMetricsTable], axis=1)

    metricsTable.columns.name = None

    # Flatten the list of tuples
    #print(element_values)
    multi_index = pd.MultiIndex.from_product([element_values, ['Mean', 'Std', 'Count']], names=[None, None])
    metricsTable.columns = multi_index

    metricsTable.to_csv(f"Barcelona/{key}_individualMetrics.csv", index = True)
  

TypeError: Input must be list-like

INDIVIDUAL TABLES FOR EACH SCORE

In [794]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                    df = pd.DataFrame(network_metrics[element][score])        
                    df_no_nan = df.fillna(" ")
                    df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
                    #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

                    #df_string = df_filled_int.to_string(header=False)

                    # Calculate mean, standard deviation, and count
                    mean_values = df_no_blank.mean(axis=1).round(2)
                    std_values = df_no_blank.std(axis=1).round(2)
                    count_values = df_no_blank.count(axis=1)
                    metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index = True)
'''


'\nfor score in filtered_df_score["Score"].unique():\n    metricsTable = pd.DataFrame()\n\n    for element in network_metrics:\n        if score in network_metrics[element]:\n            for keys in network_metrics[element][score]:\n                    df = pd.DataFrame(network_metrics[element][score])        \n                    df_no_nan = df.fillna(" ")\n                    df_no_blank = df_no_nan.replace(" ", float(\'nan\'))  # Replace blank spaces with NaN\n                    #df_filled_int = df_no_blank.astype(float).astype(\'Int64\')  # Convert to integers\n\n                    #df_string = df_filled_int.to_string(header=False)\n\n                    # Calculate mean, standard deviation, and count\n                    mean_values = df_no_blank.mean(axis=1).round(2)\n                    std_values = df_no_blank.std(axis=1).round(2)\n                    count_values = df_no_blank.count(axis=1)\n                    metricsTable = pd.concat([metricsTable, mean_values.rename(\'Mea

In [795]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            for keys in network_metrics[element][score]:
                data = network_metrics[element][score][keys]  # Get the list data
                df = pd.DataFrame(data, columns=['Values'])  # Create DataFrame from list
                metricsTable = pd.concat([metricsTable, df], axis=1)
                
            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index=True)
'''

'\nfor score in filtered_df_score["Score"].unique():\n    metricsTable = pd.DataFrame()\n\n    for element in network_metrics:\n        if score in network_metrics[element]:\n            for keys in network_metrics[element][score]:\n                data = network_metrics[element][score][keys]  # Get the list data\n                df = pd.DataFrame(data, columns=[\'Values\'])  # Create DataFrame from list\n                metricsTable = pd.concat([metricsTable, df], axis=1)\n                \n            metricsTable.to_csv(f"Barcelona/{score}_individualMetrics.csv", index=True)\n'

INDIVIDUAL TABLE FOR EACH PLAYER

In [796]:
'''
for score in filtered_df_score["Score"].unique():
    metricsTable = pd.DataFrame()

    for element in network_metrics:
        if score in network_metrics[element]:
            df = pd.DataFrame.from_dict(network_metrics[element][score], orient='index')        
            df_no_nan = df.fillna(" ")
            df_no_blank = df_no_nan.replace(" ", float('nan'))  # Replace blank spaces with NaN
            #df_filled_int = df_no_blank.astype(float).astype('Int64')  # Convert to integers

            #df_string = df_filled_int.to_string(header=False)

            # Calculate mean, standard deviation, and count
            mean_values = df_no_blank.mean(axis=1).round(2)
            std_values = df_no_blank.std(axis=1).round(2)
            count_values = df_no_blank.count(axis=1)
            metricsTable = pd.concat([metricsTable, mean_values.rename('Mean'), std_values.rename('Std'), count_values.rename('Count')], axis=1)
    metricsTable.to_csv(f"Barcelona/Individual/{score}_individualMetrics.csv", index = True)
'''

'\nfor score in filtered_df_score["Score"].unique():\n    metricsTable = pd.DataFrame()\n\n    for element in network_metrics:\n        if score in network_metrics[element]:\n            df = pd.DataFrame.from_dict(network_metrics[element][score], orient=\'index\')        \n            df_no_nan = df.fillna(" ")\n            df_no_blank = df_no_nan.replace(" ", float(\'nan\'))  # Replace blank spaces with NaN\n            #df_filled_int = df_no_blank.astype(float).astype(\'Int64\')  # Convert to integers\n\n            #df_string = df_filled_int.to_string(header=False)\n\n            # Calculate mean, standard deviation, and count\n            mean_values = df_no_blank.mean(axis=1).round(2)\n            std_values = df_no_blank.std(axis=1).round(2)\n            count_values = df_no_blank.count(axis=1)\n            metricsTable = pd.concat([metricsTable, mean_values.rename(\'Mean\'), std_values.rename(\'Std\'), count_values.rename(\'Count\')], axis=1)\n    metricsTable.to_csv(f"Barcelon