In [2]:
import pickle
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

FC Barcelona study

In [3]:
# Input files for Team_1, given relative to the notebook's working directory.
# Two pickles hold the raw individual and global network metrics, the CSV holds
# the match metadata, and the last pickle holds the players list.
individualMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Individual/IndividualnetworkMetrics.pkl"
globalMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Global/GlobalnetworkMetrics.pkl"
metadataBarcelonaPath = "../Data/Team_1/03Stage/finalMetadataBarcelona.csv"
playersBarcelonaPath =  "../Data/Team_1/04Stage/playersList.pkl"    

In [4]:
def readCSV(filePath):
    """Read a CSV file into a DataFrame, reporting failures instead of raising.

    Parameters
    ----------
    filePath : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table. When reading fails, a message is printed and
        ``None`` is returned, so callers must check the result.
    """
    try:
        return pd.read_csv(filePath)
    except FileNotFoundError:
        print(f"No s'ha trobat el fitxer: {filePath}")
    except pd.errors.EmptyDataError:
        print(f"El fitxer està buit: {filePath}")
    except pd.errors.ParserError:
        print(f"Error de parseig al fitxer: {filePath}")
    except Exception as e:
        print(f"Ha ocorregut un error: {e}")
    # Every failure path ends here after printing its message.
    return None

In [5]:
def loadPickleFile(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object or None
        The unpickled object, or ``None`` (after printing a message naming
        the path) when the file does not exist.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only use this on files you
    produced yourself or otherwise trust.
    """
    try:
        with open(file, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # Name the missing path so the message is actionable.
        print(f"File {file} not found.")
        return None

Get raw dict

In [6]:
# Raw individual network metrics as stored in the pickle (None if the file is missing).
file = loadPickleFile(individualMetricsBarcelonaPath)

Get filtered dict by players

In [7]:
def getCount(df):
    """Return, for each row of ``df``, the number of non-missing cells."""
    return df.notna().sum(axis="columns")

In [8]:
def dfCounts(counts):
    """Wrap ``counts`` in a DataFrame whose single column is named ``count``."""
    return pd.DataFrame(data=counts, columns=['count'])

In [9]:
def filterDf(df, divisor=4):
    """Keep the rows whose ``count`` reaches a fraction of the largest count.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric ``count`` column.
    divisor : int or float, optional
        The threshold is ``max(count) / divisor``. The default of 4 keeps
        rows with at least a quarter of the maximum count.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose ``count`` is at least the threshold.

    Raises
    ------
    ValueError
        If ``divisor`` is not positive.
    """
    if divisor <= 0:
        raise ValueError("divisor must be positive")

    # Threshold is the maximum count divided by `divisor` (a quarter by default).
    threshold = df['count'].max() / divisor
    return df[df['count'] >= threshold]

In [10]:
def filtererdMetricsByPlayers(file):
    """Drop sparsely populated rows from every metric table.

    ``file`` is a two-level mapping (outer key -> inner key -> dict of rows).
    Each innermost dict is turned into a DataFrame with one row per key; the
    rows are counted with ``getCount``, filtered with ``filterDf`` and the
    surviving rows are converted back to a ``{row label: list of values}``
    dict.

    Returns a new nested dict with the same outer and inner keys.
    """
    def _keepWellPopulatedRows(rawRows):
        table = pd.DataFrame.from_dict(rawRows, orient="index")
        keptLabels = filterDf(dfCounts(getCount(table))).index
        return table.loc[keptLabels].T.to_dict(orient="list")

    return {
        element: {
            score: _keepWellPopulatedRows(file[element][score])
            for score in file[element]
        }
        for element in file
    }
            

In [11]:
# Remove, in every metric table, the rows with too few non-missing values.
filteredDict = filtererdMetricsByPlayers(file)

Get normalized dict

In [12]:
def normalizatedindividualMetrics(file):
    """Min-max scale every metric table column by column.

    ``file`` is a two-level mapping (outer key -> inner key -> dict of rows).
    Each innermost dict becomes a DataFrame with one row per key, and each
    column is rescaled as ``(value - column min) / (column max - column min)``.
    A column whose maximum equals its minimum therefore yields NaN.

    Returns a new nested dict of ``{row label: list of scaled values}``.
    """
    def _minMaxScale(rawRows):
        table = pd.DataFrame.from_dict(rawRows, orient="index")
        lowest = table.min()
        spread = table.max() - lowest
        scaled = ((table - lowest) / spread).set_index(table.index)
        return scaled.T.to_dict(orient="list")

    return {
        element: {
            score: _minMaxScale(file[element][score])
            for score in file[element]
        }
        for element in file
    }

In [13]:
# Min-max scale the filtered metric tables.
normalizedDict = normalizatedindividualMetrics(filteredDict)

Get classified file

In [14]:
def calculateMetrics(data):
    """Compute the row-wise mean, standard deviation and count of a table.

    Cells holding a single blank string (``" "``) are treated as missing,
    exactly like NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose rows are summarised.

    Returns
    -------
    tuple of pandas.Series
        ``(meanValues, stdValues, countValues)``, each indexed like ``data``.
        Mean and standard deviation are rounded to two decimals; the count is
        the number of non-missing cells in the row.
    """
    # Turn blank placeholders into NaN directly. Filling NaN with " " first and
    # replacing it back gives the same values but pushes numeric columns
    # through object dtype on the way.
    cleaned = data.replace(" ", float('NaN'))

    meanValues = cleaned.mean(axis=1).round(2)
    stdValues = cleaned.std(axis=1).round(2)
    countValues = cleaned.count(axis=1)

    return meanValues, stdValues, countValues

In [15]:
def classifiedMetrics(file):
    """Summarise every metric table and grade each row into five classes.

    ``file`` is a two-level mapping (outer key -> inner key -> dict of rows).
    For each innermost dict the row-wise mean, standard deviation and count
    are computed with ``calculateMetrics``. The non-missing means are cut
    into five equal-width bins labelled "worst" to "excellent".

    Returns
    -------
    dict
        ``{outer key: {inner key: {row label: [Mean, Std, Count, Class]}}}``.
        Inner keys whose rows all have a missing mean are left out. Rows with
        a missing mean inside a kept table carry NaN for Mean and Class.
    """
    classLabels = ["worst", "bad", "medium", "good", "excellent"]
    finalDict = {}
    for element in file:
        classifiedScores = {}
        for score in file[element]:
            df = pd.DataFrame.from_dict(file[element][score], orient="index")
            meanValues, stdValues, countValues = calculateMetrics(df)
            meanDropNa = meanValues.dropna()
            if meanDropNa.empty:
                continue

            classifyValues = pd.cut(np.array(meanDropNa), 5, labels=classLabels).astype(str)
            classColumn = pd.DataFrame({'Class': classifyValues}, index=meanDropNa.index)
            metricsTable = pd.concat(
                [meanDropNa.rename('Mean'), stdValues.rename('Std'), countValues.rename('Count'), classColumn],
                axis=1,
            )
            # Align by label, not by position. When some means were dropped the
            # concatenated index is no longer in df.index order, so overwriting
            # the index positionally would put statistics on the wrong rows.
            metricsTable = metricsTable.reindex(df.index)
            classifiedScores[score] = metricsTable.T.to_dict(orient="list")
        finalDict[element] = classifiedScores
    return finalDict

In [16]:
# Summarise each row of the scaled tables (mean, std, count) and grade it into one of five classes.
classifiedDict = classifiedMetrics(normalizedDict)

Get filtered dict by scores

In [17]:
def filterScoresBarcelona(dfScore):
    """Select the metadata rows whose score is kept for the study.

    The ``Difference`` column of ``dfScore`` is converted to numeric **in
    place** (unparseable values become NaN), so the caller's frame is
    modified.

    A row is kept when at least one of these holds:

    * ``Difference < 3`` and its ``Score`` value occurs more than once,
    * ``Difference <= 0``,
    * ``Score`` is ``"3_2"`` or ``"3_0"``.

    Rows whose ``Score`` is ``"NF"`` are then removed.

    Returns the filtered DataFrame.
    """
    dfScore['Difference'] = pd.to_numeric(dfScore['Difference'], errors='coerce')

    occurrences = dfScore.groupby('Score').size()
    repeatedScores = occurrences[occurrences > 1].index

    smallAndRepeated = (dfScore['Difference'] < 3) & dfScore['Score'].isin(repeatedScores)
    nonPositive = dfScore['Difference'] <= 0
    alwaysKept = (dfScore["Score"] == "3_2") | (dfScore["Score"] == "3_0")

    selected = dfScore[smallAndRepeated | nonPositive | alwaysKept]
    return selected[selected["Score"] != "NF"]

In [18]:
def filtererdMetricsByScore(df, keys):
    """Restrict every inner mapping of ``df`` to the requested keys.

    Despite its name, ``df`` is a nested mapping
    (``{element: {score: value}}``), not a DataFrame. A key is looked up by
    its string form, but the result is stored under the key exactly as it
    was given in ``keys``.

    Returns ``{element: {key: value}}`` containing only the keys that exist
    for that element; an element with no matching key maps to an empty dict.
    """
    return {
        element: {
            key: df[element][str(key)]
            for key in keys
            if str(key) in df[element]
        }
        for element in df
    }

In [19]:
# Match metadata table (readCSV returns None if the file cannot be read).
metadataBarcelonaFile = readCSV(metadataBarcelonaPath)

In [20]:
# Keep the metadata rows selected by filterScoresBarcelona.
# Side effect: metadataBarcelonaFile['Difference'] is converted to numeric in place.
dfBarcelonaFiltered = filterScoresBarcelona(metadataBarcelonaFile)

In [21]:
# Distinct Score values that survived the filter.
uniqueScores = dfBarcelonaFiltered["Score"].unique()

In [22]:
# Restrict the classified individual metrics to those scores.
finalDict = filtererdMetricsByScore(classifiedDict, uniqueScores)

Save dictionary

In [23]:
def saveDictToPickle(dictionary, filePath):
    """Pickle ``dictionary`` to ``filePath``.

    Any exception raised while opening or writing the file is caught and
    reported with ``print``; nothing is raised to the caller and the function
    always returns ``None``.
    """
    try:
        with open(filePath, "wb") as handle:
            pickle.dump(dictionary, handle)
    except Exception as error:
        print("Error occurred while saving the dictionary:", str(error))

In [24]:
# Persist the score-filtered individual metrics.
saveDictToPickle(finalDict, "../Data/Team_1/05Stage/Metrics/Filtered/Individual/finalIndividualnetworkMetrics.pkl")

Tables

Score tables

In [25]:
def saveIndividualMetrics(finalDict, uniqueScores, folderPath):
    """Write one pickled table per score.

    For every score in ``uniqueScores`` the per-element tables found in
    ``finalDict[element][score]`` are placed side by side. The columns are
    labelled with a two-level index (element, then Mean/Std/Count/Class),
    missing cells are filled with 0 and the table is saved as
    ``{folderPath}/{score}_individualMetrics.pkl``.
    """
    statNames = ['Mean', 'Std', 'Count', 'Class']
    for score in uniqueScores:
        elementValues = [element for element in finalDict if score in finalDict[element]]
        perElement = [
            pd.DataFrame.from_dict(finalDict[element][score], orient='index')
            for element in elementValues
        ]
        # The leading empty frame keeps the concatenation valid when no
        # element provides this score.
        metricsTable = pd.concat([pd.DataFrame(), *perElement], axis=1)
        metricsTable.columns = pd.MultiIndex.from_product([elementValues, statNames], names=[None, None])
        metricsTable.fillna(0, inplace=True)
        metricsTable.to_pickle(f"{folderPath}/{score}_individualMetrics.pkl")

In [26]:
# Write one individual-metrics table per score.
saveIndividualMetrics(finalDict, uniqueScores, "../Data/Team_1/06Stage/Tables/Score/Individual")

Player tables

In [27]:
# Players list; its keys are used as player identifiers by savePlayerMetrics.
playersList = loadPickleFile(playersBarcelonaPath)

In [28]:
def savePlayerMetrics(playersList, finalDict, folderPath):
    """Write one pickled table per player.

    Parameters
    ----------
    playersList : dict
        Mapping whose keys identify the players; ``str(key)`` is looked up in
        the innermost dicts of ``finalDict``.
    finalDict : dict
        ``{element: {score: {player: [Mean, Std, Count, Class]}}}``.
    folderPath : str
        Existing directory that receives ``{key}_individualMetrics.pkl``.

    Notes
    -----
    Each table has one row per score and a two-level column index (element,
    then Mean/Std/Count/Class). A player absent from a score, or with an
    empty value list, gets zeros for that score. A score that an element does
    not have at all is also filled with zeros.
    """
    statNames = ['Mean', 'Std', 'Count', 'Class']
    elementValues = list(finalDict)
    multiIndex = pd.MultiIndex.from_product([elementValues, statNames], names=[None, None])

    for key in playersList.keys():
        playerId = str(key)
        elementTables = []
        for element in elementValues:
            # Start from a clean dict for every element so that a score
            # missing from this element cannot inherit another element's values.
            scoreMetrics = {}
            for score in finalDict[element]:
                playersValues = finalDict[element][score]
                values = playersValues[playerId] if playerId in playersValues.keys() else None
                if values:
                    scoreMetrics[score] = {
                        'Mean': values[0],
                        'Std': values[1],
                        'Count': values[2],
                        'Class': values[3],
                    }
                else:
                    scoreMetrics[score] = dict.fromkeys(statNames, 0)
            # Explicit columns keep the four statistics present even when the
            # element has no scores.
            elementTables.append(
                pd.DataFrame.from_dict(scoreMetrics, orient='index', columns=statNames)
            )

        metricsTable = pd.concat(elementTables, axis=1) if elementTables else pd.DataFrame()
        metricsTable.columns = multiIndex
        metricsTable = metricsTable.fillna(0)
        metricsTable.to_pickle(f"{folderPath}/{key}_individualMetrics.pkl")

In [29]:
# Write one individual-metrics table per player.
savePlayerMetrics(playersList, finalDict, "../Data/Team_1/06Stage/Tables/Player")

Global tables

In [30]:
# Raw global network metrics as stored in the pickle (None if the file is missing).
globalMetricsBarcelonaFile = loadPickleFile(globalMetricsBarcelonaPath)

In [31]:
# Restrict the global metrics to the scores kept above.
filteredGlobal = filtererdMetricsByScore(globalMetricsBarcelonaFile, uniqueScores)

In [32]:
# Persist the score-filtered *global* metrics. This used to write finalDict,
# which holds the individual metrics and is already saved under Filtered/Individual.
saveDictToPickle(filteredGlobal, "../Data/Team_1/05Stage/Metrics/Filtered/Global/finalGlobalnetworkMetrics.pkl")

In [33]:
def saveGlobalMetrics(filteredGlobal, path):
    """Summarise the global metrics per row and pickle the result.

    For every element of ``filteredGlobal`` the inner dict is turned into a
    table with one row per key, and ``calculateMetrics`` provides the
    row-wise mean, standard deviation and count. The three columns of all
    elements are placed side by side under a two-level column index
    (element, then Mean/Std/Count), missing cells are filled with 0 and the
    table is written to ``{path}/06Stage/Tables/Score/Global/globalMetrics.pkl``.
    """
    elementValues = list(dict.fromkeys(filteredGlobal))
    # The leading empty frame keeps the concatenation valid for empty input.
    pieces = [pd.DataFrame()]
    for element in filteredGlobal:
        table = pd.DataFrame.from_dict(filteredGlobal[element], orient='index')
        meanValues, stdValues, countValues = calculateMetrics(table)
        pieces += [meanValues.rename('Mean'), stdValues.rename('Std'), countValues.rename('Count')]

    metricsTable = pd.concat(pieces, axis=1)
    metricsTable.columns = pd.MultiIndex.from_product(
        [elementValues, ['Mean', 'Std', 'Count']], names=[None, None]
    )
    metricsTable.fillna(0, inplace=True)
    metricsTable.to_pickle(f"{path}/06Stage/Tables/Score/Global/globalMetrics.pkl")

In [34]:
# Write the global summary table; saveGlobalMetrics appends "/06Stage/Tables/Score/Global/globalMetrics.pkl" to this path.
saveGlobalMetrics(filteredGlobal, "../Data/Team_1/")


Graphics Treatment

In [35]:
# Class labels in the order used for the numeric scale, and the mapping
# from each label to its position in that order (0 for the first, 4 for the last).
categories = ["worst", "bad", "medium", "good", "excellent"]
catValues = {label: rank for rank, label in enumerate(categories)}

In [36]:
def filterDictByExcellence(finalDict):
    """Keep only the rows classified as "excellent" or "good".

    ``finalDict`` is ``{element: {score: {row label: [.., .., .., class]}}}``;
    the class is read from the fourth position (column ``3``) of each row.

    Returns a nested dict of the same shape that contains only the rows whose
    class is "excellent" or "good".
    """
    def _keepTopClasses(rows):
        table = pd.DataFrame.from_dict(rows, orient="index")
        kept = table[table[3].isin(["excellent", "good"])]
        return kept.set_index(kept.index).T.to_dict(orient="list")

    return {
        element: {
            score: _keepTopClasses(finalDict[element][score])
            for score in finalDict[element]
        }
        for element in finalDict
    }

In [37]:
def dictToTables(transformedDict, uniqueScores):
    """Build one wide table per score from the nested metrics dict.

    For every score in ``uniqueScores`` the tables of all elements that
    contain the score are placed side by side. Columns get a two-level index
    (element, then Mean/Std/Count/Class) and missing cells are filled with 0.

    Returns ``{score: DataFrame}``.
    """
    statNames = ['Mean', 'Std', 'Count', 'Class']
    processedData = {}

    for score in uniqueScores:
        elementValues = [
            element for element in transformedDict if score in transformedDict[element]
        ]
        perElement = [
            pd.DataFrame.from_dict(transformedDict[element][score], orient='index')
            for element in elementValues
        ]
        # The leading empty frame keeps the concatenation valid when no
        # element provides this score.
        combinedDf = pd.concat([pd.DataFrame(), *perElement], axis=1)
        combinedDf.columns = pd.MultiIndex.from_product([elementValues, statNames], names=[None, None])
        combinedDf.fillna(0, inplace=True)
        processedData[score] = combinedDf

    return processedData

In [38]:
def transformClassColumns(dataframe, catValues):
    """Return the ``Class`` columns of ``dataframe`` as numeric ranks.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns are a two-level index with ``'Class'`` among the
        second-level labels.
    catValues : dict
        Mapping from class label to numeric value.

    Returns
    -------
    pandas.DataFrame
        One column per first-level label. Cells equal to 0 are treated as
        "worst"; labels found in ``catValues`` are replaced by their value and
        anything else is left untouched.
    """
    # Select the Class columns first so the 0 -> "worst" replacement only
    # touches the cells that are actually returned.
    classColumns = dataframe.xs('Class', level=1, axis=1).replace(0, "worst")

    def _toRank(label):
        return catValues.get(label, label)

    # DataFrame.map only exists from pandas 2.1; older versions call it applymap.
    # Check on the class so a column named "map" cannot shadow the method.
    if hasattr(pd.DataFrame, "map"):
        return classColumns.map(_toRank)
    return classColumns.applymap(_toRank)

In [39]:
def filterTopRowsBySum(mappedClassColumns, topN=5):
    """Return the ``topN`` rows with the largest row sum, largest first."""
    bestLabels = (
        mappedClassColumns.sum(axis=1)
        .sort_values(ascending=False)
        .head(topN)
        .index
    )
    return mappedClassColumns.loc[bestLabels]

In [40]:
def createPolarPlot(df, score):
    """Draw one filled polar trace per row of ``df`` and show the figure.

    Each row becomes a ``Scatterpolar`` trace named ``Player-<row label>``
    with the row values as radius and the column labels as angle. The radial
    axis runs from 0 to the largest numeric value in ``df`` and is ticked
    with the module-level ``categories`` labels. ``score`` only appears in
    the title.
    """
    fig = go.Figure()

    for position, rowLabel in enumerate(df.index):
        trace = go.Scatterpolar(
            r=df.iloc[position],
            theta=df.columns,
            fill='toself',
            opacity=0.5,
            name=f"Player-{rowLabel}",
            showlegend=True,
        )
        fig.add_trace(trace)

    radialAxis = dict(
        angle=45,
        tickangle=45,
        visible=True,
        gridwidth=2,
        # Upper bound follows the data, not the number of categories.
        range=[0, max(df.max(numeric_only=True))],
        tickvals=list(range(len(categories))),
        ticktext=categories,
        tickwidth=10,
    )
    legendBox = dict(yanchor="top", y=1.0, xanchor="left", x=0.8)

    fig.update_layout(
        font_size=13,
        legend=legendBox,
        polar=dict(radialaxis=radialAxis),
        title= f"Player Variables for {score}",
        height=500,
        width=800,
    )

    fig.show()

In [100]:
# Total
# For every score: build the wide table, turn the Class columns into numeric
# ranks, keep the five rows with the largest rank sum and draw them.
tableTotal = dictToTables(finalDict, uniqueScores)
for score in tableTotal:
    tableTotalTrans = transformClassColumns(tableTotal[score], catValues)
    tableTotalTransTop5 = filterTopRowsBySum(tableTotalTrans, topN=5)
    createPolarPlot(tableTotalTransTop5, score)


Top Players 

In [102]:
# Running total per row label, updated by accumulateSums; re-run this cell to reset it.
cumulativeValuesPlayer = pd.Series(dtype=float)

In [103]:
def accumulateSums(df):
    """Add the row sums of ``df`` to the global ``cumulativeValuesPlayer``.

    Row labels missing on either side are treated as 0, so new labels are
    appended to the running total. The global Series is rebound to the new
    total; nothing is returned.
    """
    global cumulativeValuesPlayer
    cumulativeValuesPlayer = cumulativeValuesPlayer.add(df.sum(axis=1), fill_value=0)

In [104]:
# Total
# Add the numeric class ranks of every score table to the running total.
# No top-N filtering here: every row contributes.
# Re-running this cell without resetting cumulativeValuesPlayer adds the sums again.
tableTotal = dictToTables(finalDict, uniqueScores)
for score in tableTotal:
    tableTotalTrans = transformClassColumns(tableTotal[score], catValues)
    accumulateSums(tableTotalTrans)


In [106]:
# Accumulated totals, largest first (displayed as the cell output).
sortcumulativeValuesPlayer = cumulativeValuesPlayer.sort_values(ascending=False)
sortcumulativeValuesPlayer

5216     199.0
5203     180.0
5503     179.0
5506     173.0
5213     163.0
4324     145.0
4320     143.0
5211     140.0
5470     136.0
6379     128.0
5246     123.0
10609     92.0
6616      50.0
4691      48.0
11094     43.0
6400      33.0
22102     23.0
3508      16.0
7068      12.0
20055      5.0
dtype: float64