In [57]:
import pickle
import pandas as pd
import numpy as np

FC Barcelona study

In [58]:
# Input locations used by the cells below; every path is relative to the
# directory the notebook is run from.
# Pickle files with the raw individual and global network metrics.
individualMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Individual/IndividualnetworkMetrics.pkl"
globalMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Global/GlobalnetworkMetrics.pkl"
# CSV with the match metadata (read with readCSV further down).
metadataBarcelonaPath = "../Data/Team_1/03Stage/finalMetadataBarcelona.csv"
# Pickle file with the players list (loaded with loadPickleFile further down).
playersBarcelonaPath =  "../Data/Team_1/04Stage/playersList.pkl"    

In [59]:
def readCSV(filePath):
    """Read a CSV file into a DataFrame, reporting failures on stdout.

    Args:
        filePath: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails. Failures are
        not raised: a message describing the problem is printed instead.
    """
    try:
        return pd.read_csv(filePath)
    except FileNotFoundError:
        message = f"No s'ha trobat el fitxer: {filePath}"
    except pd.errors.EmptyDataError:
        message = f"El fitxer està buit: {filePath}"
    except pd.errors.ParserError:
        message = f"Error de parseig al fitxer: {filePath}"
    except Exception as error:
        message = f"Ha ocorregut un error: {error}"
    # Only reached when one of the handlers above ran.
    print(message)
    return None

In [60]:
def loadPickleFile(file):
    """Deserialize a pickle file and return the stored object.

    Args:
        file: Path of the pickle file; it is opened in binary read mode.

    Returns:
        The unpickled object, or ``None`` when the file does not exist. In
        that case a message naming the missing path is printed instead of
        raising.
    """
    try:
        with open(file, "rb") as f:
            # NOTE(security): pickle.load can execute arbitrary code while
            # deserializing; only point this at files produced by a trusted
            # source.
            return pickle.load(f)
    except FileNotFoundError:
        # Name the offending path so the failure can be traced to its input.
        print(f"File {file} not found.")
        return None
Load the raw metrics dict

In [61]:
# Load the pickled individual metrics; loadPickleFile returns None when the
# file is missing, which would make the next cells fail.
file = loadPickleFile(individualMetricsBarcelonaPath)

Get filtered dict by players

In [62]:
def getCount(df):
    """Count the non-missing cells in each row of ``df``.

    Args:
        df: DataFrame to inspect.

    Returns:
        Series indexed like ``df`` holding, per row, how many cells are not
        NA.
    """
    isPresent = df.notna()
    return isPresent.sum(axis=1)

In [63]:
def dfCounts(counts):
    """Wrap ``counts`` in a DataFrame with a single column named 'count'.

    Args:
        counts: Data passed straight to the ``pd.DataFrame`` constructor
            (the notebook passes the unnamed Series built by getCount).

    Returns:
        DataFrame built from ``counts`` with ``columns=['count']``.
    """
    return pd.DataFrame(data=counts, columns=['count'])

In [64]:
def filterDf(df, divisor=4):
    """Keep the rows whose 'count' reaches a fraction of the largest count.

    Args:
        df: DataFrame with a numeric 'count' column.
        divisor: The cut-off is ``max(count) / divisor``. The default of 4
            keeps rows that have at least a quarter of the maximum count.

    Returns:
        The rows of ``df`` whose 'count' is greater than or equal to the
        cut-off.
    """
    maxCount = df['count'].max()

    # Threshold: one `divisor`-th (by default one quarter) of the maximum count.
    threshold = maxCount / divisor

    # Keep only the rows whose count is at least the threshold.
    return df[df['count'] >= threshold]

In [65]:
def filtererdMetricsByPlayers(file):
    """Drop sparsely populated rows from every table of a two-level mapping.

    For each ``file[element][score]`` mapping a DataFrame is built with one
    row per key (presumably a player id - confirm against the pickle's
    producer). Rows are kept only when their number of non-missing cells
    passes filterDf's threshold.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        A new nested dict with the same two outer levels, where each inner
        mapping is ``{rowKey: list_of_values}`` for the surviving rows.
    """
    def _keepDenseRows(rowsByKey):
        # One row per key, then keep the rows accepted by the count filter.
        table = pd.DataFrame.from_dict(rowsByKey, orient="index")
        acceptedCounts = filterDf(dfCounts(getCount(table)))
        return table.loc[acceptedCounts.index].T.to_dict(orient="list")

    return {
        element: {score: _keepDenseRows(file[element][score]) for score in file[element]}
        for element in file
    }
            

In [66]:
# Apply the row-count filter to every table of the raw individual metrics.
filteredDict = filtererdMetricsByPlayers(file)

Get normalized dict

In [67]:
def normalizatedMetrics(file):
    """Min-max scale each column of every ``file[element][score]`` table.

    Each inner mapping becomes a DataFrame with one row per key; every
    column is rescaled as ``(x - min) / (max - min)`` using that column's
    own minimum and maximum. A column whose maximum equals its minimum
    divides zero by zero and therefore comes out as NaN.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        A new nested dict with the same keys, whose innermost values are
        the lists of rescaled values for each row key.
    """
    def _rescale(rowsByKey):
        table = pd.DataFrame.from_dict(rowsByKey, orient="index")
        columnMin = table.min()
        columnSpan = table.max() - columnMin
        scaled = ((table - columnMin) / columnSpan).set_index(table.index)
        return scaled.T.to_dict(orient="list")

    return {
        element: {score: _rescale(file[element][score]) for score in file[element]}
        for element in file
    }

In [68]:
# Min-max normalize the filtered metrics, table by table.
normalizedDict = normalizatedMetrics(filteredDict)

Get classified dict

In [69]:
def calculateMetrics(data):
    """Compute the per-row mean, standard deviation and valid-cell count.

    Cells that are NaN or hold a single blank string (``" "``) are treated
    as missing and ignored by all three statistics.

    Args:
        data: DataFrame whose rows are summarised. It is not modified.

    Returns:
        Tuple ``(meanValues, stdValues, countValues)`` of Series indexed
        like ``data``: the row mean and row standard deviation, both
        rounded to 2 decimals, and the number of non-missing cells per row.
    """
    # Blank placeholders become NaN (not 0). infer_objects() then gives a
    # numeric dtype back to columns that were object only because of them.
    cleaned = data.replace(" ", float("nan")).infer_objects()

    meanValues = cleaned.mean(axis=1).round(2)
    stdValues = cleaned.std(axis=1).round(2)
    countValues = cleaned.count(axis=1)

    return meanValues, stdValues, countValues

In [70]:
def classifiedMetrics(file):
    """Summarise and classify every ``file[element][score]`` table.

    For each table (one row per key) the row mean, standard deviation and
    valid-cell count are computed with calculateMetrics. Rows with a
    non-NaN mean are binned into five equal-width classes over the range
    of the means: "worst", "bad", "medium", "good", "excellent". Tables
    in which every mean is NaN are left out of the result.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        Nested dict ``{element: {score: {rowKey: [Mean, Std, Count,
        Class]}}}``. Rows whose mean is NaN keep their own key, with NaN
        as Mean and Class.
    """
    classLabels = ["worst", "bad", "medium", "good", "excellent"]
    finalDict = {}
    for element in file:
        concatenated_dict = {}
        for score in file[element]:
            df = pd.DataFrame.from_dict(file[element][score], orient="index")
            meanValues, stdValues, countValues = calculateMetrics(df)
            meanDropNa = meanValues.dropna(axis=0)
            if meanDropNa.empty:
                # Nothing to classify for this score.
                continue
            classifyValues = pd.cut(np.array(meanDropNa), 5, labels=classLabels).astype(str)
            columnClassifiy = pd.DataFrame({'Class': classifyValues}, index=meanDropNa.index)
            metricsTable = pd.concat(
                [meanDropNa.rename('Mean'), stdValues.rename('Std'), countValues.rename('Count'), columnClassifiy],
                axis=1,
            )
            # The outer join above moves rows with a dropped (NaN) mean to
            # the end. Restore the original row order by label: assigning
            # df.index positionally would attach labels to the wrong rows.
            metricsTable = metricsTable.reindex(df.index)
            concatenated_dict[score] = metricsTable.T.to_dict(orient="list")
        finalDict[element] = concatenated_dict
    return finalDict

In [71]:
# Replace each normalized table by its [Mean, Std, Count, Class] summary rows.
classifiedDict = classifiedMetrics(normalizedDict)

Get filtered dict by scores

In [72]:
def filterScoresBarcelona(dfScore):
    """Select the metadata rows whose score is kept for the study.

    A row is kept when at least one of these holds:
      * its 'Difference' is below 3 and its 'Score' value occurs more
        than once in the frame,
      * its 'Difference' is less than or equal to 0,
      * its 'Score' is "3_2" or "3_0".
    Rows whose 'Score' is "NF" are always removed.

    Args:
        dfScore: DataFrame with 'Score' and 'Difference' columns. It is
            not modified.

    Returns:
        A new DataFrame with the selected rows. Its 'Difference' column
        is numeric; values that could not be parsed are NaN and satisfy
        neither 'Difference' condition.
    """
    # Work on a copy so the caller's frame keeps its original column.
    scores = dfScore.copy()
    scores['Difference'] = pd.to_numeric(scores['Difference'], errors='coerce')

    # Score values that appear in more than one row.
    scoreCounts = scores.groupby('Score').size()
    repeatedScores = scoreCounts[scoreCounts > 1].index

    closeAndRepeated = (scores['Difference'] < 3) & scores['Score'].isin(repeatedScores)
    notPositive = scores['Difference'] <= 0
    alwaysKept = scores['Score'].isin(["3_2", "3_0"])

    filteredDfScore = scores[closeAndRepeated | notPositive | alwaysKept]
    return filteredDfScore[filteredDfScore["Score"] != "NF"]

In [73]:
def filtererdMetricsByScore(df, keys):
    """Restrict every inner mapping of ``df`` to the requested keys.

    Args:
        df: Mapping ``{element: {scoreKey: value}}``; despite the name it
            is indexed like a dict, not a DataFrame.
        keys: Iterable of keys to keep. Each key is looked up by its
            ``str()`` form but stored in the result as given.

    Returns:
        Dict ``{element: {key: df[element][str(key)]}}`` holding only the
        keys present in that element. Every element of ``df`` appears in
        the result, possibly with an empty dict.
    """
    return {
        element: {
            key: df[element][str(key)]
            for key in keys
            if str(key) in df[element]
        }
        for element in df
    }

In [74]:
# Read the metadata CSV; readCSV prints a message and yields None on failure.
metadataBarcelonaFile = readCSV(metadataBarcelonaPath)

In [75]:
# Keep only the metadata rows accepted by the score/difference rules.
dfBarcelonaFiltered = filterScoresBarcelona(metadataBarcelonaFile)

In [76]:
# Distinct 'Score' values that survived the filter, in order of appearance.
uniqueScores = dfBarcelonaFiltered["Score"].unique()

In [77]:
# Restrict the classified individual metrics to the retained scores.
finalDict = filtererdMetricsByScore(classifiedDict, uniqueScores)

Save dictionary

In [78]:
# Persist a dictionary to disk as a pickled object.
def saveDictToPickle(dictionary, filePath):
    """Pickle ``dictionary`` into the file at ``filePath``.

    Args:
        dictionary: Object to serialize with ``pickle.dump``.
        filePath: Destination path, opened in binary write mode.

    Returns:
        None. Any exception raised while opening or writing is caught and
        reported on stdout instead of being propagated.
    """
    try:
        with open(filePath, "wb") as handle:
            pickle.dump(dictionary, handle)
    except Exception as error:
        print("Error occurred while saving the dictionary:", str(error))

In [79]:
# Persist the score-filtered individual metrics.
saveDictToPickle(finalDict, "../Data/Team_1/05Stage/Metrics/Filtered/Individual/finalIndividualnetworkMetrics.pkl")

Tables

Score tables

In [80]:
def saveIndividualMetrics(finalDict, uniqueScores, folderPath):
    """Write one pickled summary table per score.

    For every score, the ``finalDict[element][score]`` tables of all
    elements that contain that score are placed side by side. Columns get
    a two-level header (element, then 'Mean'/'Std'/'Count'/'Class'),
    missing cells are filled with 0, and the table is pickled to
    ``{folderPath}/{score}_individualMetrics.pkl``.

    Args:
        finalDict: Nested mapping ``{element: {score: {rowKey: [Mean, Std,
            Count, Class]}}}``.
        uniqueScores: Iterable of scores to export.
        folderPath: Existing directory that receives the pickle files.
    """
    metricNames = ['Mean', 'Std', 'Count', 'Class']
    for score in uniqueScores:
        # Elements that have a table for this score, in finalDict order.
        elementValues = [element for element in finalDict if score in finalDict[element]]
        perElementTables = [
            pd.DataFrame.from_dict(finalDict[element][score], orient='index')
            for element in elementValues
        ]
        if perElementTables:
            metricsTable = pd.concat(perElementTables, axis=1)
        else:
            metricsTable = pd.DataFrame()
        metricsTable.columns = pd.MultiIndex.from_product([elementValues, metricNames], names=[None, None])
        metricsTable = metricsTable.fillna(0)
        metricsTable.to_pickle(f"{folderPath}/{score}_individualMetrics.pkl")

In [81]:
# Export one table per retained score.
saveIndividualMetrics(finalDict, uniqueScores, "../Data/Team_1/06Stage/Tables/Score/Individual")

Player tables

In [82]:
# Load the pickled players list (its keys are iterated by savePlayerMetrics).
playersList = loadPickleFile(playersBarcelonaPath)

In [83]:
def savePlayerMetrics(playersList, finalDict, folderPath):
    """Write one pickled table per player with their metrics by score.

    For every key of ``playersList`` a table is built with one row per
    score and a two-level column header (element, then 'Mean'/'Std'/
    'Count'/'Class'). A cell block is all zeros when the player has no
    entry for that element and score, or when the element has no table
    for that score. Each table is pickled to
    ``{folderPath}/{key}_individualMetrics.pkl``.

    Args:
        playersList: Mapping whose keys identify the players. Each key is
            looked up in ``finalDict`` by its ``str()`` form.
        finalDict: Nested mapping ``{element: {score: {playerKey: [Mean,
            Std, Count, Class]}}}``.
        folderPath: Existing directory that receives the pickle files.
    """
    metricNames = ['Mean', 'Std', 'Count', 'Class']
    elementValues = list(finalDict)
    multiIndex = pd.MultiIndex.from_product([elementValues, metricNames], names=[None, None])

    for key in playersList.keys():
        playerKey = str(key)
        elementTables = []
        for element in elementValues:
            # Start from scratch for every element so that a score missing
            # here never inherits the values of a previous element.
            scoreRows = {}
            for score, playersMetrics in finalDict[element].items():
                values = playersMetrics.get(playerKey)
                if values:
                    scoreRows[score] = [values[0], values[1], values[2], values[3]]
                else:
                    # Player absent (or empty entry) for this score.
                    scoreRows[score] = [0, 0, 0, 0]
            elementTables.append(
                pd.DataFrame.from_dict(scoreRows, orient='index', columns=metricNames)
            )

        # Side-by-side join on the score index; scores an element lacks
        # become NaN here and are zero-filled below.
        metricsTable = pd.concat(elementTables, axis=1) if elementTables else pd.DataFrame()
        metricsTable.columns = multiIndex
        metricsTable = metricsTable.fillna(0)
        metricsTable.to_pickle(f"{folderPath}/{key}_individualMetrics.pkl")


In [84]:
# Export one table per player.
savePlayerMetrics(playersList, finalDict, "../Data/Team_1/06Stage/Tables/Player")

Global tables

In [85]:
# Load the pickled raw global network metrics.
globalMetricsBarcelonaFile = loadPickleFile(globalMetricsBarcelonaPath)

In [86]:
# Restrict the global metrics to the retained scores.
filteredGlobal = filtererdMetricsByScore(globalMetricsBarcelonaFile, uniqueScores)

In [87]:
# Persist the score-filtered global metrics. Saving finalDict here would write
# the individual metrics into the Global file.
saveDictToPickle(filteredGlobal, "../Data/Team_1/05Stage/Metrics/Filtered/Global/finalGlobalnetworkMetrics.pkl")

In [88]:
def saveGlobalMetrics(filteredGlobal, path):
    """Summarise the global metrics per score, pickle the table, return it.

    For every element of ``filteredGlobal`` a table with one row per score
    is built and reduced with calculateMetrics to a row mean, standard
    deviation and valid-cell count. The three columns of all elements are
    placed side by side under a two-level header (element, then
    'Mean'/'Std'/'Count'); missing cells are filled with 0.

    Args:
        filteredGlobal: Mapping ``{element: {score: values}}``.
        path: Base directory; the table is written to
            ``{path}/06Stage/Tables/Score/Global/globalMetrics.pkl``.

    Returns:
        The summary DataFrame that was written.
    """
    elementValues = list(filteredGlobal)
    summaryColumns = []
    for element in elementValues:
        scoreTable = pd.DataFrame.from_dict(filteredGlobal[element], orient='index')
        meanValues, stdValues, countValues = calculateMetrics(scoreTable)
        summaryColumns.extend([
            meanValues.rename('Mean'),
            stdValues.rename('Std'),
            countValues.rename('Count'),
        ])

    metricsTable = pd.concat(summaryColumns, axis=1) if summaryColumns else pd.DataFrame()
    metricsTable.columns = pd.MultiIndex.from_product(
        [elementValues, ['Mean', 'Std', 'Count']], names=[None, None]
    )
    metricsTable = metricsTable.fillna(0)
    metricsTable.to_pickle(f"{path}/06Stage/Tables/Score/Global/globalMetrics.pkl")
    return metricsTable

In [89]:
# Build and save the global summary table; being the cell's last expression,
# the returned table is also displayed.
saveGlobalMetrics(filteredGlobal, "../Data/Team_1/")


Unnamed: 0_level_0,av_clust,av_clust,av_clust,dty,dty,dty,diam,diam,diam
Unnamed: 0_level_1,Mean,Std,Count,Mean,Std,Count,Mean,Std,Count
0_1,0.6,0.12,9,0.53,0.16,9,2.71,0.49,7.0
0_2,0.38,0.27,3,0.35,0.15,3,5.0,0.0,1.0
1_2,0.59,0.16,3,0.51,0.17,3,2.5,0.71,2.0
1_0,0.64,0.16,28,0.57,0.17,28,2.96,0.66,26.0
2_0,0.56,0.23,20,0.53,0.17,20,3.06,1.09,17.0
3_0,0.57,0.16,14,0.52,0.15,14,3.23,0.73,13.0
1_1,0.46,0.19,11,0.42,0.16,11,3.0,0.58,7.0
2_1,0.54,0.17,12,0.48,0.15,12,3.55,1.13,11.0
3_1,0.54,0.18,6,0.47,0.2,6,3.0,0.71,5.0
0_3,0.62,0.0,1,0.6,0.0,1,3.0,0.0,1.0


Graphics Treatment

In [90]:
# Debug dump of the whole nested dictionary (produces a very long output).
print(finalDict)

{'inD': {'0_1': {'5503': [0.37, 0.23, 5, 'medium'], '5203': [0.62, 0.33, 8, 'excellent'], '6379': [0.55, 0.24, 7, 'excellent'], '5506': [0.53, 0.25, 7, 'good'], '4691': [0.1, 0.13, 7, 'worst'], '4324': [0.3, 0.2, 5, 'bad'], '4320': [0.61, 0.34, 8, 'excellent'], '5213': [0.56, 0.35, 8, 'excellent'], '5470': [0.51, 0.21, 6, 'good'], '5211': [0.59, 0.31, 7, 'excellent'], '5216': [0.56, 0.43, 4, 'excellent'], '5246': [0.64, 0.26, 8, 'excellent'], '10609': [0.33, 0.27, 5, 'medium']}, '0_2': {'5246': [0.5, 0.17, 3, 'medium'], '5503': [1.0, nan, 1, 'excellent'], '6379': [0.31, 0.34, 3, 'bad'], '5213': [0.54, 0.12, 3, 'medium'], '5506': [0.71, 0.06, 2, 'good'], '5211': [0.47, 0.04, 2, 'medium'], '5203': [0.8, 0.26, 3, 'good'], '5216': [0.56, nan, 1, 'medium'], '5470': [0.83, 0.24, 2, 'excellent'], '4320': [0.45, 0.21, 3, 'medium'], '4691': [0.25, 0.35, 2, 'bad'], '4324': [0.22, nan, 1, 'worst'], '10609': [0.06, 0.08, 2, 'worst'], '6616': [0.11, nan, 1, 'worst']}, '1_2': {'5246': [0.7, 0.18, 3,

In [102]:
# For every element and score keep only the rows classified as "excellent"
# or "good". Column 3 of each row is the class label, the fourth entry of the
# [Mean, Std, Count, Class] lists stored in finalDict.
b = {}
for element, scoreTables in finalDict.items():
    topRatedByScore = {}
    for score, rowsByPlayer in scoreTables.items():
        df = pd.DataFrame.from_dict(rowsByPlayer, orient="index")
        dfExcellent = df[df[3].isin(["excellent", "good"])]
        topRatedByScore[score] = dfExcellent.T.to_dict(orient="list")
    b[element] = topRatedByScore

In [121]:

# Radial-axis labels; a label's position in the list is its numeric code.
# The two "" entries are unlabeled placeholder ticks. As dict keys they
# collapse into a single "" -> 2 entry, which is never looked up while the
# classes are only "worst", "good" and "excellent".
categories = ["worst", "", "", "good", "excellent"]
cat_values = dict(zip(categories,range(len(categories))))

import plotly.express as px
import plotly.graph_objects as go

# Assemble the table of `b` for a single score: the loop exits after the
# first entry of uniqueScores.
for score in uniqueScores:
    c = pd.DataFrame()
    elementValues = []
    for element in b:
        if score in b[element]:
            if not element in elementValues:
                elementValues.append(element)
            df = pd.DataFrame.from_dict(b[element][score], orient='index')
            c = pd.concat([c, df], axis=1)
    multiIndex = pd.MultiIndex.from_product([elementValues, ['Mean', 'Std', 'Count', 'Class']], names=[None, None])
    c.columns = multiIndex
    c.fillna(0, inplace=True)
    break

# Every 0 in the table becomes "worst". Only the Class columns are used below,
# where 0 marks a row missing from that element's good/excellent subset.
testNo0 = c.replace(0, "worst")
class_columns = testNo0.xs('Class', level=1, axis=1)
print(class_columns)
# Turn the class labels into their numeric codes.
testCut = class_columns.map(lambda x:cat_values[x]) 

# Keep the five rows with the highest total code across all elements.
row_sums = testCut.sum(axis=1)
row_sums_sroted = row_sums.sort_values(ascending=False).head(5)
filtered_testCut = testCut.loc[row_sums_sroted.index]

print(filtered_testCut)

fig = go.Figure()

# One radar trace per selected row.
for i in range(len(filtered_testCut)):
    fig.add_trace(
        go.Scatterpolar(
            r=filtered_testCut.iloc[i],
            theta=filtered_testCut.columns,
            fill='toself',
            opacity=0.5,
            name=f"Player-{filtered_testCut.index[i]}",  # Use player's name if available
            showlegend=True
        )
    )

fig.update_layout(
    font_size=13,
    legend=dict(
        yanchor="top",
        y=1.,
        xanchor="left",
        x=0.8
    ),
    polar=dict(
        radialaxis=dict(
            angle=45,
            tickangle=45,
            visible=True,
            gridwidth=2,
            range=[0, max(filtered_testCut.max(numeric_only=True))],  # Adjust range based on data
            # Ticks sit on the category codes (0 .. len(categories) - 1) so
            # that each ticktext entry labels its own code. They must not
            # depend on how many elements are plotted.
            tickvals=list(range(len(categories))),
            ticktext=categories,
            tickwidth=10
        )
    ),
    title="Player Variables"
)


             inD       outD      clust       betw       clos     eigenv  \
5203   excellent       good       good      worst  excellent      worst   
6379   excellent       good  excellent      worst       good      worst   
5506        good      worst       good      worst       good      worst   
4320   excellent       good      worst       good  excellent      worst   
5213   excellent       good      worst       good  excellent      worst   
5470        good  excellent       good      worst       good      worst   
5211   excellent  excellent      worst       good  excellent      worst   
5216   excellent      worst  excellent      worst  excellent      worst   
5246   excellent      worst  excellent      worst  excellent      worst   
4691       worst      worst  excellent      worst      worst      worst   
10609      worst      worst  excellent      worst      worst      worst   
4324       worst      worst      worst  excellent      worst       good   
3508       worst      wor