In [1]:
import pickle
import pandas as pd
import numpy as np

FC Barcelona study

In [41]:
# Input files used by the Barcelona analysis below (paths are relative to the notebook).
# Raw per-player (individual) network metrics, stored as a pickle.
individualMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Individual/IndividualnetworkMetrics.pkl"
# Raw global network metrics, stored as a pickle.
globalMetricsBarcelonaPath = "../Data/Team_1/05Stage/Metrics/Raw/Global/GlobalnetworkMetrics.pkl"
# Metadata CSV; the code below reads its 'Score' and 'Difference' columns.
metadataBarcelonaPath = "../Data/Team_1/03Stage/finalMetadataBarcelona.csv"
# Pickled players object; its keys are iterated when building per-player tables.
playersBarcelonaPath =  "../Data/Team_1/04Stage/playersList.pkl"    

In [3]:
def readCSV(filePath):
    """Load a CSV file into a DataFrame.

    Read failures are reported on stdout instead of being raised; in that
    case the function returns None.

    Args:
        filePath: Path of the CSV file to read.

    Returns:
        The parsed pandas DataFrame, or None when the file could not be read.
    """
    try:
        return pd.read_csv(filePath)
    except FileNotFoundError:
        message = f"No s'ha trobat el fitxer: {filePath}"
    except pd.errors.EmptyDataError:
        message = f"El fitxer està buit: {filePath}"
    except pd.errors.ParserError:
        message = f"Error de parseig al fitxer: {filePath}"
    except Exception as error:
        message = f"Ha ocorregut un error: {error}"
    # Only reached when one of the handlers above ran.
    print(message)
    return None

In [43]:
def loadPickleFile(file):
    """Deserialize the object stored in a pickle file.

    Args:
        file: Path of the pickle file to load.

    Returns:
        The unpickled object, or None when the file does not exist (a message
        naming the missing path is printed in that case).
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # files that come from a trusted source.
    try:
        with open(file, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # Name the path so the failing input can be identified from the output.
        print(f"File {file} not found.")
        return None

Get raw dict

In [5]:
# Load the raw individual-metrics object (None if the pickle file is missing).
file = loadPickleFile(individualMetricsBarcelonaPath)

Get filtered dict by players

In [6]:
def getCount(df):
    """Count the non-missing cells of every row.

    Args:
        df: DataFrame whose rows are inspected.

    Returns:
        A Series, indexed like ``df``, holding the number of non-NA values
        found in each row.
    """
    presentMask = df.notna()
    return presentMask.sum(axis=1)

In [7]:
def dfCounts(counts):
    """Wrap per-row counts in a single-column DataFrame.

    Args:
        counts: The count values to wrap.

    Returns:
        A DataFrame built from ``counts`` with one column labelled 'count'.
    """
    return pd.DataFrame(data=counts, columns=['count'])

In [8]:
def filterDf(df, fraction=0.25):
    """Keep the rows whose 'count' reaches a fraction of the largest count.

    Args:
        df: DataFrame with a numeric 'count' column.
        fraction: Share of the maximum count a row must reach to be kept.
            Defaults to 0.25, which is the previously hard-coded ``max / 4``.

    Returns:
        The rows of ``df`` for which ``count >= max(count) * fraction``.
    """
    # Threshold is relative to the best-covered row, not an absolute count.
    threshold = df['count'].max() * fraction
    return df[df['count'] >= threshold]

In [9]:
def filtererdMetricsByPlayers(file):
    """Drop sparsely populated rows from every metric table.

    ``file`` is walked as a two-level mapping (outer key, then score). Each
    innermost value is turned into a DataFrame with one row per key, the
    non-missing cells of every row are counted, and only the rows accepted by
    ``filterDf`` are kept.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        A dict with the same two outer levels whose innermost value maps each
        surviving row key to its list of values.
    """
    def _keepWellCoveredRows(rawValues):
        frame = pd.DataFrame.from_dict(rawValues, orient="index")
        keptRows = filterDf(dfCounts(getCount(frame))).index
        return frame.loc[keptRows].T.to_dict(orient="list")

    return {
        element: {
            score: _keepWellCoveredRows(file[element][score])
            for score in file[element]
        }
        for element in file
    }
            

In [10]:
# Drop the sparsely populated rows from every metric table of the raw dictionary.
filteredDict = filtererdMetricsByPlayers(file)

Get normalized dict

In [11]:
def normalizatedMetrics(file):
    """Min-max scale every metric table column by column.

    Each innermost mapping is turned into a DataFrame (one row per key) and
    every column is rescaled as ``(x - min) / (max - min)``. A column whose
    values are all equal therefore yields NaN.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        A dict with the same structure holding the scaled value lists.
    """
    def _minMaxScale(rawValues):
        frame = pd.DataFrame.from_dict(rawValues, orient="index")
        lowest = frame.min()
        span = frame.max() - lowest
        scaled = ((frame - lowest) / span).set_index(frame.index)
        return scaled.T.to_dict(orient="list")

    return {
        element: {
            score: _minMaxScale(file[element][score])
            for score in file[element]
        }
        for element in file
    }

In [12]:
# Min-max scale the filtered metric tables, column by column.
normalizedDict = normalizatedMetrics(filteredDict)

Get classified file

In [13]:
def calculateMetrics(data):
    """Compute the per-row mean, standard deviation and count of valid values.

    Cells holding a single blank space (" ") are treated as missing, exactly
    like NaN.

    Args:
        data: DataFrame with one row per entity and one column per observation.

    Returns:
        A tuple ``(meanValues, stdValues, countValues)`` of Series indexed
        like ``data``. Mean and standard deviation are rounded to two
        decimals; the count is the number of non-missing cells in the row.
    """
    # Blank-space placeholders become NaN so the statistics below skip them.
    # Existing NaN cells are left alone: converting them to " " and back would
    # push numeric columns through object dtype for no gain.
    dfNoBlank = data.replace(" ", float('NaN'))

    meanValues = dfNoBlank.mean(axis=1).round(2)
    stdValues = dfNoBlank.std(axis=1).round(2)
    countValues = dfNoBlank.count(axis=1)

    return meanValues, stdValues, countValues

In [14]:
def classifiedMetrics(file):
    """Summarise every metric table per row and bin the row means into classes.

    For each ``file[element][score]`` table the per-row mean, standard
    deviation and count are obtained from ``calculateMetrics``. The non-NaN
    means are cut into five equal-width bins labelled "worst" to "excellent".
    Tables in which every mean is NaN are left out of the result.

    Args:
        file: Nested mapping ``{element: {score: {rowKey: values}}}``.

    Returns:
        ``{element: {score: {rowKey: [Mean, Std, Count, Class]}}}``. Rows
        whose mean is NaN keep their Std/Count but have NaN Mean and Class.
    """
    classLabels = ["worst", "bad", "medium", "good", "excellent"]
    finalDict = {}
    for element in file:
        concatenated_dict = {}
        for score in file[element]:
            df = pd.DataFrame.from_dict(file[element][score], orient="index")
            meanValues, stdValues, countValues = calculateMetrics(df)
            meanDropNa = meanValues.dropna(axis=0)
            if meanDropNa.empty:
                continue
            classifyValues = pd.cut(np.array(meanDropNa), 5, labels=classLabels).astype(str)
            columnClassifiy = pd.DataFrame({'Class': classifyValues}, index=meanDropNa.index)
            metricsTable = pd.concat(
                [meanDropNa.rename('Mean'), stdValues.rename('Std'), countValues.rename('Count'), columnClassifiy],
                axis=1,
            )
            # Align by label. The outer join above may order rows differently
            # from df.index when some means were dropped, so relabelling by
            # position would attach statistics to the wrong row key.
            metricsTable = metricsTable.reindex(df.index)
            concatenated_dict[score] = metricsTable.T.to_dict(orient="list")
        finalDict[element] = concatenated_dict
    return finalDict

In [15]:
# Summarise each normalized table per row (mean, std, count) and bin the means into five classes.
classifiedDict = classifiedMetrics(normalizedDict)

Get filtered dict by scores

In [16]:
def filterScoresBarcelona(dfScore):
    """Select the metadata rows whose score is kept for the analysis.

    A row is kept when at least one of the following holds, and its 'Score'
    is not "NF":

    * 'Difference' is below 3 and the same 'Score' appears more than once;
    * 'Difference' is zero or negative;
    * 'Score' is "3_2" or "3_0".

    The input frame is not modified.

    Args:
        dfScore: DataFrame with 'Score' and 'Difference' columns.

    Returns:
        A new DataFrame with the selected rows, in which 'Difference' has been
        converted to numeric (unparseable values become NaN).
    """
    # Work on a copy so the caller's frame keeps its original 'Difference' values.
    scores = dfScore.copy()
    scores['Difference'] = pd.to_numeric(scores['Difference'], errors='coerce')

    # Scores that occur in more than one row.
    scoreCounts = scores.groupby('Score').size()
    repeatedScores = scoreCounts[scoreCounts > 1].index

    keepMask = (
        ((scores['Difference'] < 3) & scores['Score'].isin(repeatedScores))
        | (scores['Difference'] <= 0)
        | (scores['Score'] == "3_2")
        | (scores['Score'] == "3_0")
    )
    return scores[keepMask & (scores['Score'] != "NF")]

In [17]:
def filtererdMetricsByScore(df, keys):
    """Keep, for every element, only the entries whose key is in ``keys``.

    Lookups are done with ``str(key)``, while the result is keyed by the
    original ``key`` object.

    Args:
        df: Mapping ``{element: {scoreKey: value}}``.
        keys: Iterable of keys to retain.

    Returns:
        ``{element: {key: value}}`` holding only the requested keys that exist
        for that element; elements with no match map to an empty dict.
    """
    return {
        element: {
            key: df[element][str(key)]
            for key in keys
            if str(key) in df[element]
        }
        for element in df
    }

In [18]:
# Read the metadata CSV (readCSV returns None if the read fails).
metadataBarcelonaFile = readCSV(metadataBarcelonaPath)

In [19]:
# Keep only the metadata rows that pass the rules in filterScoresBarcelona.
dfBarcelonaFiltered = filterScoresBarcelona(metadataBarcelonaFile)

In [20]:
# Distinct 'Score' values that survived the filter.
uniqueScores = dfBarcelonaFiltered["Score"].unique()

In [21]:
# Restrict the classified metrics to the retained scores.
finalDict = filtererdMetricsByScore(classifiedDict, uniqueScores)

Save dictionary

In [22]:
def saveDictToPickle(dictionary, filePath):
    """Serialize an object to disk with pickle.

    Any error raised while opening or writing the file is caught and reported
    on stdout; nothing is raised to the caller.

    Args:
        dictionary: Object to serialize.
        filePath: Destination path of the pickle file.
    """
    try:
        with open(filePath, "wb") as handle:
            pickle.dump(dictionary, handle)
    except Exception as error:
        print("Error occurred while saving the dictionary:", str(error))

In [23]:
# Persist the score-filtered individual metrics.
saveDictToPickle(finalDict, "../Data/Team_1/05Stage/Metrics/Filtered/Individual/finalIndividualnetworkMetrics.pkl")

Tables

Score tables

In [28]:
def saveIndividualMetrics(finalDict, uniqueScores, folderPath):
    """Write one metrics table per score to ``folderPath``.

    For every score, the tables of all elements that contain that score are
    placed side by side. Columns are a two-level index
    ``(element, Mean/Std/Count/Class)``, rows are the keys found in the
    per-element tables, and missing cells are filled with 0. Each table is
    pickled as ``<folderPath>/<score>_individualMetrics.pkl``.

    Args:
        finalDict: ``{element: {score: {rowKey: [Mean, Std, Count, Class]}}}``.
        uniqueScores: Iterable of scores to export.
        folderPath: Existing directory that receives the pickle files.
    """
    metricNames = ['Mean', 'Std', 'Count', 'Class']
    for score in uniqueScores:
        elementValues = [element for element in finalDict if score in finalDict[element]]
        elementTables = [
            pd.DataFrame.from_dict(finalDict[element][score], orient='index')
            for element in elementValues
        ]
        # Concatenate once instead of growing a frame inside the loop; an
        # empty frame is written when no element has this score.
        metricsTable = pd.concat(elementTables, axis=1) if elementTables else pd.DataFrame()
        metricsTable.columns = pd.MultiIndex.from_product([elementValues, metricNames], names=[None, None])
        metricsTable = metricsTable.fillna(0)
        metricsTable.to_pickle(f"{folderPath}/{score}_individualMetrics.pkl")

In [35]:
# Write one pickled table per retained score into the Score/Individual folder.
saveIndividualMetrics(finalDict, uniqueScores, "../Data/Team_1/06Stage/Tables/Score/Individual")

Player tables

In [39]:
# Load the players object; its keys are iterated by savePlayerMetrics.
playersList = loadPickleFile(playersBarcelonaPath)

In [37]:
def savePlayerMetrics(playersList, finalDict, folderPath):
    """Write one metrics table per player to ``folderPath``.

    For every key of ``playersList`` a table is built with one row per score
    and a two-level column index ``(element, Mean/Std/Count/Class)``. A cell
    is 0 when the player has no values for that element and score, or when
    the element does not contain that score at all. Each table is pickled as
    ``<folderPath>/<key>_individualMetrics.pkl``.

    Args:
        playersList: Mapping whose keys identify players; keys are looked up
            in ``finalDict`` as ``str(key)``.
        finalDict: ``{element: {score: {playerKey: [Mean, Std, Count, Class]}}}``.
        folderPath: Existing directory that receives the pickle files.
    """
    metricNames = ['Mean', 'Std', 'Count', 'Class']
    elementValues = list(finalDict)
    multiIndex = pd.MultiIndex.from_product([elementValues, metricNames], names=[None, None])

    for key in playersList.keys():
        playerKey = str(key)
        elementTables = []
        for element in elementValues:
            # Start from scratch for every element: sharing this dict across
            # elements would let a score that is missing here inherit the
            # values recorded for a previous element.
            scoreMetrics = {}
            for score, valuesByPlayer in finalDict[element].items():
                values = valuesByPlayer.get(playerKey)
                if values:
                    scoreMetrics[score] = {
                        'Mean': values[0],
                        'Std': values[1],
                        'Count': values[2],
                        'Class': values[3],
                    }
                else:
                    scoreMetrics[score] = dict.fromkeys(metricNames, 0)
            # Explicit columns keep the four-column layout even when the
            # element has no scores at all.
            elementTables.append(
                pd.DataFrame(
                    list(scoreMetrics.values()),
                    index=list(scoreMetrics.keys()),
                    columns=metricNames,
                )
            )

        # Concatenate once; scores absent from an element become NaN, then 0.
        metricsTable = pd.concat(elementTables, axis=1) if elementTables else pd.DataFrame()
        metricsTable.columns = multiIndex
        metricsTable = metricsTable.fillna(0)
        metricsTable.to_pickle(f"{folderPath}/{key}_individualMetrics.pkl")


In [40]:
# Write one pickled table per player key into the Player folder.
savePlayerMetrics(playersList, finalDict, "../Data/Team_1/06Stage/Tables/Player")

Global tables

In [44]:
# Load the raw global-metrics object (None if the pickle file is missing).
globalMetricsBarcelonaFile = loadPickleFile(globalMetricsBarcelonaPath)

In [None]:
# NOTE(review): prints the entire loaded object; consider showing a short summary instead.
print(globalMetricsBarcelonaFile)

In [58]:
# Restrict the global metrics to the retained scores.
filteredGlobal = filtererdMetricsByScore(globalMetricsBarcelonaFile, uniqueScores)

In [59]:
# Persist the score-filtered global metrics (filteredGlobal, not the individual-metrics finalDict).
saveDictToPickle(filteredGlobal, "../Data/Team_1/05Stage/Metrics/Filtered/Global/finalGlobalnetworkMetrics.pkl")

In [56]:
def saveGlobalMetrics(filteredGlobal, path):
    """Build, pickle and return the table of global metrics per score.

    For every element of ``filteredGlobal`` the per-row mean, standard
    deviation and count are obtained from ``calculateMetrics`` and placed side
    by side under a two-level column index ``(element, Mean/Std/Count)``.
    Missing cells are filled with 0.

    Args:
        filteredGlobal: Mapping ``{element: {rowKey: values}}``.
        path: Base directory; the table is written to
            ``<path>/06Stage/Tables/Score/Global/globalMetrics.pkl``.

    Returns:
        The assembled DataFrame.
    """
    elementValues = list(filteredGlobal)
    metricColumns = []
    for element in elementValues:
        df = pd.DataFrame.from_dict(filteredGlobal[element], orient='index')
        meanValues, stdValues, countValues = calculateMetrics(df)
        metricColumns.extend([
            meanValues.rename('Mean'),
            stdValues.rename('Std'),
            countValues.rename('Count'),
        ])
    # Concatenate once instead of growing a frame inside the loop; fall back
    # to an empty frame when there is nothing to combine.
    metricsTable = pd.concat(metricColumns, axis=1) if metricColumns else pd.DataFrame()
    metricsTable.columns = pd.MultiIndex.from_product([elementValues, ['Mean', 'Std', 'Count']], names=[None, None])
    metricsTable = metricsTable.fillna(0)
    metricsTable.to_pickle(f"{path}/06Stage/Tables/Score/Global/globalMetrics.pkl")
    return metricsTable

In [None]:
# Build and save the global metrics table; the returned DataFrame is the cell's output.
saveGlobalMetrics(filteredGlobal, "../Data/Team_1/")
