In [1]:
import os
import networkx as nx
from collections import defaultdict
import numpy as np
import pandas as pd

In [170]:
path = "../Data/Barcelona/04Stage/Graphs/diGraphs"

In [171]:
inDegree_players = defaultdict(list)
outDegree_players = defaultdict(list)

In [172]:
def readGraph(dataFolder, fileName):
    try:
        filePath = os.path.join(dataFolder, fileName)
        graph = nx.read_gexf(filePath)
        return graph
    except FileNotFoundError:
        print(f"File '{fileName}' not found in '{dataFolder}'.")
        return None
    except nx.NetworkXError as e:
        print(f"Error reading graph from '{filePath}': {e}")
        return None

In [173]:
def getScore(fileName):
    parts = fileName.split("_")
    if len(parts) == 4:
        pass
    elif len(parts) == 5 :
        return f"{parts[2]}_{parts[3]}"


In [174]:
# Initialize dictionaries to store metrics for each score
inDegree_players = {}
outDegree_players = {}
clustering_coefficients = {}
betweenness_centralities = {}
closeness_centralities = {}

# Iterate through all the files in the folder
for fileName in os.listdir(path):
    # Join the folder path with the file name
    graph = readGraph(path, fileName)
    score = getScore(fileName)
    
    # Initialize dictionaries for metrics if they don't exist
    if score not in inDegree_players:
        inDegree_players[score] = {}
    if score not in outDegree_players:
        outDegree_players[score] = {}
    if score not in clustering_coefficients:
        clustering_coefficients[score] = {}
    if score not in betweenness_centralities:
        betweenness_centralities[score] = {}
    if score not in closeness_centralities:
        closeness_centralities[score] = {}
    
    # Iterate over nodes in the graph
    for node in graph.nodes():
        # Print node label
        #print(f"{node},  label: {graph.nodes[node]['label']}")
        
        # Calculate in-degree and append to inDegree_players dictionary
        in_degree = graph.in_degree(node)
        if node not in inDegree_players[score]:
            inDegree_players[score][node] = []
        inDegree_players[score][node].append(in_degree)
        
        # Calculate out-degree and append to outDegree_players dictionary
        out_degree = graph.out_degree(node)
        if node not in outDegree_players[score]:
            outDegree_players[score][node] = []
        outDegree_players[score][node].append(out_degree)
        
        # Calculate clustering coefficient and store
        clustering_coefficient = nx.clustering(graph, node)
        clustering_coefficients[score][node] = clustering_coefficient
        
        # Calculate betweenness centrality and store
        betweenness_centrality = nx.betweenness_centrality(graph)[node]
        betweenness_centralities[score][node] = betweenness_centrality
        
        # Calculate closeness centrality and store
        closeness_centrality = nx.closeness_centrality(graph)[node]
        closeness_centralities[score][node] = closeness_centrality


In [None]:
for label, degrees in inDegree_players.items():
    print(f"Label: {label}, Degrees: {degrees}")

In [None]:
for label, degrees in outDegree_players.items():
    print(f"Label: {label}, Degrees: {degrees}")

In [None]:
for label, degrees in clustering_coefficients.items():
    print(f"Label: {label}, Degrees: {degrees}")

In [50]:
#### vIsaac
files = ["/Users/isaac/Desktop/100_1_0_1_diGraph.gexf","/Users/isaac/Desktop/108_1_0_1_diGraph.gexf"] #up to u

# indicators
out_degree = defaultdict(list)
closeness_centrality = defaultdict(list)
##...

for file in files:
    g = nx.read_gexf(file)
    og = g.out_degree()
    [out_degree[k].append(v) for k,v in og.items()]
    cg = nx.closeness_centrality(g) 
    [closeness_centrality[k].append(v) for k,v in cg.items()]
    #...


df = pd.DataFrame.from_dict(out_degree, orient='index') 
df2 = pd.DataFrame.from_dict(closeness_centrality, orient='index') 

dfall= pd.concat([df,df2],axis=1) #si vols, pots nomes agafar un df
dfall



Unnamed: 0,0,1,0.1,1.1
6616,3,,0.454545,
5503,1,8.0,0.434783,0.833333
5203,6,9.0,0.769231,0.714286
6379,5,7.0,0.526316,0.714286
5506,2,8.0,0.555556,0.714286
4691,3,5.0,0.4,0.714286
4324,1,,0.0,
4320,3,5.0,0.526316,0.769231
5213,3,10.0,0.526316,0.833333
11094,2,,0.526316,


In [51]:
## nomes un
df["mean"] = df.mean(axis=1)
df["count"] = df.count(axis=1)-1 #meyns mean column
df["std"] = df[:-2].std(axis=1) ## dos: mean i count 
df

df = df.sort_values(["count","mean"],ascending=False)
df.drop(columns=df.columns[0:2],inplace=True) # 2!! FILES/PARTITS!
df.reset_index(inplace=True)
df["match"]="0-1"
df

Unnamed: 0,index,mean,count,std,match
0,5203,7.5,2,3.010399,0-1
1,5213,6.5,2,3.637192,0-1
2,6379,6.0,2,2.160247,0-1
3,5506,5.0,2,2.872281,0-1
4,5503,4.5,2,3.119161,0-1
5,4691,4.0,2,1.290994,0-1
6,4320,4.0,2,1.290994,0-1
7,5470,7.0,1,3.464102,0-1
8,5211,7.0,1,3.464102,0-1
9,5216,5.0,1,,0-1


# per visualitzar/plot:
- agafa els mes representatius [:10] de cada match, que siguin comuns, i plot el mean & std
- https://seaborn.pydata.org/examples/errorband_lineplots.html
  

In [52]:
df.groupby(["match"]).head(5)

Unnamed: 0,index,mean,count,std,match
0,5203,7.5,2,3.010399,0-1
1,5213,6.5,2,3.637192,0-1
2,6379,6.0,2,2.160247,0-1
3,5506,5.0,2,2.872281,0-1
4,5503,4.5,2,3.119161,0-1
