In [11]:
import json
import networkx as nx
import pandas as pd

In [2]:

def load_graph_from_data(filename: str = "data_cache/graph_data.json"):
    """Load a graph from a JSON file containing nodes and edges."""
    with open(filename, "r") as f:
        data = json.load(f)
    
    graph = nx.Graph()
    graph.add_nodes_from(data["nodes"])
    graph.add_edges_from(data["edges"])

    return graph

In [3]:
graph = load_graph_from_data()

### Node Data

In [8]:
list(graph.nodes(data=True))[0]

('fabianocaruana',
 {'uid': 11177810,
  'name': 'Fabiano Caruana',
  'username': 'fabianocaruana',
  'country': 'US',
  'rating': 2823})

### Edge Data

In [9]:
list(graph.edges(data=True))[0]

('fabianocaruana',
 'vincentkeymer',
 {'weight': 50,
  'data': [{'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2024.10.01"]\n[Round "-"]\n[White "VincentKeymer"]\n[Black "FabianoCaruana"]\n[Result "0-1"]\n[CurrentPosition "8/5p2/2n5/1k2pP1p/1b2P1p1/1p1K2P1/3B4/1N6 w - -"]\n[Timezone "UTC"]\n[ECO "A30"]\n[ECOUrl "https://www.chess.com/openings/English-Opening-Symmetrical-Variation...5.Bg2-Nc6-6.Nc3-g6"]\n[UTCDate "2024.10.01"]\n[UTCTime "15:15:45"]\n[WhiteElo "2779"]\n[BlackElo "2817"]\n[TimeControl "600+2"]\n[Termination "FabianoCaruana won on time"]\n[StartTime "15:15:45"]\n[EndDate "2024.10.01"]\n[EndTime "15:39:16"]\n[Link "https://www.chess.com/game/live/121534814803"]\n\n1. c4 {[%clk 0:10:02]} 1... c5 {[%clk 0:10:00.1]} 2. Nf3 {[%clk 0:09:57.5]} 2... Nf6 {[%clk 0:10:00.3]} 3. Nc3 {[%clk 0:09:58.5]} 3... Nc6 {[%clk 0:10:01]} 4. g3 {[%clk 0:09:59.9]} 4... d5 {[%clk 0:10:01.5]} 5. cxd5 {[%clk 0:10:00]} 5... Nxd5 {[%clk 0:10:03.4]} 6. Bg2 {[%clk 0:10:01.5]} 6... g6 {[%clk 0

### Degree: Measures the number of direct connections

In [17]:
pd.DataFrame(dict(graph.degree()), index=["degree"]).T.sort_values("degree", ascending=False).head(20)

Unnamed: 0,degree
the_machine04,170
fitod,98
anasta10,97
yanhob,93
mklose11,87
yaacovn,80
rezamahdavi2008,65
0blivi0usspy,63
eagle_2019,59
jcibarra,45


### Betweenness: Nodes that act as bridges within the network

In [23]:
pd.DataFrame(nx.betweenness_centrality(graph, weight='weight'), index=["betweenness"]).T.sort_values("betweenness", ascending=False).head(20)

Unnamed: 0,betweenness
anasta10,0.36668
the_machine04,0.360734
yaacovn,0.206851
yanhob,0.202141
fitod,0.182639
chessgodisback2008,0.157372
0blivi0usspy,0.155314
mklose11,0.154071
tanitoluwaaps116,0.127031
jcibarra,0.124043


### Pagerank: Identifies influential players

In [26]:
pd.DataFrame(nx.pagerank(graph, weight='weight'), index=["pagerank"]).T.sort_values("pagerank", ascending=False).head(20)

Unnamed: 0,pagerank
the_machine04,0.077607
yanhob,0.051925
fitod,0.050356
anasta10,0.039886
mklose11,0.03485
yaacovn,0.033589
rezamahdavi2008,0.027263
0blivi0usspy,0.023514
eagle_2019,0.023221
fabianocaruana,0.021693


### Win Ratio Analysis

In [44]:
win_loss_ratio = {}
for u, v, data in graph.edges(data=True):
    games = data.get('data', [])
    for game in games:
        white_result = game['white']['result']
        black_result = game['black']['result']
        if white_result == "win":
            winner = game['white']['username']
            loser = game['black']['username']
        elif black_result == "win":
            winner = game['black']['username']
            loser = game['white']['username']
        else:
            continue
        if winner not in win_loss_ratio:
            win_loss_ratio[winner] = {"wins": 0, "losses": 0}
        if loser not in win_loss_ratio:
            win_loss_ratio[loser] = {"wins": 0, "losses": 0}
        win_loss_ratio[winner]["wins"] += 1
        win_loss_ratio[loser]["losses"] += 1

df = pd.DataFrame(win_loss_ratio).T
df['ratio'] = df['wins'] / (df['wins'] + df['losses'])
df[df['wins'] + df['losses'] > 10].sort_values("ratio", ascending=False).head(20)

Unnamed: 0,wins,losses,ratio
FabianoCaruana,44,9,0.830189
Antipov_Mikhail_Al,19,4,0.826087
jefferyx,20,5,0.8
jcibarra,38,12,0.76
Yaacovn,95,40,0.703704
onyshchuk_v,29,13,0.690476
h4parah5,20,11,0.645161
0blivi0usspy,74,44,0.627119
rezamahdavi2008,86,54,0.614286
Eagle_2019,52,36,0.590909


### Game time analysis?

In [51]:
avg_durations = {}
for u, v, data in graph.edges(data=True):
    durations = [game['duration'] for game in data['data']]
    avg_durations[(u, v)] = sum(durations) / len(durations)
df = pd.DataFrame(avg_durations, index=["avg_duration"]).T
df["avg_duration"] = df["avg_duration"] / (60 * 60 * 24 * 365)  # The heck is the unit of duration? haha
df.sort_values("avg_duration", ascending=False).head(20)

Unnamed: 0,Unnamed: 1,avg_duration
0blivi0usspy,dafny2005,54.843345
0blivi0usspy,lonelyqueen0,54.843198
0blivi0usspy,blefer66,54.843187
0blivi0usspy,masruri_rahman,54.842795
0blivi0usspy,nikitakhoroshev,54.842537
0blivi0usspy,turboplombir,54.842523
0blivi0usspy,oskariot,54.842513
seochesspie,0blivi0usspy,54.837436
blitzstream,0blivi0usspy,54.835296
gilbertelroy,0blivi0usspy,54.835286


### Communities: Investigate communities within the graph (Level = # of splits | ID = Unique community)

In [55]:
from itertools import islice


community_levels = list(islice(nx.community.girvan_newman(graph), 3))

In [58]:
data = []
for level, partition in enumerate(community_levels, 1):
    for community_id, community in enumerate(partition):
        for node in community:
            data.append({
                "Node": node,
                "Community_Level": level,
                "Community_ID": community_id
            })

df = pd.DataFrame(data)
df.groupby(["Community_Level", "Community_ID"]).size().reset_index(name="Node_Count")



Unnamed: 0,Community_Level,Community_ID,Node_Count
0,1,0,615
1,1,1,185
2,2,0,615
3,2,1,92
4,2,2,93
5,3,0,600
6,3,1,92
7,3,2,93
8,3,3,15
