In [1]:
import pandas as pd
import networkx as nx
import itertools

In [4]:
def update_nodes(G, results_df):
    """Make sure every surfer in ``results_df`` is a node and count appearances.

    Each value of the ``"Name"`` column is looked up in ``G``. A name that is
    not yet a node is added with ``events=1``; a name that already is a node
    gets its ``events`` attribute incremented by one (starting from 0 if the
    node exists but carries no ``events`` attribute yet, e.g. because it was
    created implicitly by ``add_edge``).

    Parameters
    ----------
    G : networkx-style graph
        Must support ``name in G``, ``G.add_node(name, **attrs)`` and
        ``G.nodes[name]``. Modified in place.
    results_df : DataFrame-like
        Anything where ``results_df["Name"]`` yields the names to register.
        NOTE(review): every row is counted, including rows whose result for
        the event is missing -- filter beforehand if "events" should only
        count actual participation.

    Returns
    -------
    The same graph object ``G``, for call chaining.
    """
    for name in results_df["Name"]:
        # Graph membership is a hash lookup; the previous
        # ``list(G.nodes())`` rebuilt the full node list for every name.
        if name in G:
            node_attrs = G.nodes[name]
            node_attrs["events"] = node_attrs.get("events", 0) + 1
        else:
            G.add_node(name, events=1)
    return G



def add_event_results(G, results_df, event_name):
    """Add one event's pairwise outcomes to ``G`` as weighted directed edges.

    Rows with a missing ``"Name"`` or ``event_name`` value are dropped. The
    distinct rank values are then visited from largest to smallest (the
    smallest is skipped because nobody is left below it). For each rank, an
    edge is drawn from every surfer holding that rank to every surfer still
    remaining in the table, i.e. to everyone with a numerically smaller rank
    value. An existing edge has its ``weight`` incremented by one; a new edge
    starts at ``weight=1``.

    Parameters
    ----------
    G : networkx-style directed graph (modified in place)
    results_df : pandas.DataFrame with a ``"Name"`` column and ``event_name``
    event_name : str, column holding the rank values for this event

    Returns
    -------
    The same graph object ``G``.
    """
    standings = results_df[["Name", event_name]].dropna()
    distinct_ranks = sorted(standings[event_name].unique(), reverse=True)

    for current_rank in distinct_ranks[:-1]:

        # SURFERS HOLDING THE CURRENT RANK
        holds_rank = standings[event_name] == current_rank
        sources = list(standings.loc[holds_rank, "Name"])

        # DROP THEM (BY NAME) SO ONLY SMALLER RANK VALUES REMAIN
        standings = standings[~standings["Name"].isin(sources)]
        targets = list(standings["Name"])

        # ONE EDGE PER (SOURCE, TARGET) PAIR, SOURCES IN THE OUTER POSITION
        for source, target in itertools.product(sources, targets):
            if not G.has_edge(source, target):
                G.add_edge(source, target, weight=1)
            else:
                G[source][target]["weight"] += 1

    return G



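# NOTE(review): commented-out copies of update_nodes / add_event_results follow;
# this update_nodes variant adds nodes without the "events" attribute.
# def update_nodes(G, results_df):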

#     names = results_df["Name"]
#     for name in names:
#         if name not in list(G.nodes()):
#             G.add_node(name)
#     return G



# def add_event_results(G, results_df, event_name):

#     event_results = results_df[["Name", event_name]].dropna()
#     rankings = list(event_results[event_name].unique())
#     rankings.sort(reverse=True)

#     for rank in rankings[:-1]:
        
#         # GET SURFERS WHO FINISHED WITH RANK
#         rank_batch = event_results[event_results[event_name] == rank]
#         rank_surfers = list(rank_batch["Name"])
        
#         # REMOVE SURFERS WITH RANK FROM RESULTS DATAFRAME
#         event_results = event_results[~event_results["Name"].isin(rank_surfers)]
#         remaining_surfers = list(event_results["Name"])

#         # CREATE EDGES FOR SURFERS WITH RANK
#         for rank_surfer in rank_surfers:
#             for other_surfer in remaining_surfers:

#                 # CASE WHERE EDGE ALREADY EXISTS
#                 if G.has_edge(rank_surfer, other_surfer):
#                     G[rank_surfer][other_surfer]["weight"] += 1

#                 # CASE WHERE EDGE DOES NOT EXIST
#                 else:
#                     G.add_edge(rank_surfer, other_surfer, weight=1)

#     return G



def condense_edge(G, n1, n2):
    """Replace a pair of opposing edges between ``n1`` and ``n2`` by their net edge.

    If both ``n1 -> n2`` and ``n2 -> n1`` exist and carry a ``"weight"``
    attribute, both edges are removed and a single edge is added in the
    direction of the larger weight, weighted by the absolute difference.
    When the two weights are equal, both edges are removed and nothing is
    added. If either edge is missing, or either lacks a ``"weight"``, the
    graph is returned unchanged.

    Parameters
    ----------
    G : networkx-style directed graph (modified in place)
        Must support ``get_edge_data``, ``remove_edges_from`` and ``add_edge``.
    n1, n2 : node identifiers

    Returns
    -------
    The same graph object ``G``.
    """
    # get_edge_data returns None when the edge does not exist; check that
    # explicitly instead of relying on a bare ``except`` that would also hide
    # unrelated errors (and KeyboardInterrupt).
    forward = G.get_edge_data(n1, n2)
    backward = G.get_edge_data(n2, n1)
    if forward is None or backward is None:
        return G

    # GET EDGE WEIGHTS (leave unweighted edges untouched)
    try:
        n1_n2 = forward["weight"]
        n2_n1 = backward["weight"]
    except KeyError:
        return G

    # GET DIFF.
    diff = n1_n2 - n2_n1

    # REMOVE EDGES
    G.remove_edges_from([(n1, n2), (n2, n1)])

    # ADD NET EDGE (a tie leaves the pair unconnected)
    if diff > 0:
        G.add_edge(n1, n2, weight=diff)
    elif diff < 0:
        G.add_edge(n2, n1, weight=-diff)

    return G



def condense_all_edges(G):
    """Run ``condense_edge`` on every unordered pair of nodes in ``G``.

    The node list is captured once up front; pairs are visited in the order
    produced by ``itertools.combinations`` over that list. The graph returned
    by each ``condense_edge`` call is carried into the next call.

    Returns
    -------
    The graph returned by the last ``condense_edge`` call (``G`` itself when
    there are fewer than two nodes).
    """
    node_snapshot = list(G.nodes())

    for first, second in itertools.combinations(node_snapshot, 2):
        G = condense_edge(G, first, second)

    return G

## Create event networks

In [5]:
# YEAR RANGE (BOTH ENDS INCLUDED); YEARS ARE KEPT AS STRINGS
first_year, last_year = 2010, 2019
years = list(map(str, range(first_year, last_year + 1)))

# event_dict:  column name -> how many times it was seen across the yearly files
# yearly_data: year string -> results DataFrame loaded for that year
event_dict = {}
yearly_data = {}

# LOAD EACH YEAR AND TALLY ITS COLUMNS
for year in years:

    results_df = pd.read_csv(f"../data/results_data/wsl_results_{year}.csv")
    yearly_data[year] = results_df

    # EVERY COLUMN EXCEPT "Name" IS TREATED AS AN EVENT
    for colname in results_df.columns:
        if colname != "Name":
            event_dict[colname] = event_dict.get(colname, 0) + 1


# KEEP ONLY EVENTS WITH ENOUGH DATA (A COUNT OF 2 OR LESS IS DROPPED)
events_to_remove = [key for key, value in event_dict.items() if value <= 2]
for key in events_to_remove:
    event_dict.pop(key)

# REMAINING EVENT NAMES, IN FIRST-SEEN ORDER
events = list(event_dict)

# BUILD AND SAVE ONE DIRECTED NETWORK PER EVENT
for event in events:

    # START FROM AN EMPTY NETWORK
    event_net = nx.DiGraph()

    # FOLD IN EVERY YEAR THAT HAS A COLUMN FOR THIS EVENT
    for data in yearly_data.values():
        if event in data.columns:

            # KEEP ONLY THE NAME AND THIS EVENT'S COLUMN
            filtered_results = data.filter(["Name", event], axis=1)

            # REGISTER NODES FIRST, THEN ADD THE PAIRWISE RESULT EDGES
            event_net = update_nodes(event_net, filtered_results)
            event_net = add_event_results(event_net, filtered_results, event)

    # COLLAPSE OPPOSING EDGES INTO A SINGLE NET-WEIGHT EDGE
    event_net = condense_all_edges(event_net)

    # WRITE NETWORK TO DISK
    nx.write_gml(event_net, f"../data/networks/{event}_results_network.gml")