In [3]:
import csv
import networkx as nx
from networkx.algorithms import pagerank
from collections import defaultdict
from datetime import datetime

In [4]:
# Exclusive cutoff used by generate_network_filter: a row is kept only when
# both of its dates are strictly earlier than this moment (2023-01-01 00:00).
FINAL_DATE = datetime(2023, 1, 1)

In [12]:
def generate_network_filter(filename: str, final_date=None):
    """Build a lookup of the ids that are allowed into the network.

    Reads the CSV at ``filename``, skips its header row, and marks the value in
    column 0 as ``True`` when both the release date (column 3, ``YYYY-MM-DD``)
    and the creation timestamp (column 4, ``YYYY-MM-DDTHH:MM:SSZ``) are
    strictly earlier than the cutoff. Blank lines are ignored and an empty
    file yields an empty mapping.

    Args:
        filename: Path of the CSV file to read.
        final_date: Exclusive cutoff as a naive ``datetime``. When omitted,
            the module-level ``FINAL_DATE`` is used.

    Returns:
        A ``defaultdict(bool)`` mapping every accepted id to ``True``. Ids
        that were rejected are simply absent.

    Raises:
        ValueError: If a date column does not match its expected format.
        IndexError: If a non-blank row has fewer than five columns.
    """
    if final_date is None:
        final_date = FINAL_DATE

    # NOTE(review): stand-in used when the creation column holds the literal
    # string "None". Where this particular timestamp comes from is not visible
    # here -- confirm it is still the intended fallback.
    missing_created_fallback = "2017-10-22T05:21:29Z"

    filter_map = defaultdict(bool)
    # newline="" hands newline handling to the csv module, so quoted fields
    # that contain line breaks are parsed correctly.
    with open(filename, "r", newline="") as openfile:
        csv_reader = csv.reader(openfile)
        next(csv_reader, None)  # skip the header; tolerate an empty file
        for row in csv_reader:
            if not row:
                continue  # csv.reader yields [] for a blank line
            release_date = datetime.strptime(row[3], "%Y-%m-%d")
            # Substitute the fallback without mutating the row that was read.
            created_raw = missing_created_fallback if row[4] == "None" else row[4]
            created_date = datetime.strptime(created_raw, "%Y-%m-%dT%H:%M:%SZ")
            if created_date < final_date and release_date < final_date:
                filter_map[row[0]] = True

    return filter_map

def get_weighted_edges_from_csv(filename, filter=None):
    """Read ``(source, target, weight)`` edges from a CSV file.

    The header row is skipped. Columns 0 and 1 are used as the edge endpoints
    and column 2 is converted to ``int`` as the weight. Blank lines are
    ignored and an empty file yields an empty list.

    Args:
        filename: Path of the CSV file to read.
        filter: Optional mapping exposing ``.get``. When supplied, an edge is
            kept only if ``filter.get(endpoint)`` is truthy for both of its
            endpoints. ``None`` keeps every edge.

    Returns:
        A list of ``(source, target, weight)`` tuples in file order.

    Raises:
        ValueError: If the weight of a kept edge cannot be converted to ``int``.
        IndexError: If a non-blank row is missing a column that is needed.
    """
    edges = []
    # newline="" hands newline handling to the csv module, so quoted fields
    # that contain line breaks are parsed correctly.
    with open(filename, "r", newline="") as openfile:
        csv_reader = csv.reader(openfile)
        next(csv_reader, None)  # skip the header; tolerate an empty file
        for row in csv_reader:
            if not row:
                continue  # csv.reader yields [] for a blank line
            # Drop the edge as soon as one endpoint is rejected; the weight of
            # a rejected edge is never parsed.
            if filter is not None and not (filter.get(row[0]) and filter.get(row[1])):
                continue
            edges.append((row[0], row[1], int(row[2])))

    return edges

def generate_graph_from_edges(edges_list):
    """Create a directed graph from weighted edge triples.

    Args:
        edges_list: Iterable of ``(source, target, weight)`` triples.

    Returns:
        A new ``nx.DiGraph`` populated through ``add_weighted_edges_from``.
    """
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(edges_list)
    return graph

In [6]:
def find_top_n_pagerank_nodes(g: nx.Graph, n=1):
    """Return the ``n`` nodes with the highest PageRank score.

    Args:
        g: Graph to rank.
        n: How many nodes to return (default 1).

    Returns:
        A list of node keys ordered from highest to lowest PageRank.
    """
    scores = pagerank(g)
    # The sort is stable, so nodes with equal scores keep the order in which
    # pagerank() reported them.
    ranked_nodes = sorted(scores, key=scores.get, reverse=True)
    return ranked_nodes[:n]

def get_graph_order_size(g: nx.Graph):
    """Report how large a graph is.

    Args:
        g: Graph to measure.

    Returns:
        The tuple ``(g.order(), g.size())``.
    """
    order = g.order()
    size = g.size()
    return order, size

def find_top_n_betweenness_centrality_nodes(g: nx.Graph, n=10, k=None, seed=None):
    """Return nodes that are in the top ``n`` for both degree and betweenness centrality.

    The top ``n`` nodes by degree centrality and the top ``n`` nodes by
    betweenness centrality (normalized, endpoints included) are computed
    separately and then intersected, so the result may hold fewer than ``n``
    nodes.

    Args:
        g: Graph to analyse.
        n: Size of each top list before the two are intersected.
        k: Forwarded to ``nx.betweenness_centrality``. ``None`` (the default)
            computes exact betweenness; an integer estimates it from that many
            sampled nodes, which is much cheaper on large graphs.
        seed: Forwarded to ``nx.betweenness_centrality``; only relevant when
            ``k`` is set. Pass a fixed value for reproducible estimates.

    Returns:
        A list of node keys ordered from highest to lowest betweenness
        centrality. The order is deterministic for a given set of scores.
    """
    degree_scores = nx.degree_centrality(g)
    betweenness_scores = nx.betweenness_centrality(
        g, k=k, normalized=True, endpoints=True, seed=seed
    )

    top_degree = set(sorted(degree_scores, key=degree_scores.get, reverse=True)[:n])
    top_betweenness = sorted(
        betweenness_scores, key=betweenness_scores.get, reverse=True
    )[:n]

    # Walk the betweenness ranking instead of intersecting two sets: the
    # members are the same, but the output order no longer depends on set
    # iteration order (which varies between runs for string keys).
    return [node for node in top_betweenness if node in top_degree]

In [13]:
# Two input CSVs: the first feeds generate_network_filter (id + date columns),
# the second feeds get_weighted_edges_from_csv (source, target, weight).
filter_filename = "../data/games_information/all_games.csv"
graph_filename = "../data/too_big/all_games.csv"
# Build the id filter first so that only edges whose two endpoints both pass
# the date cutoff are loaded into the graph.
filter_map = generate_network_filter(filter_filename)
edges = get_weighted_edges_from_csv(graph_filename, filter=filter_map)
graph = generate_graph_from_edges(edges)
# Prints the (order, size) tuple returned by get_graph_order_size.
print(get_graph_order_size(graph))
# NOTE(review): the centrality call below is disabled -- presumably because it
# is too slow on a graph of this size; confirm before re-enabling.
# print(find_top_n_betweenness_centrality_nodes(graph, n=100))

(30434, 14744682)
