# Graph Lab

## Reading data

In [2]:
import json

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np

%matplotlib inline

In [3]:
# Load the graph from its node-link JSON dump.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# instead of relying on the platform default (which is not UTF-8 everywhere).
with open("graph_format/graph.json", "r", encoding="utf-8") as f:
    G = nx.node_link_graph(json.load(f))

In [4]:
# Load the per-user attribute records (a JSON object keyed by user id).
# The encoding is pinned to UTF-8 so that non-ASCII text in the records is
# decoded the same way on every platform.
with open("graph_format/users.json", "r", encoding="utf-8") as f:
    users = json.load(f)

In [5]:
# Roster table; the cells below read its "vkid" column to pick graph nodes.
df = pd.read_csv("data.csv")

In [6]:
# Preview the first rows of the roster table.
df.head()

Unnamed: 0,name,id,vkid
0,Алексеева Екатерина,kate_al02,181400458
1,Бирюков Виктор,iktovr,136004593
2,Васильев Даниил,da_mil_ka,151143124
3,Воронов Кирилл,id248826936,248826936
4,Гребенков Дмитрий,irnevoir,172224066


In [7]:
# JSON object keys are always strings; re-key the records by integer id so
# they can be matched against the graph's nodes.
new_users = {int(raw_id): record for raw_id, record in users.items()}

In [8]:
# Copy each user's record onto the matching graph node as node attributes.
# Users that are not nodes of G are left out, so no new nodes are created.
nx.set_node_attributes(
    G,
    {user_id: attrs for user_id, attrs in new_users.items() if user_id in G},
)

## Writing the graph in Cosmograph format (https://cosmograph.app/run/)

In [10]:
# One row per user (the dict keys become the index after the transpose),
# without the account-status columns that are not exported.
u_df = (
    pd.DataFrame(new_users)
    .T
    .drop(columns=["deactivated", "can_access_closed", "is_closed"])
)

In [45]:
# Save the user table; the DataFrame index is written out as the "id" column.
u_df.to_csv("users.csv", index_label="id")

In [11]:
# Export the edges as a two-column CSV. The file is opened in binary mode
# because the header is written as bytes and write_edgelist accepts an
# already-open binary handle.
with open("graph_raw.csv", mode="wb") as edge_file:
    edge_file.write(b"source,target\n")
    # data=False: write only the endpoint pair of every edge, no attributes.
    nx.write_edgelist(G, edge_file, delimiter=",", data=False)

## Compute centralities

In [25]:
# %%timeit -r1
# Closeness centrality of every roster member, computed one node at a time.
# Roster ids that are not nodes of G get a score of 0.
closeness_centralities = {
    vkid: nx.closeness_centrality(G, u=vkid) if G.has_node(vkid) else 0
    for vkid in df["vkid"].tolist()
}

In [28]:
# %%timeit -r1
# Roster members act as both the sources and the targets of the shortest
# paths counted by the subset betweenness. The ids are taken straight from
# the column instead of iterating rows (which also rebound IPython's `_`).
neighbors_to_process = set(df["vkid"].tolist())

# Keep only ids that are actually nodes of G: the closeness cell tolerates
# roster ids missing from the graph, while a source that is not a node makes
# the shortest-path search fail with a KeyError.
roster_nodes = {vkid for vkid in neighbors_to_process if G.has_node(vkid)}

betweenness_centralities = nx.betweenness_centrality_subset(
    G, roster_nodes, roster_nodes, normalized=True
)

In [32]:
# Eigenvector centrality is computed for the whole graph (no node subset is
# passed); the roster members are picked out of the result later.
eigenvector_centralities = nx.eigenvector_centrality_numpy(G)

## Output the users with the highest centrality

In [22]:
def output_best_centrality(vkids, centrality):
    """Return the id from ``vkids`` that has the highest centrality score.

    Parameters
    ----------
    vkids : iterable
        Candidate ids. Ids that are not keys of ``centrality`` are skipped.
    centrality : mapping
        Maps an id to its (comparable) centrality score.

    Returns
    -------
    The id with the largest score. If several candidates share the largest
    score, the one that appears last in ``vkids`` is returned. ``None`` is
    returned when no candidate has a score (instead of failing with an
    ``IndexError``).
    """
    scored = [(centrality[vkid], vkid) for vkid in vkids if vkid in centrality]
    if not scored:
        return None
    # max() keeps the first maximal item it meets, so scan in reverse order to
    # let the last candidate win a tie.
    return max(reversed(scored), key=lambda pair: pair[0])[1]

In [26]:
# Roster row of the member with the highest closeness centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), closeness_centralities)
    )
]

Unnamed: 0,name,id,vkid
11,Уваров Андрей,anonumovich,206240342


In [29]:
# Roster row of the member with the highest betweenness centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), betweenness_centralities)
    )
]

Unnamed: 0,name,id,vkid
1,Бирюков Виктор,iktovr,136004593


In [33]:
# Roster row of the member with the highest eigenvector centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), eigenvector_centralities)
    )
]

Unnamed: 0,name,id,vkid
11,Уваров Андрей,anonumovich,206240342
