# Graph Lab

## Reading data

In [1]:
import json

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np

%matplotlib inline

In [8]:
# Edge list of the graph; the "source" and "target" columns name the endpoints
# of each edge (these are the column names networkx looks for by default).
graph_df = pd.read_csv("graph.csv")
G = nx.from_pandas_edgelist(graph_df, source="source", target="target")

In [35]:
# Lookup table of people: later cells use it to translate a `vk_id` into the
# `id` value that is then checked against the nodes of G.
people_df = pd.read_csv("people.csv")

In [6]:
# Roster of the people to rank; later cells read its `vkid` column.
# NOTE(review): the preview shows an "Unnamed: 0" column, so the CSV appears to
# carry a saved index — reading with index_col=0 would drop it (TODO confirm).
df = pd.read_csv("data.csv")

In [7]:
# Preview the first rows of the roster to check the column names.
df.head()

Unnamed: 0,name,id,vkid
0,Алексеева Екатерина,kate_al02,181400458
1,Бирюков Виктор,iktovr,136004593
2,Васильев Даниил,da_mil_ka,151143124
3,Воронов Кирилл,id248826936,248826936
4,Гребенков Дмитрий,irnevoir,172224066


## Compute centralities

In [36]:
# Preview the lookup table: `vk_id` is matched against the roster's `vkid`,
# and `id` is the value the centrality cells look up in G.
people_df.head()

Unnamed: 0,id,vk_id,name,level
0,0,181400458,Алексеева Екатерина,0
1,1,136004593,Бирюков Виктор,0
2,2,151143124,Васильев Даниил,0
3,3,248826936,Воронов Кирилл,0
4,4,172224066,Гребенков Дмитрий,0


In [41]:
# Closeness centrality of every person in the roster, keyed by graph node id.
closeness_centralities = {}
for vkid in df["vkid"].tolist():
    # The first people_df row with this VK id supplies the graph node id.
    id_ = people_df.loc[people_df["vk_id"] == vkid, "id"].iloc[0]
    # People that have no node in G are given a score of 0.
    closeness_centralities[id_] = (
        nx.closeness_centrality(G, u=id_) if G.has_node(id_) else 0
    )

In [42]:
# Collect the graph node ids of the roster members that are present in G.
neighbors_to_process = set()

for vkid in df["vkid"].tolist():
    id_ = people_df.loc[people_df["vk_id"] == vkid, "id"].iloc[0]
    if not G.has_node(id_):
        continue
    neighbors_to_process.add(id_)

# Betweenness restricted to shortest paths that start and end at roster members.
betweenness_centralities = nx.betweenness_centrality_subset(
    G,
    sources=neighbors_to_process,
    targets=neighbors_to_process,
    normalized=True,
)

In [43]:
# Eigenvector centrality of the nodes of G; the result is indexed by node id
# when the best-scoring person is looked up below.
eigenvector_centralities = nx.eigenvector_centrality_numpy(G)

## Output users with highest centrality

In [46]:
def output_best_centrality(vkids, centrality):
    """Return the VK id whose graph node has the highest centrality score.

    Each VK id is translated to a graph node id through the notebook-level
    ``people_df`` (the ``id`` of the first row whose ``vk_id`` matches), and
    the score of that node is read from ``centrality``.

    Parameters
    ----------
    vkids : iterable
        VK ids to rank. A repeated VK id is considered only once, at the
        position of its first occurrence.
    centrality : dict
        Mapping from graph node id to centrality score. A node id that is
        missing from the mapping is scored 0, the same value the closeness
        cell assigns to people who have no node in the graph.

    Returns
    -------
    The VK id with the largest score. When several VK ids share the largest
    score, the one that comes last in ``vkids`` is returned.

    Raises
    ------
    ValueError
        If ``vkids`` is empty.
    IndexError
        If a VK id has no matching row in ``people_df``.
    """
    best_vkid = None
    best_score = None

    # dict.fromkeys drops repeated VK ids while keeping first-seen order.
    for vkid in dict.fromkeys(vkids):
        id_ = people_df.loc[people_df["vk_id"] == vkid, "id"].iloc[0]
        # Not every centrality dict covers people who are absent from the
        # graph, so fall back to 0 instead of raising KeyError.
        score = centrality.get(id_, 0)
        # ">=" lets a later VK id win a tie.
        if best_score is None or score >= best_score:
            best_vkid, best_score = vkid, score

    if best_score is None:
        raise ValueError("vkids must contain at least one VK id")
    return best_vkid

In [47]:
# Roster row of the person with the highest closeness centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), closeness_centralities)
    )
]

Unnamed: 0,name,id,vkid
11,Уваров Андрей,anonumovich,206240342


In [48]:
# Roster row of the person with the highest betweenness centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), betweenness_centralities)
    )
]

Unnamed: 0,name,id,vkid
1,Бирюков Виктор,iktovr,136004593


In [49]:
# Roster row of the person with the highest eigenvector centrality.
df.loc[
    df["vkid"].eq(
        output_best_centrality(df["vkid"].tolist(), eigenvector_centralities)
    )
]

Unnamed: 0,name,id,vkid
5,Инютин Максим,engineerxl,199361916
