# Betweenness

## Импорты

In [None]:
import json
from io import StringIO
import pandas as pd
import numpy as np
import seaborn as sns
import graph_tool as gt
from graph_tool.centrality import betweenness
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Use seaborn's "ticks" theme and additionally switch the axes grid on.
sns.set_style("ticks", rc={"axes.grid": True})

In [None]:
def to_df(read_file: str, key: str) -> pd.DataFrame:
    """Load a CSV table that is stored as a string inside a JSON file.

    The JSON document in *read_file* is read as a mapping; the value stored
    under *key* is treated as comma-separated text and parsed with
    ``pandas.read_csv``.

    Args:
        read_file: Path to a UTF-8 encoded JSON file.
        key: Name of the top-level entry that holds the CSV text.

    Returns:
        The CSV text parsed into a DataFrame.

    Raises:
        KeyError: If *key* is not present in the JSON document.
    """
    with open(read_file, "r", encoding="utf8") as f:
        data = json.load(f)
    # Index directly instead of ``.get``: a missing key would otherwise become
    # ``StringIO(None)`` (an empty buffer) and only surface later as an
    # unrelated "no columns to parse" error from pandas.
    try:
        csv_text = data[key]
    except KeyError:
        raise KeyError(f"{key!r} not found in {read_file!r}") from None
    return pd.read_csv(StringIO(csv_text), sep=",")

In [None]:
# Name of the area being analysed (presumably a city district — confirm);
# the exported JSON file is named after it.
part_name = "Адмиралтейский"
source_file = part_name + ".json"

## Работа с DataFrame

In [None]:
# The edge and node tables are read from the same JSON file, each under its
# own key (edges first, then nodes).
df_edges, df_nodes = (
    to_df(source_file, csv_key) for csv_key in ("edges_csv", "points_csv")
)

In [None]:
# Show the node table (a bare expression at the end of a cell is displayed).
df_nodes

In [None]:
# Expose each node's row position as an explicit "index" column, then look
# that position up for both endpoints of every edge. The positions serve as
# vertex indices when the graph is built from "src_idx"/"trg_idx".
df_nodes = df_nodes.reset_index()
node_positions = df_nodes.set_index("id")[["index"]]
for endpoint, idx_column in (("source", "src_idx"), ("target", "trg_idx")):
    # Left join on the endpoint's node id, then give the joined "index"
    # column its endpoint-specific name before the next pass.
    df_edges = df_edges.join(node_positions, on=endpoint).rename(
        columns={"index": idx_column}
    )

## Ищем betweenness

In [None]:
# Build the graph so that vertex i corresponds to row i of df_nodes.
# NOTE(review): gt.Graph() is directed by default — confirm that edges_csv
# already lists both directions wherever two-way traversal is intended.
g = gt.Graph()
# Create every vertex up front: add_edge_list() only adds vertices up to the
# largest index it encounters, so nodes at the end of df_nodes that take part
# in no edge would otherwise be missing and the "bw" assignment below would
# fail with a length mismatch.
g.add_vertex(len(df_nodes))
g.add_edge_list(df_edges.loc[:, ["src_idx", "trg_idx"]].to_numpy())
# Edges were inserted in df_edges row order, so the weights line up one-to-one.
weight = g.new_edge_property("double", vals=df_edges["weight"])
# betweenness() returns a (vertex, edge) pair of property maps; only the
# vertex map is stored on the node table here.
vp, ep = betweenness(g, weight=weight)
df_nodes["bw"] = vp.get_array()

## Присоединяем названия улиц

In [None]:
# Load the way-properties table stored in the same JSON file.
df_way_properties = to_df(read_file=source_file, key="ways_properties_csv")

In [None]:
# Preview only the rows whose "property" column equals "name".
df_way_properties.loc[df_way_properties["property"] == "name"]

In [None]:
# Keep only the "name" rows; reset_index() renumbers them from 0 and moves
# the previous row labels into an "index" column.
is_name_row = df_way_properties["property"].eq("name")
df_way_names = df_way_properties.loc[is_name_row].reset_index()

In [None]:
# Attach to every node the way id of each edge that starts at it. A node that
# is the source of several edges appears once per edge.
way_by_source = df_edges[["source", "id_way"]].set_index("source")
df_bw = df_nodes.join(way_by_source, on="id")[["id_way", "bw"]]
# Disabled variant that would also attribute betweenness via edge targets:
# df_bw = pd.concat([ df_bw, df_nodes.join(df_edges[["target", "id_way"]].set_index("target"), on="id").loc[:, ["id_way", "bw"]] ])

# Swap the way id for the way's name; the inner join drops rows whose way has
# no "name" entry (including nodes that matched no edge).
name_by_way = df_way_names.set_index("id")
df_bw = df_bw.join(name_by_way, on="id_way", how="inner")[["value", "bw"]]

# Sum betweenness per name and show the highest totals first.
bw_per_name = df_bw.groupby(by="value", as_index=False).sum()
bw_per_name.sort_values(by="bw", ascending=False)