# Средняя длина пути и коэффициент кластеризации

## Импорты

In [21]:
import json
from io import StringIO
import pandas as pd
import numpy as np
import seaborn as sns
import graph_tool as gt
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from graph_tool.topology import shortest_distance
from graph_tool.clustering import local_clustering

In [2]:
def to_df(read_file: str, key: str) -> pd.DataFrame:
    """Read the CSV text stored under ``key`` in a JSON file into a DataFrame.

    Args:
        read_file: Path to a JSON file (opened as utf8) whose top level is
            an object.
        key: Name of the entry that holds comma-separated text.

    Returns:
        The DataFrame that ``pd.read_csv`` parses from that text.

    Raises:
        KeyError: If ``key`` is not present in the JSON object.
    """
    with open(read_file, "r", encoding="utf8") as f:
        data = json.load(f)
    # Index instead of ``.get``: a missing key would otherwise become
    # ``StringIO(None)`` (an empty buffer) and surface as an unrelated pandas
    # "no columns to parse" error.
    try:
        csv_text = data[key]
    except KeyError:
        raise KeyError(f"{key!r} not found in {read_file!r}") from None
    return pd.read_csv(StringIO(csv_text), sep=",")

In [196]:
# Name of the data set being processed; it selects the input JSON file and
# labels the row written to the results CSV.
part_name = "СПб"
source_file = part_name + ".json"

## Работа с DataFrame

In [197]:
# Load the edge and node tables of the reversed graph from the source JSON.
df_reversed_nodes = to_df(source_file, "reversed_nodes_csv")
df_reversed_edges = to_df(source_file, "reversed_edges_csv")

# Build the graph from (src_index, dest_index) pairs, one row per edge.
edge_pairs = df_reversed_edges[["src_index", "dest_index"]].to_numpy()
reversed_g = gt.Graph()
reversed_g.add_edge_list(edge_pairs)

## Средняя длина пути

In [198]:
# Shortest distances following edge direction. No source vertex is passed, so
# the result is later indexed per vertex to get that vertex's distance array
# (presumably the all-pairs case -- confirm against the graph-tool docs).
shortest_distances = shortest_distance(reversed_g, directed=True)

In [199]:
# Mean shortest-path length over the entries whose distance is not 2147483647
# (the int32 maximum; presumably graph-tool's "unreachable" marker for this
# distance map -- confirm against the library docs).
sum_g = 0.
count_g = 0
for vertex in reversed_g.vertices():
    dist_row = shortest_distances[vertex].a
    finite = dist_row[dist_row != 2147483647]
    sum_g += np.sum(finite)
    # One entry per source is discounted -- presumably the source's own
    # zero-distance entry, which adds nothing to the sum.
    count_g += finite.size - 1
avg_g = sum_g / count_g
# Number of vertices, reused by the per-vertex averages computed later.
n = len(reversed_g.get_vertices())

## Коэффициент кластеризации

In [200]:
# Per-vertex clustering coefficients (directed variant), then their average
# over the n vertices.
cc = local_clustering(reversed_g, undirected=False)
cc_g = cc.a.sum() / n

## Степень

In [201]:
# Out-degree of every vertex, then the average over the n vertices.
d = reversed_g.degree_property_map("out")
d_g = d.a.sum() / n

## Запись в файл

В итоге получаем csv-файл, по структуре напоминающий таблицу из оригинальной статьи.

In [203]:
# Reference values derived from n and the mean degree d_g (presumably the
# random-graph estimates: path length ln(n) / ln(k) and clustering k / n).
# They are computed before the file is opened so a failure here cannot leave
# the output file touched.
l_random = np.log(n) / np.log(d_g)
c_random = d_g / n
# Row layout: name, n, mean out-degree, mean path length, mean clustering,
# reference path length, reference clustering.
row = f"{part_name},{n},{d_g:.2f},{avg_g:.2f},{cc_g:.2f},{l_random:.2f},{c_random:.4f}\n"
# Append mode keeps rows written by earlier runs. The encoding is explicit
# because part_name may be non-ASCII and the platform default encoding is not
# always UTF-8; the input JSON is read as utf8 too.
with open("small_world.csv", "a", encoding="utf8") as f:
    f.write(row)