In [1]:
import peartree as pt
import networkx as nx
import pandas as pd
import warnings
import random
import numpy as np
import os
import glob
import pickle

# Suppress every warning, of any category, from this point on.
# NOTE(review): this also hides deprecation notices and pandas
# copy-on-assignment warnings raised by the cells below; consider narrowing
# the filter to the specific categories that are known to be noise.
warnings.filterwarnings("ignore")

def simulate_attack_degree(G):
    """Simulate a targeted attack that always removes the highest-degree node.

    The attack runs on a copy of ``G``; the caller's graph is not modified.
    Before every removal the size of the largest connected component (LCC)
    is recorded, and the degree ranking is re-evaluated on the damaged
    graph, so the attack adapts as the network breaks apart.

    Directed graphs are accepted: their LCC is measured over weakly
    connected components, because ``nx.connected_components`` is not
    implemented for directed graphs. For a directed graph the "degree" used
    for ranking is whatever ``G.degree`` reports for it.

    Parameters
    ----------
    G : networkx graph
        Graph to attack (directed or undirected).

    Returns
    -------
    list of int
        LCC size measured before each removal. The first entry describes the
        intact graph and there is one entry per node of ``G`` (an empty list
        for an empty graph).
    """
    G_copy = G.copy()

    # Choose the component routine once; the graph type never changes.
    if G_copy.is_directed():
        components = nx.weakly_connected_components
    else:
        components = nx.connected_components

    lcc_sizes = []
    while len(G_copy) > 0:
        # Size of the current LCC, measured before the next node is removed.
        lcc_sizes.append(max(map(len, components(G_copy))))
        # Ties are resolved in favour of the first node in iteration order.
        node_to_remove = max(G_copy.degree, key=lambda item: item[1])[0]
        G_copy.remove_node(node_to_remove)
    return lcc_sizes


def simulate_attack_static_betweenness(G):
    """Simulate a targeted attack ordered by a one-off betweenness ranking.

    Betweenness centrality is computed a single time on the intact graph.
    Nodes are then removed from the highest to the lowest score without
    recomputing the ranking (a "static" attack). The attack runs on a copy
    of ``G``; the caller's graph is not modified.

    Directed graphs are accepted: their largest connected component (LCC)
    is measured over weakly connected components, because
    ``nx.connected_components`` is not implemented for directed graphs.

    Parameters
    ----------
    G : networkx graph
        Graph to attack (directed or undirected).

    Returns
    -------
    list of int
        LCC size measured before each removal. The first entry describes the
        intact graph and there is one entry per node of ``G`` (an empty list
        for an empty graph).
    """
    G_copy = G.copy()

    # Choose the component routine once; the graph type never changes.
    if G_copy.is_directed():
        components = nx.weakly_connected_components
    else:
        components = nx.connected_components

    # Betweenness is evaluated only once, before any node is removed.
    betweenness = nx.betweenness_centrality(G_copy)
    removal_order = sorted(betweenness, key=betweenness.get, reverse=True)

    lcc_sizes = []
    for node_to_remove in removal_order:
        # The graph still holds at least this node, so max() is never empty.
        lcc_sizes.append(max(map(len, components(G_copy))))
        G_copy.remove_node(node_to_remove)

    return lcc_sizes

def simulate_random_failures(G, num_failures):
    """Return the share of the network still connected after random failures.

    ``num_failures`` nodes are drawn without replacement via
    ``random.sample`` and deleted from a copy of ``G``. The size of the
    largest connected component of what remains (taken on the undirected
    view of the damaged graph) is divided by the node count of the original
    ``G``. If no node survives, ``0`` is returned.
    """
    damaged = G.copy()
    victims = random.sample(list(damaged.nodes()), num_failures)
    damaged.remove_nodes_from(victims)

    # Guard clause: every node failed, so nothing is left to measure.
    if damaged.number_of_nodes() <= 0:
        return 0

    # Largest surviving component, normalised by the ORIGINAL network size.
    component_sizes = (
        len(component)
        for component in nx.connected_components(damaged.to_undirected())
    )
    return max(component_sizes) / G.number_of_nodes()
    
    
def calcula_resistencia_a_falhas_aleatorias(grafo, num_trials, percent_max_falhas, num_steps=11):
    """Estimate how the largest connected component shrinks under random failures.

    Failure fractions are spread evenly between 0 and ``percent_max_falhas``
    (both included). For each fraction, ``simulate_random_failures`` is run
    ``num_trials`` times and the results are averaged.

    Parameters
    ----------
    grafo : networkx graph
        Network under test; it is passed unchanged to
        ``simulate_random_failures``.
    num_trials : int
        Number of random repetitions averaged per failure fraction.
    percent_max_falhas : float
        Largest fraction of nodes to remove (e.g. ``0.5`` for 50%).
    num_steps : int, optional
        Number of evenly spaced fractions to evaluate. The default of 11
        reproduces the grid that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Column ``percent_removed`` holds the requested fractions and column
        ``fraction_remaining`` the mean value returned by
        ``simulate_random_failures``. The number of nodes actually removed is
        ``int(fraction * number_of_nodes)``, i.e. truncated.
    """
    fractions = np.linspace(0, percent_max_falhas, num_steps)
    # The node count does not change between trials, so read it only once.
    total_nodes = grafo.number_of_nodes()

    results = [
        np.mean([
            simulate_random_failures(grafo, int(fraction * total_nodes))
            for _ in range(num_trials)
        ])
        for fraction in fractions
    ]
    return pd.DataFrame({"percent_removed": fractions, "fraction_remaining": results})

In [None]:

def cria_metricas_grafo(path, start=6*60*60, end=8*60*60):
    """Build the transit graph of one GTFS feed and attach per-stop centralities.

    The feed is loaded with ``pt.get_representative_feed`` and converted to
    a graph with ``pt.load_feed_as_graph``. Betweenness (weighted by the
    ``length`` edge attribute), degree and closeness centrality are computed
    for every node, joined with the feed's stops table and written back onto
    the graph as node attributes.

    Parameters
    ----------
    path : str
        Path of the GTFS ``.zip`` archive.
    start, end : int, optional
        Time window forwarded to ``pt.load_feed_as_graph``. The defaults
        correspond to 06:00 and 08:00 if the values are seconds after
        midnight (presumably peartree's convention; confirm in its docs).

    Returns
    -------
    G : graph
        The graph returned by peartree. Every node matched to a stop carries
        the centrality values and the stop columns as attributes.
    df_metricas_paradas : pandas.DataFrame
        One row per node matched to a stop, ordered by descending
        betweenness. Nodes without a stop row, and stops without a node, are
        dropped by the inner joins.
    """
    feed = pt.get_representative_feed(path)
    G = pt.load_feed_as_graph(feed, start, end)
    prefixo = G.graph['name']

    # Node ids are "<graph name>_<stop_id>". Build them on a new frame so the
    # feed's own stops table is not modified as a side effect.
    stops = feed.stops
    stops_df = stops.assign(node_id=prefixo + '_' + stops['stop_id'].astype(str))

    betweenness_centrality = nx.betweenness_centrality(G, weight='length')
    df_betweenness_centrality = pd.DataFrame(
        betweenness_centrality.items(), columns=['node_id', 'betweenness_centrality']
    ).sort_values(by='betweenness_centrality', ascending=False)

    df_degree_centrality = pd.DataFrame(
        nx.degree_centrality(G).items(), columns=['node_id', 'degree_centrality']
    )
    # NOTE(review): closeness is computed without a distance attribute (hop
    # count), whereas betweenness above is weighted by 'length'. Confirm that
    # this difference is intended.
    df_closeness_centrality = pd.DataFrame(
        nx.closeness_centrality(G).items(), columns=['node_id', 'closeness_centrality']
    )

    # Inner joins: only nodes present in every table survive.
    df_metricas_paradas = (
        df_betweenness_centrality
        .merge(df_degree_centrality, on='node_id')
        .merge(df_closeness_centrality, on='node_id')
        .merge(stops_df, on='node_id')
    )

    # TODO: the graph-level metrics that used to be sketched here are still
    # disabled: strongly connected component count, node/edge connectivity,
    # articulation points and bridges, average shortest path length (all
    # connected pairs and largest component only), random-failure resilience
    # and the degree/betweenness attack simulations.

    # Copy every metric and stop column onto the matching graph node.
    # to_dict('records') avoids the per-row Series that iterrows() builds.
    for atributos in df_metricas_paradas.to_dict('records'):
        node_id = atributos.pop('node_id')
        G.nodes[node_id].update(atributos)

    return G, df_metricas_paradas

In [None]:
# G, df_metricas, df_metricas_paradas,df_resistence_random_failures,articulation_points,bridges=cria_metricas_grafo(path)

In [None]:
# path = 'dados/GTFS/google_transit_202503.zip'

# G, df_metricas_paradas=cria_metricas_grafo(path)


Unnamed: 0,node_id,betweenness_centrality,degree_centrality,closeness_centrality,stop_id,stop_name,stop_desc,stop_lat,stop_lon
0,H79ZX_600005414,0.005175,0.000462,0.004098,600005414,"R. Uicó, 135",Ref.: R Coronda/ Av Sao Miguel,-23.521087,-46.524951
1,H79ZX_600012396,0.004960,0.000693,0.004170,600012396,"R. Rodovalho Júnior, 637",Ref.: Rua Henrique De Sousa Queiros,-23.520526,-46.549000
2,H79ZX_360004879,0.004874,0.000346,0.002899,360004879,"Av. Campanella, 2053",Ref.: R Catarina Lopes/ R Rosina Ferraresi Mar...,-23.533931,-46.457682
3,H79ZX_360004877,0.004873,0.000231,0.002824,360004877,"Av. Campanella, 1845",Ref.: R Meleiro/ R Doutor Alexandre Melo Morais,-23.533639,-46.459263
4,H79ZX_360004875,0.004872,0.000231,0.002753,360004875,"Av. Campanella, 1604",Ref.: R Teolandia/ R Waldomiro Fonseca,-23.532552,-46.460962
...,...,...,...,...,...,...,...,...,...
8659,H79ZX_7805183,0.000000,0.000115,0.000000,7805183,Av. Arraias Do Araguaia,Av. Arraias Do Araguaia Ref.: Av. Barreira Grande,-23.590905,-46.507021
8660,H79ZX_380003746,0.000000,0.000000,0.000000,380003746,"R. Rei Alberto, 395","R. Rei Alberto, 395 Ref.: V-ped Morango Silves...",-23.473841,-46.566141
8661,H79ZX_8615248,0.000000,0.000115,0.000000,8615248,R. Fernandez De Navarrete,,-23.526017,-46.405058
8662,H79ZX_2314547,0.000000,0.000115,0.000000,2314547,"R. Francisco Pinto De Andrade, 0",R. Francisco Pinto De Andrade Ref.: Tp 6076-41,-23.718541,-46.697046


In [None]:
input_folder = "dados/GTFS"
output_metricas = "dados/analises_grafo/metricas_paradas"
output_grafos = "dados/analises_grafo/grafos"

# Create the output folders if they do not exist yet.
os.makedirs(output_metricas, exist_ok=True)
os.makedirs(output_grafos, exist_ok=True)

# Process every GTFS archive. glob returns files in arbitrary order, so sort
# them to make the run order (and the progress log) deterministic.
for path in sorted(glob.glob(os.path.join(input_folder, "*.zip"))):
    # Feed label = text after the last underscore of the file stem,
    # e.g. google_transit_202503.zip -> "202503".
    nome = os.path.splitext(os.path.basename(path))[0].split("_")[-1]

    print(f"Processando {nome}...")

    G, df_metricas = cria_metricas_grafo(path)
    # Tag every row with the feed label held in `nome` (the variable, not the
    # literal text 'nome'), so rows from different feeds can be told apart.
    df_metricas['data'] = nome

    # Save the per-stop metrics as parquet.
    df_metricas.to_parquet(os.path.join(output_metricas, f"metricas_{nome}.parquet"), index=False)

    # Persist the annotated graph. Pickle files can execute code when loaded,
    # so only unpickle graphs produced by this notebook.
    with open(os.path.join(output_grafos, f"grafo_{nome}.pkl"), "wb") as f:
        pickle.dump(G, f)


Processando 201501...
Processando 201502...
Processando 201503...
Processando 201504...
Processando 201505...
Processando 201506...
Processando 201507...
Processando 201508...
Processando 201509...
Processando 201510...
Processando 201511...
Processando 201512...
Processando 201601...
Processando 201602...
Processando 201603...
Processando 201604...
Processando 201605...
Processando 201606...
Processando 201607...
Processando 201608...
Processando 201609...
Processando 201610...
Processando 201611...
Processando 201612...
Processando 201701...
Processando 201702...
Processando 201703...
Processando 201704...
Processando 201705...
Processando 201706...
Processando 201707...
Processando 201708...
Processando 201709...
Processando 201710...
Processando 201711...
Processando 201712...
Processando 201801...
Processando 201802...
Processando 201803...
Processando 201804...
Processando 201805...
Processando 201806...
Processando 201807...
Processando 201808...
Processando 201809...
Processand