In [1]:
# Import necessary initial libraries
import importlib
import logging
import os
import re
import sys
from datetime import timedelta
from pathlib import Path

import pandas as pd
import plotly.express as px

In [2]:
# Sets the root directory of the project as the working directory.
# Guarded so that re-running this cell does not climb one directory further
# each time: the project root is recognised by the `src` package that the
# notebook imports from right after this cell.
if not os.path.isdir('src'):
    os.chdir('..')

In [3]:
# Check the current working directory (expected to be the project root after the previous cell)
os.getcwd()

'/Users/darlanmnunes/Dev/DSc_git/PhD_Thesis_Step3_OSM_Toponyms'

In [4]:
# Import the custom module
from src import merge_geojsonGrids

In [None]:
# Reload the module so that edits made to src/merge_geojsonGrids are picked up
# without restarting the kernel (development convenience)
import importlib
importlib.reload(merge_geojsonGrids)

### Merge GeoJSON files
- Esta etapa consolida os arquivos GeoJSON referentes à grade estatística com os resultados de cada step em um único arquivo de saída (GeoPackage e GeoJSON)

In [None]:
# GeoJSON files to merge (order matters - the first file has priority).
def list_step_geojsons(directory, exclude_prefix="steps_merged"):
    """Return the sorted paths (as strings) of the ``*.geojson`` files in ``directory``.

    Files whose name starts with ``exclude_prefix`` are skipped. The merged
    outputs written later in this notebook are named ``steps_merged_1to6.*``
    and are saved in this same directory, so without the filter a second run
    would feed the previous merge result back in as an input.

    Parameters
    ----------
    directory : str or pathlib.Path
        Folder holding the per-step GeoJSON files.
    exclude_prefix : str, optional
        File-name prefix identifying previously merged outputs to ignore.

    Returns
    -------
    list of str
        Matching file paths, sorted lexicographically.
    """
    return sorted(
        str(geojson)
        for geojson in Path(directory).glob('*.geojson')
        if not geojson.name.startswith(exclude_prefix)
    )


path_dir = Path.cwd() / 'results/1_output_grid/partial_results'
output_code1_files = list_step_geojsons(path_dir)
output_code1_files


In [None]:
# Merge every listed GeoJSON into one GeoPackage stored alongside the inputs
output_file_name = "steps_merged_1to6.gpkg"
output_path = str(path_dir / output_file_name)

merge_geojsonGrids.merge_geojson2gpkg(output_code1_files, output_path)

Merge concluído!
Arquivo salvo como GeoPackage em: /Users/darlanmnunes/Dev/DSc_git/PhD_Thesis_Step3_OSM_Toponyms/results/1_output_grid/partial_results/test_dir/steps_merged_1to6.gpkg
Steps processados: ['step1_consolidado', 'step2_consolidado', 'step4_consolidado', 'step5_consolidado', 'step6_consolidado']
CRS mantido: EPSG:4674
Total de células únicas: 8652


Unnamed: 0,id,geometry,POP10,step1_consolidado_edif_ensino_total_count,step1_consolidado_edif_ensino_name_count,step1_consolidado_edif_ensino_name_ratio,step1_consolidado_edif_saude_total_count,step1_consolidado_edif_saude_name_count,step1_consolidado_edif_saude_name_ratio,step1_consolidado_edif_desenv_social_total_count,...,step6_consolidado_edif_metro_ferroviaria_sigmoid_rmse,step6_consolidado_edif_metro_ferroviaria_sigmoid_pct_erro,step6_consolidado_edif_metro_ferroviaria_sigmoid_a,step6_consolidado_edif_metro_ferroviaria_sigmoid_b,step6_consolidado_edif_metro_ferroviaria_sigmoid_c,step6_consolidado_edif_metro_ferroviaria_sigmoid_d,step6_consolidado_edif_metro_ferroviaria_inflexao_idx,step6_consolidado_edif_metro_ferroviaria_inflexao_data,step6_consolidado_edif_metro_ferroviaria_sigmoid_fit_overflow,step6_consolidado_edif_metro_ferroviaria_dias_desde_inflexao
0,200ME60338N90882,"MULTIPOLYGON (((-44.06371 -19.97798, -44.06378...",345,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1,200ME60338N90884,"MULTIPOLYGON (((-44.06378 -19.97618, -44.06385...",597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,200ME60338N90886,"MULTIPOLYGON (((-44.06385 -19.97439, -44.06392...",497,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,200ME60346N90858,"MULTIPOLYGON (((-44.05521 -19.99927, -44.05528...",227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
4,200ME60348N90858,"MULTIPOLYGON (((-44.05329 -19.99920, -44.05336...",25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8647,200ME60554N91040,"MULTIPOLYGON (((-43.86179 -19.82923, -43.86186...",7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
8648,200ME60550N91042,"MULTIPOLYGON (((-43.86570 -19.82756, -43.86577...",0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
8649,200ME60552N91042,"MULTIPOLYGON (((-43.86378 -19.82750, -43.86385...",0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
8650,200ME60554N91042,"MULTIPOLYGON (((-43.86186 -19.82743, -43.86193...",9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [None]:
# Merge every listed GeoJSON into one GeoJSON file stored alongside the inputs
output_file_name = "steps_merged_1to6.geojson"
output_path = str(path_dir / output_file_name)

merge_geojsonGrids.merge_geojson2geojson(output_code1_files, output_path)

### Determine the time metrics of each step
- Esta etapa extrai os tempos de processamento de cada lote para cada step

In [None]:
# Path(s), relative to the working directory, of the Excel workbook with the OHSOME request time logs
file_name = ["results/1_output_grid/time_metrics/time_log_OHSOME_requests.xlsx"]

In [None]:
# Read the 'step1' sheet of the Excel time log into a DataFrame
df_time = pd.read_excel(file_name[0], sheet_name='step1')

In [None]:
# Show the first 10 rows as a sanity check of the load
display(df_time.head(10))

In [None]:
# Show the last 10 rows as a sanity check of the load
display(df_time.tail(10))

In [None]:
# Extract the per-batch processing times from the raw log lines
def extrair_tempos(df):
    """Parse per-batch durations out of the first column of ``df``.

    Every non-null cell of the first column is searched for the pattern
    ``Tempo do lote <n>: <m> min <s> seg``. Cells that do not match are
    ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds the log lines.

    Returns
    -------
    pandas.DataFrame
        One row per matched line, with the columns ``Lote``, ``Minutos``,
        ``Segundos``, ``Total (s)``, ``Total (min)``, ``Total (h)``,
        ``Total (dias)`` and ``Duração (timedelta)``. When nothing matches
        (or ``df`` has no columns) an empty frame carrying these same
        columns is returned, so downstream column lookups keep working.

    Notes
    -----
    ``Total (s)`` is truncated to whole seconds, and every other total is
    derived from that truncated value.
    """
    # NOTE(review): only the cell values are scanned, not the column label.
    # pandas.read_excel treats the first sheet row as the header by default, so
    # a log line sitting on row 1 would not be parsed - confirm the sheet layout.
    colunas = [
        'Lote',
        'Minutos',
        'Segundos',
        'Total (s)',
        'Total (min)',
        'Total (h)',
        'Total (dias)',
        'Duração (timedelta)',
    ]

    # A frame without columns has no "first column" to read
    if df.shape[1] == 0:
        return pd.DataFrame(columns=colunas)

    padrao = re.compile(r'Tempo do lote (\d+): (\d+\.?\d*) min (\d+\.?\d*) seg')
    resultados = []

    for line in df.iloc[:, 0].dropna():
        # str() keeps non-text cells (numbers, dates) from raising TypeError in the regex
        match = padrao.search(str(line))
        if match:
            lote = int(match.group(1))
            minutos = float(match.group(2))
            segundos = float(match.group(3))
            total_segundos = int(minutos * 60 + segundos)
            resultados.append({
                'Lote': lote,
                'Minutos': minutos,
                'Segundos': segundos,
                'Total (s)': total_segundos,
                'Total (min)': total_segundos / 60,
                'Total (h)': total_segundos / 3600,
                'Total (dias)': total_segundos / (3600 * 24),
                'Duração (timedelta)': timedelta(seconds=total_segundos)
            })

    # Passing the column list guarantees the schema even when nothing matched
    return pd.DataFrame(resultados, columns=colunas)

In [None]:
# Apply the parser to the loaded log to obtain one row of timings per batch
df_tempos = extrair_tempos(df_time)
df_tempos

In [None]:
# Overall processing time across all batches, reported in hours and in days
total_horas, total_dias = (
    df_tempos[coluna].sum() for coluna in ('Total (h)', 'Total (dias)')
)
print(f"Tempo Total (todos os lotes): {total_horas:.2f} horas")
print(f"Tempo Total (todos os lotes): {total_dias:.2f} dias")

In [None]:
# Bar chart of the processing time of each batch, captioned with the overall duration

# Overall duration across all batches (recomputed here so the cell is self-contained)
total_horas = df_tempos['Total (h)'].sum()
total_dias = df_tempos['Total (dias)'].sum()

# One bar per batch; hover shows the raw minutes/seconds and the total in hours
fig = px.bar(
    df_tempos,
    x='Lote',
    y='Total (min)',
    hover_data={'Minutos': ':.1f', 'Segundos': ':.1f', 'Total (h)': ':.3f'},
    labels={'Total (min)': 'Duração (minutos)'},
    title='Tempo de Processamento por Lote (20 células) - Step 1 (name_ratio)',
)

# Caption with the overall duration, placed in paper coordinates below the x axis
fig.add_annotation(
    text=f"Duração Total: <br>({total_horas:.2f} horas / {total_dias:.2f} dias)",
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.35,  # vertical position of the caption; adjust here
    showarrow=False,
    align="center",
    font={'size': 12},
)

# Cosmetic adjustments
fig.update_traces(marker_color='royalblue')
fig.update_layout(
    title_x=0.5,
    xaxis_title='Lote',
    yaxis_title='Duração (minutos)',
    hoverlabel={'bgcolor': "white", 'font_size': 13},
    margin={'b': 100},  # larger bottom margin so the caption fits
)

# Export the chart as a standalone HTML file, then render it inline
fig.write_html("step1_tempo_processamento.html")

fig.show()