In [1]:
import os
import pandas as pd

In [2]:
phase_order = [
    'P0_READ_INPUT',
    'RADIANCE',
    'REFLECTANCE',
    'ALBEDO',
    'NDVI',
    'PAI',
    'LAI',
    'EVI',
    'ENB_EMISSIVITY',
    'EO_EMISSIVITY',
    'EA_EMISSIVITY',
    'SURFACE_TEMPERATURE',
    'SHORT_WAVE_RADIATION',
    'LARGE_WAVE_RADIATION_SURFACE',
    'LARGE_WAVE_RADIATION_ATMOSPHERE',
    'NET_RADIATION',
    'SOIL_HEAT_FLUX',
    'P1_INITIAL_PROD',
    'PIXEL_FILTER',
    'P2_PIXEL_SEL',
    'D0',
    'ZOM',
    'USTAR',
    'KB1',
    'RAH_INI',
    'RAH_CYCLE_0',
    'RAH_CYCLE_1',
    'RAH_CYCLE_2',
    'RAH_CYCLE_3',
    'P3_RAH',
    'SENSIBLE_HEAT_FLUX',
    'LATENT_HEAT_FLUX',
    'NET_RADIATION_24H',
    'EVAPOTRANSPIRATION_FRACTION',
    'SENSIBLE_HEAT_FLUX_24H',
    'LATENT_HEAT_FLUX_24H',
    'EVAPOTRANSPIRATION_24H',
    'EVAPOTRANSPIRATION',
    'P4_FINAL_PROD',
    'P_TOTAL',
]

In [3]:

def get_mode(series):
    """Função para obter a moda de uma série."""
    mode_values = series.mode()
    # Retorna o primeiro valor da moda se houver múltiplos valores
    return mode_values.iloc[0] if not mode_values.empty else None

def combine_dfs(base_dir):
    dataframes = []

    # Walk through the directory
    for root, dirs, files in os.walk(base_dir):
        if 'time.csv' in files:
            # Read the CSV file and append the DataFrame to the list
            df = pd.read_csv(os.path.join(root, 'time.csv'))
            dataframes.append(df[['PHASE', 'TIMESTAMP']])

    # Concatenate all DataFrames in the list into a single DataFrame
    if dataframes:
        combined_df = pd.concat(dataframes, ignore_index=True)
    else:
        combined_df = pd.DataFrame()  # Empty DataFrame if no files found

    return combined_df

In [6]:
directories = {
    "./output-kernels": ["kernels-sebal-6502","kernels-steep-6502"],
    "./output-cpp": ["cpp-sebal-6502","cpp-steep-6502"],
    "./output-hybrid": ["hybrid-sebal-6502","hybrid-steep-6502"]
}

dic_combined = dict()
for dir, subdirs in directories.items():
    for subdir in subdirs:
        subdir_path = os.path.join(dir, subdir)
        dic_combined[subdir] = combine_dfs(subdir_path)

dic_summarized = dict()
for key in dic_combined.keys():
    combined_df = dic_combined[key]
    combined_df.groupby('PHASE')
    summarized = combined_df.groupby('PHASE')['TIMESTAMP'].mean().reset_index()
    summarized = combined_df.groupby('PHASE')['TIMESTAMP'].apply(get_mode).reset_index()

    summarized['PHASE'] = pd.Categorical(
        summarized['PHASE'], 
        categories=phase_order, 
        ordered=True
    )
    
    summarized = summarized.sort_values('PHASE').reset_index(drop=True)

    dic_summarized[key] = summarized


In [7]:
dic_summarized["hybrid-steep-6502"]

Unnamed: 0,PHASE,TIMESTAMP
0,P0_READ_INPUT,2571.44
1,RADIANCE,3.838592
2,REFLECTANCE,3.381856
3,ALBEDO,4.02384
4,NDVI,0.712096
5,PAI,1.049248
6,LAI,8.50144
7,EVI,4.393376
8,ENB_EMISSIVITY,1.174336
9,EO_EMISSIVITY,1.171264


In [8]:
import pandas as pd
import re

# Função para extrair o tamanho (size) da chave
def extract_size(key):
    match = re.search(r'(\d+)$', key)
    return int(match.group(1)) if match else None

# Dicionário com DataFrames (exemplo simplificado)
# dic_mean_times = {'cores-sebal-120': df1, 'cores-sebal-1279': df2, ...}
# Substitua esta linha com o seu dicionário realdic_summarized
# dic_mean_times = {'cores-sebal-120': df1, 'cpp-sebal-120': df2, 'cutensor-sebal-120': df3}

def consolidate_dataframes(dic_mean_times):
    consolidated_data = []
    
    # Itera sobre cada chave e DataFrame no dicionário
    for key, df in dic_mean_times.items():
        # Extrai o tamanho (size) da chave
        size = extract_size(key)
        
        # Adiciona uma nova coluna 'size' ao DataFrame
        df['size'] = size
        df['experiment'] = key
        
        # Reorganiza o DataFrame para ter as colunas ['size', 'PHASE', 'TIMESTAMP']
        df = df[['size', 'PHASE', 'TIMESTAMP', 'experiment']]
        
        # Adiciona o DataFrame à lista consolidada
        consolidated_data.append(df)
    
    # Concatena todos os DataFrames da lista em um único DataFrame
    combined_df = pd.concat(consolidated_data, ignore_index=True)
    
    # Usa pivot para reorganizar o DataFrame de acordo com o formato desejado
    pivot_df = combined_df.pivot_table(
        index=['size', 'PHASE'],
        columns='experiment',
        values='TIMESTAMP',
        aggfunc='mean'
    ).reset_index()
    
    # Renomeia as colunas para remover o prefixo criado pelo pivot
    pivot_df.columns.name = None
    
    return pivot_df

# Executa a função para consolidar os DataFrames
final_df = consolidate_dataframes(dic_summarized)

# Exibe o DataFrame consolidado
final_df[final_df['size'] == 6502].dropna(axis=1, how='all')


  pivot_df = combined_df.pivot_table(


Unnamed: 0,size,PHASE,cpp-sebal-6502,cpp-steep-6502,hybrid-sebal-6502,hybrid-steep-6502,kernels-sebal-6502,kernels-steep-6502
0,6502,P0_READ_INPUT,2134.55,2135.13,2569.04,2571.44,2346.51,2353.71
1,6502,RADIANCE,918.453003,917.682007,3.85152,3.838592,3.983872,4.032736
2,6502,REFLECTANCE,964.813782,966.410339,3.37696,3.381856,3.364576,3.366592
3,6502,ALBEDO,297.498962,298.075897,4.03168,4.02384,3.989536,3.98896
4,6502,NDVI,170.883163,171.134583,0.71136,0.712096,0.70464,0.704224
5,6502,PAI,168.728851,168.845291,1.048672,1.049248,1.035808,1.03008
6,6502,LAI,466.973114,467.906097,8.499712,8.50144,8.495872,8.406848
7,6502,EVI,184.716644,185.047852,4.42368,4.393376,4.408832,4.38256
8,6502,ENB_EMISSIVITY,139.037735,139.615265,1.182496,1.174336,1.170624,1.16368
9,6502,EO_EMISSIVITY,140.208694,140.092484,1.179712,1.171264,1.1712,1.163776


In [9]:
final_df[~final_df['PHASE'].str.startswith('P')]

Unnamed: 0,size,PHASE,cpp-sebal-6502,cpp-steep-6502,hybrid-sebal-6502,hybrid-steep-6502,kernels-sebal-6502,kernels-steep-6502
1,6502,RADIANCE,918.453003,917.682007,3.85152,3.838592,3.983872,4.032736
2,6502,REFLECTANCE,964.813782,966.410339,3.37696,3.381856,3.364576,3.366592
3,6502,ALBEDO,297.498962,298.075897,4.03168,4.02384,3.989536,3.98896
4,6502,NDVI,170.883163,171.134583,0.71136,0.712096,0.70464,0.704224
6,6502,LAI,466.973114,467.906097,8.499712,8.50144,8.495872,8.406848
7,6502,EVI,184.716644,185.047852,4.42368,4.393376,4.408832,4.38256
8,6502,ENB_EMISSIVITY,139.037735,139.615265,1.182496,1.174336,1.170624,1.16368
9,6502,EO_EMISSIVITY,140.208694,140.092484,1.179712,1.171264,1.1712,1.163776
10,6502,EA_EMISSIVITY,816.283081,816.788757,4.249248,4.266656,15.410368,15.412448
11,6502,SURFACE_TEMPERATURE,243.698364,243.603912,0.748864,0.746048,0.740512,0.740576


In [19]:
filtered_phases = final_df[~final_df['PHASE'].str.match(r'^P.\_') & (final_df['PHASE'] != 'P_TOTAL')]

# Calculate the sum of each column
sum_row = filtered_phases.sum(numeric_only=True)

# Create a new DataFrame for the sum row
sum_row_df = pd.DataFrame(sum_row).T  # Transpose to make it a single row DataFrame

# Optionally, you can set a specific index for the sum row
sum_row_df.index = ['Total']  # Set the index name to 'Total'

# Append the sum row to the original DataFrame
filtered_phases = pd.concat([filtered_phases, sum_row_df], ignore_index=False)

# Print the updated DataFrame
filtered_phases


Unnamed: 0,size,PHASE,cpp-sebal-6502,cpp-steep-6502,hybrid-sebal-6502,hybrid-steep-6502,kernels-sebal-6502,kernels-steep-6502
1,6502.0,RADIANCE,918.453003,917.682007,3.85152,3.838592,3.983872,4.032736
2,6502.0,REFLECTANCE,964.813782,966.410339,3.37696,3.381856,3.364576,3.366592
3,6502.0,ALBEDO,297.498962,298.075897,4.03168,4.02384,3.989536,3.98896
4,6502.0,NDVI,170.883163,171.134583,0.71136,0.712096,0.70464,0.704224
5,6502.0,PAI,168.728851,168.845291,1.048672,1.049248,1.035808,1.03008
6,6502.0,LAI,466.973114,467.906097,8.499712,8.50144,8.495872,8.406848
7,6502.0,EVI,184.716644,185.047852,4.42368,4.393376,4.408832,4.38256
8,6502.0,ENB_EMISSIVITY,139.037735,139.615265,1.182496,1.174336,1.170624,1.16368
9,6502.0,EO_EMISSIVITY,140.208694,140.092484,1.179712,1.171264,1.1712,1.163776
10,6502.0,EA_EMISSIVITY,816.283081,816.788757,4.249248,4.266656,15.410368,15.412448


In [15]:
filtered_phases = final_df[final_df['PHASE'].str.match(r'^P.\_')]

# Calculate the sum of each column
sum_row = filtered_phases.sum(numeric_only=True)

# Create a new DataFrame for the sum row
sum_row_df = pd.DataFrame(sum_row).T  # Transpose to make it a single row DataFrame

# Optionally, you can set a specific index for the sum row
sum_row_df.index = ['Total']  # Set the index name to 'Total'

# Append the sum row to the original DataFrame
filtered_phases = pd.concat([filtered_phases, sum_row_df], ignore_index=False)

# Print the updated DataFrame
filtered_phases


Unnamed: 0,size,PHASE,cpp-sebal-6502,cpp-steep-6502,hybrid-sebal-6502,hybrid-steep-6502,kernels-sebal-6502,kernels-steep-6502
0,6502.0,P0_READ_INPUT,2134.55,2135.13,2569.04,2571.44,2346.51,2353.71
17,6502.0,P1_INITIAL_PROD,5245.029297,5253.337402,1595.514648,1597.645264,1604.724731,1603.40271
19,6502.0,P2_PIXEL_SEL,2623.024414,1755.163574,615.830322,50.524994,632.070068,51.332767
29,6502.0,P3_RAH,17012.414062,11790.533203,930.600708,604.613098,1133.950317,801.891785
38,6502.0,P4_FINAL_PROD,993.323547,1008.152466,425.282745,425.319855,428.559662,427.618988
Total,32510.0,,28008.34132,21942.316645,6136.268423,5249.543211,6145.814778,5237.95625
