# OpenQuake calculations summary

Write the `calculation_summary.csv` file in the event's `Sensitivity` folder.
It is built from the log files and the gmfs stored in csv format in that folder.

## Configuration

In [None]:
import os
import glob
import pandas as pd

from oq_parse_gmfs import read_log

## User input

In [None]:
# Name of the event whose calculations are summarised
event = '19970926_M5.97_UmbriaMarche'

# Collect the log files of the OQ calculations (including conditioning gmfs),
# searching recursively below the parent directory
log_pattern = os.path.join('..', '**', event, 'OpenQuake_gmfs',
                           'Sensitivity', 'log_*.txt')
logs = sorted(glob.glob(log_pattern, recursive=True))
assert logs, 'No log files to parse'

# Folder holding the first log file found
folder = os.path.dirname(logs[0])
print('Sensitivity folder', folder)

# Display the list of log files that will be parsed
logs

## Get DataFrame with calculations summary

In [None]:
# Build one summary table with a row per (calculation, gmpe) found in the logs.

# Columns kept in the summary, in output order
cols = ['calc_id', 'description', 'cal_time',
        'recording_stations', 'gmlt', 'rupture', 'gmpe', 'imt',
        'max_gmv', 'nominal_bias_mean', 'nominal_bias_stdev']
oq_summary = []
for log in logs:
    # Get information from log
    df_log, calc_id = read_log(log)

    # Derive the gmfs csv path from the log *file name* only, so that folder
    # names containing 'txt' or 'log_calc' are never rewritten
    log_dir, log_name = os.path.split(log)
    log_stem, _ = os.path.splitext(log_name)
    gmfs_name = log_stem.replace('log_calc', 'gmf_median_PGA') + '.csv'
    gmfs_path = os.path.join(log_dir, gmfs_name)

    try:
        # Read gmfs from csv file (only this call can miss the file)
        gmfs = pd.read_csv(gmfs_path, comment='#')
    except FileNotFoundError:
        print(f'Calculation {calc_id} has not the corresponding gmfs in csv format')
        # The log alone may lack the columns that come from the gmfs
        # (max_gmv); reindex fills them with NaN instead of raising KeyError
        oq_summary.append(df_log.reindex(columns=cols))
        continue

    # Extract maximum gmvs: column-wise maximum of every column from the
    # fourth one onwards, one row per column
    max_gmv = gmfs.iloc[:, 3:].max(axis=0).to_frame().reset_index()
    max_gmv.columns = ['gmpe', 'max_gmv']
    max_gmv['calc_id'] = calc_id
    max_gmv['imt'] = 'PGA'

    # For special GMPEs, adjust names: keep the text before the first '.'
    max_gmv['gmpe'] = max_gmv['gmpe'].str.split('.').str[0]

    # Add max_gmv to summary data
    if len(df_log) == 1 and (df_log.gmpe == '').any():
        # Calculation with NO gmf conditioning: the log has no gmpe to match
        # on, so join on calc_id only and keep the gmfs-side column names
        df = df_log.merge(max_gmv, on=['calc_id'], suffixes=('_x', ''))
    else:
        df = df_log.merge(max_gmv, on=['calc_id', 'gmpe', 'imt'])

    oq_summary.append(df[cols])

# At least one calculation included in summary
assert oq_summary, 'No OQ calculations included in summary'
oq_summary = pd.concat(oq_summary, ignore_index=True)

# Abort on duplicated calculations: rows are compared on every column from
# the fourth one onwards (calc_id, description and cal_time are ignored)
dup = oq_summary.duplicated(subset=oq_summary.columns[3:])
dup_details = oq_summary.loc[dup, ['calc_id', 'description']].drop_duplicates()
assert not dup.any(), f'Duplicated calculations. Remove unnecessary files:\n{dup_details}'

# Rank calculations by the magnitude of their nominal bias (smallest first)
oq_summary['abs_bias'] = oq_summary.nominal_bias_mean.abs()
oq_summary = oq_summary.sort_values('abs_bias', ascending=True)

oq_summary.head(10)


## Save calculations summary

In [None]:
# Write the summary table as csv (without the index) into the Sensitivity folder
save_as = os.path.join(folder, 'calculation_summary.csv')
oq_summary.to_csv(path_or_buf=save_as, index=False)
print(' File save in', save_as)