In [1]:
# notebook to compile all of the per-table .npy delay files into a single array
import os
import glob
import pandas as pd
import numpy as np

In [2]:
# Root directory of the mechanism whose table_XXXX result folders are compiled below.
# Uncomment the alternative path to compile the base_rmg24 mechanism instead.
mech_dir = (
    '/work/westgroup/harris.se/autoscience/reaction_calculator/delay_uncertainty/aramco'
)
# mech_dir = '/work/westgroup/harris.se/autoscience/reaction_calculator/delay_uncertainty/base_rmg24'


In [None]:
# Compile every perturbed-mechanism delay into one large array.
#
# Layout of all_delays_ever (each table contributes K consecutive columns):
#
#             table1 table2 ... table12
# species 1
# species 2
# .........
# species N
# reaction 1
# reaction 2
# .........
# reaction M
#
# The compilation is best-effort: a missing file is reported and its entries
# are left at zero instead of aborting the whole run.

N_TABLES = 12        # table directories are table_0001 ... table_0012
N_RXN_BLOCKS = 51    # number of reaction delay files expected per table
RXN_BLOCK_SIZE = 50  # reaction file suffix is (block index * RXN_BLOCK_SIZE)

# load examples to get the right size
test_sp_file = os.path.join(mech_dir, 'table_0001', 'species_delays_0001.npy')
test_rxn_file = os.path.join(mech_dir, 'table_0001', 'reaction_delays_0001_0000.npy')

K = 51  # columns contributed by each table
N = np.load(test_sp_file).shape[0]
M = np.load(test_rxn_file).shape[0]
print(f'N={N}', 'species')
print(f'M={M}', 'reactions')

all_delays_ever = np.zeros((N + M, N_TABLES * K))

# every file that was expected but not found, so the gaps are easy to audit afterwards
missing_files = []

for table_index in range(1, N_TABLES + 1):
    table_dir = os.path.join(mech_dir, f'table_{table_index:04}')
    table_cols = slice((table_index - 1) * K, table_index * K)

    rxn_files = glob.glob(os.path.join(table_dir, f'reaction_delays_{table_index:04}_*.npy'))

    # make sure the broken up calculations are all complete
    assert len(rxn_files) == N_RXN_BLOCKS, table_index

    # insert all the species delays for that table; a missing species file no
    # longer prevents the reaction delays of the same table from being loaded
    sp_file = os.path.join(table_dir, f'species_delays_{table_index:04}.npy')
    if os.path.exists(sp_file):
        all_delays_ever[0:N, table_cols] = np.load(sp_file)
    else:
        print(f'missing species delay file {sp_file}')
        missing_files.append(sp_file)

    # fill in the reaction files
    # NOTE(review): the block files are summed, so each one is presumably an
    # (M, K) array that is zero outside its own block of reactions - confirm
    rxn_table = np.zeros((M, K))
    for i in range(N_RXN_BLOCKS):
        rxn_delay_file = os.path.join(
            table_dir, f'reaction_delays_{table_index:04}_{i * RXN_BLOCK_SIZE:04}.npy'
        )
        if not os.path.exists(rxn_delay_file):
            print('missing: ', i, rxn_delay_file[-50:])
            missing_files.append(rxn_delay_file)
            continue  # TODO use assert and do not continue
        rxn_table += np.load(rxn_delay_file)
    all_delays_ever[N:, table_cols] = rxn_table

if missing_files:
    print(f'WARNING: {len(missing_files)} delay file(s) missing; their entries are left as zeros')

In [None]:
# Write the compiled perturbed-mechanism delay array into the mechanism directory
np.save(
    file=os.path.join(mech_dir, 'total_perturbed_mech_delays.npy'),
    arr=all_delays_ever,
)

In [4]:
# Number of columns each table contributes to the compiled arrays.
# NOTE(review): also assigned in the large compile cell; presumably repeated here so
# the base-delay cells can be run without re-running that cell - confirm
K = 51

In [5]:
# Stitch the per-table base delays into one flat 1-D array of length 12 * K;
# table t (1-based) fills columns (t - 1) * K up to t * K.
total_base_delays = np.zeros(12 * K)
for table_index in range(1, 13):
    base_delay_file = os.path.join(
        mech_dir,
        f'table_{table_index:04}',
        f'base_delays_{table_index:04}.npy',
    )
    if os.path.exists(base_delay_file):
        first_col = (table_index - 1) * K
        total_base_delays[first_col:first_col + K] = np.load(base_delay_file)
    else:
        # unlike the perturbed delays, a missing base file is fatal
        raise OSError(f'Missing base delay file {base_delay_file}')


In [7]:
# number of base delay entries that are exactly zero
(total_base_delays == 0).sum()

0

In [8]:
# Write the compiled base delay array into the mechanism directory
np.save(
    file=os.path.join(mech_dir, 'total_base_delays.npy'),
    arr=total_base_delays,
)

In [None]:
# NOTE(review): scratch arithmetic - presumably converting column 415 (inspected
# further down) into units of 51-column tables; confirm before relying on it
415 / 51

In [None]:
# NOTE(review): scratch arithmetic - presumably the offset of column 415 within
# its 51-column table; confirm before relying on it
415 % 51

In [None]:
# See how many species calcs are zero.
# The species occupy the first N rows of all_delays_ever, so N is used instead of
# a hard-coded row count that only matches one mechanism.
species_block = all_delays_ever[:N]

# argwhere yields (row, column) pairs in row-major order, i.e. the same order
# as a nested "for row: for column:" scan
zero_positions = np.argwhere(species_block == 0).tolist()
for i, j in zero_positions:
    print(i, j)

# summary: zero entries / total species entries
print(len(zero_positions), '/', species_block.size)

In [None]:
# display every column of row 67
all_delays_ever[67]

In [None]:
# See how many rows are completely zero.
# Each element is tested individually: checking only the row sum would also flag
# a row whose non-zero entries happen to cancel out.
all_zero_mask = (all_delays_ever == 0).all(axis=1)

# row indices as plain Python ints, used by later cells for membership tests
zero_row_set = set(np.flatnonzero(all_zero_mask).tolist())
zero_rows = len(zero_row_set)
print(zero_rows, '/', all_delays_ever.shape[0])

In [None]:
# Debugging aid: list each expected reaction delay block file that is not on disk
for table_index in range(1, 13):
    table_dir = os.path.join(mech_dir, f'table_{table_index:04}')

    expected_files = [
        (i, os.path.join(table_dir, f'reaction_delays_{table_index:04}_{i * 50:04}.npy'))
        for i in range(51)
    ]
    for i, delay_file in expected_files:
        if os.path.exists(delay_file):
            continue
        # only the last 50 characters of the path are printed
        print('missing: ', i, delay_file[-50:])

In [None]:
# Report zero entries that are NOT explained by an entirely-zero row, walking the
# array column by column (so the output is grouped by table).
# N (number of species rows) and K (columns per table) replace the hard-coded
# 130 and 51 so the report stays correct for any mechanism.
for j, i in np.argwhere(all_delays_ever.T == 0).tolist():
    if i in zero_row_set:
        continue
    if i >= N:
        # reaction rows: index of the 50-reaction block file the row belongs to
        row_label = f'block {(i - N) // 50}'
    else:
        # species rows come from the species file, so a block number is meaningless
        row_label = 'species row'
    print(f'({i}, {j}) is blank\ttable {j // K + 1}\t{row_label}')

In [None]:
# total number of entries in the compiled array that are exactly zero
(all_delays_ever == 0).sum()

In [None]:
# NOTE(review): scratch arithmetic - presumably the total number of array entries
# (668 rows x 51 columns per table x 12 tables) to compare with the zero count;
# confirm 668 matches N + M for the mechanism in use
668 * 51 * 12

In [None]:
# display every row of column 415
all_delays_ever[..., 415]

In [None]:
# /work/westgroup/harris.se/autoscience/reaction_calculator/delay_uncertainty/base_rmg_1week/chem_annotated.inp