Copyright 2024 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [None]:
#@title License
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Is Config Cached
Reads the feature cache directory and reports, for every encoder defined by a trained model's gin config, which cached feature files already exist and which are missing.

# Imports

In [None]:
import gin
import numpy as np
import pandas as pd
import os

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from eq_mag_prediction.forecasting import training_examples
from eq_mag_prediction.scripts import magnitude_predictor_trainer   # import unused for gin config
from eq_mag_prediction.forecasting import one_region_model
from eq_mag_prediction.forecasting import encoders

# Read configs

In [None]:
# Name of the trained-model subdirectory (under results/trained_models) whose
# gin config is inspected by this notebook.
MODEL_NAME = 'JMA'
# Directory searched for cached feature files. It is resolved relative to the
# current working directory, so the notebook must be run from a directory that
# is a sibling of `results`.
DEFAULT_FEATURE_CACHE_DIR = os.path.join(os.getcwd(), '..', 'results/cached_features')

In [None]:
# Directory of the trained model selected by MODEL_NAME, resolved relative to
# the current working directory; its `config.gin` is read further down.
experiment_dir = os.path.join(os.getcwd(), '..', 'results/trained_models/', MODEL_NAME)
# NOTE(review): not referenced by any visible cell. Presumably intended as the
# `custom_objects` mapping for loading a saved model -- confirm or remove.
custom_objects={
    '_repeat': encoders._repeat,
    }


In [None]:
# Load the gin bindings stored next to the trained model. The config is
# unlocked only for the duration of the parse.
config_path = os.path.join(experiment_dir, 'config.gin')
with open(config_path) as f, gin.unlock_config():
    gin.parse_config(f.read(), skip_unknown=False)

In [None]:
# CatalogDomain is built without arguments; presumably its parameters are
# bound by the gin config parsed above -- TODO confirm.
domain = training_examples.CatalogDomain()
# Encoders built for that domain. Later cells iterate `all_encoders.values()`
# and read each encoder's `.name`.
all_encoders = one_region_model.build_encoders(domain)

In [None]:
# For every encoder, record (suffix, file_exists) pairs for the cache-file
# suffixes selected by `_create_suffix_logical`, keyed by encoder name.
existing_files_dict = {}
for encoder in all_encoders.values():
  relevant_id = one_region_model.encoder_domain_id(domain, encoder)
  relevant_suffixes, files_exist = one_region_model.id_files_exist_in_dir(
      relevant_id, DEFAULT_FEATURE_CACHE_DIR
  )
  files_exist = np.array(files_exist)
  # Selector over the suffixes; presumably a boolean mask -- TODO confirm.
  suffix_logical = one_region_model._create_suffix_logical(
      encoder, relevant_suffixes, files_exist
  )
  selected_suffixes = np.array(relevant_suffixes)[suffix_logical]
  selected_exist = files_exist[suffix_logical]
  existing_files_dict[encoder.name] = [
      (suffix, exists)
      for suffix, exists in zip(selected_suffixes, selected_exist)
  ]

In [None]:
def existing_files_dict_to_df(existing_files_dict):
  """Flatten a per-encoder cache listing into one summary DataFrame.

  Args:
    existing_files_dict: Mapping from encoder name to an iterable of
      `(suffix, file_exists, cache_id)` tuples. Tuples holding only
      `(suffix, file_exists)` are accepted as well.

  Returns:
    A DataFrame with one row per tuple and the columns `encoder`, `suffix`,
    `file_exists` and `cache id` (None when the tuple carries no cache id).
    The columns are present even when there are no rows, and the index is
    unique, which `DataFrame.style` requires.
  """
  columns = ['encoder', 'suffix', 'file_exists', 'cache id']
  # Collect all rows first and build the frame once; concatenating inside the
  # loop copies the accumulated frame on every iteration.
  rows = [
      (encoder_name, entry[0], entry[1], entry[2] if len(entry) > 2 else None)
      for encoder_name, entries in existing_files_dict.items()
      for entry in entries
  ]
  return pd.DataFrame(rows, columns=columns)

def existing_files_for_gin_config(cache_dir=None):
  """Summarise which cached feature files exist for the configured encoders.

  Reads the notebook-level `all_encoders` and `domain`, so the gin config must
  already be parsed and the encoders built before this is called.

  Args:
    cache_dir: Directory to look for cached feature files in. Defaults to
      `DEFAULT_FEATURE_CACHE_DIR`.

  Returns:
    A DataFrame with the columns `encoder`, `suffix`, `file_exists` and
    `cache id`, one row per selected cache-file suffix, indexed 0..n-1.
  """
  if cache_dir is None:
    cache_dir = DEFAULT_FEATURE_CACHE_DIR
  existing_files_dict = {}
  for encoder in all_encoders.values():
    relevant_id = one_region_model.encoder_domain_id(domain, encoder)
    relevant_suffixes, files_exist = one_region_model.id_files_exist_in_dir(
        relevant_id, cache_dir
    )
    files_exist = np.array(files_exist)
    # Selector over the suffixes; presumably a boolean mask -- TODO confirm.
    suffix_logical = one_region_model._create_suffix_logical(
        encoder, relevant_suffixes, files_exist
    )
    selected_suffixes = np.array(relevant_suffixes)[suffix_logical]
    selected_exist = files_exist[suffix_logical]
    # Attach the cache id to every selected entry directly, so the number of
    # ids always matches the selection rather than relying on the mask's sum.
    existing_files_dict[encoder.name] = [
        (suffix, exists, relevant_id)
        for suffix, exists in zip(selected_suffixes, selected_exist)
    ]
  # drop=True keeps the old index out of the table; a plain reset_index()
  # would add it as a meaningless `index` column.
  return existing_files_dict_to_df(existing_files_dict).reset_index(drop=True)

def boolean_color(v):
    """Return the CSS rule for a cell: green if `v` is truthy, red otherwise."""
    if v:
        return "color: green;"
    return "color: red;"

# Present existing files

In [None]:
# Show the summary table with `file_exists` coloured green (cached) or red
# (missing).
summary_styler = existing_files_for_gin_config().style
# `Styler.applymap` was renamed to `Styler.map` in pandas 2.1 and the old name
# is deprecated; use whichever one the installed pandas provides.
color_cells = getattr(summary_styler, 'map', None) or summary_styler.applymap
color_cells(boolean_color, subset=['file_exists'])