Copyright 2024 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [10]:
#@title License
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Is Config Cached
An experiment is defined by a config.gin file.
The gin file specifies which catalog is used, what training parameters, how the
data is devided into train/validation/test sets, etc.

This notebook reads a gin file and presents which features (encoders) are
already calculated and cached for the specified config (experiment).

# Imports

In [15]:
import gin
import numpy as np
import pandas as pd
import os

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from eq_mag_prediction.forecasting import training_examples
from eq_mag_prediction.scripts import magnitude_predictor_trainer   # import unused for gin config
from eq_mag_prediction.forecasting import one_region_model
from eq_mag_prediction.forecasting import encoders
from eq_mag_prediction.utilities import simulate_catalog

# Read configs
Find and read the ```config.gin``` file, by specifying the model name.

***If model does not exists, or catalog is not ingested, an error may be raised.***

In [25]:
MODEL_NAME = 'Mock'
# MODEL_NAME = 'Hauksson'
# MODEL_NAME = 'JMA'

In [26]:
if MODEL_NAME.lower() == 'mock':
  simulate_catalog.mock_catalog_and_config_ingestion()

In [27]:
DEFAULT_FEATURE_CACHE_DIR = os.path.join(os.getcwd(), '..', 'results/cached_features')

In [28]:
experiment_dir = os.path.join(os.getcwd(), '..', 'results/trained_models/', MODEL_NAME)
custom_objects={
    '_repeat': encoders._repeat,
    }


In [29]:
# set gin configs
with open(os.path.join(experiment_dir, 'config.gin')) as f:
    with gin.unlock_config():
        gin.parse_config(f.read(), skip_unknown=False)

([], [])

In [30]:
domain = training_examples.CatalogDomain()
all_encoders = one_region_model.build_encoders(domain)

  catalog_cp.loc[indexes_2_replace, 'time'] = new_time_values


In [31]:
existing_files_dict = {}
for encoder in all_encoders.values():
  relevant_id = one_region_model.encoder_domain_id(domain, encoder)
  relevant_suffixes, files_exist = one_region_model.id_files_exist_in_dir(
      relevant_id, DEFAULT_FEATURE_CACHE_DIR
  )
  files_exist = np.array(files_exist)
  suffix_logical = one_region_model._create_suffix_logical(
      encoder, relevant_suffixes, files_exist
  )
  existing_files_dict[encoder.name] = list(
      zip(
          np.array(relevant_suffixes)[suffix_logical],
          files_exist[suffix_logical],
      )
  )

In [32]:
def existing_files_dict_to_df(existing_files_dict):
  existing_files_summary = pd.DataFrame(columns=['encoder', 'suffix', 'file_exists'])
  for k, v in existing_files_dict.items():
    for suf in v:
      existing_files_summary = pd.concat([
          existing_files_summary,
          pd.DataFrame({'encoder': k, 'suffix': suf[0], 'file_exists': suf[1], 'cache id': suf[2]}, index=(0,))
      ])
  return existing_files_summary

def existing_files_for_gin_config():
  existing_files_dict = {}
  for encoder in all_encoders.values():
    relevant_id = one_region_model.encoder_domain_id(domain, encoder)
    relevant_suffixes, files_exist = one_region_model.id_files_exist_in_dir(
        relevant_id, DEFAULT_FEATURE_CACHE_DIR
    )
    files_exist = np.array(files_exist)
    suffix_logical = one_region_model._create_suffix_logical(
        encoder, relevant_suffixes, files_exist
    )
    existing_files_dict[encoder.name] = list(
        zip(
            np.array(relevant_suffixes)[suffix_logical],
            files_exist[suffix_logical],
            [relevant_id]*int(suffix_logical.sum()),
        )
    )
  return existing_files_dict_to_df(existing_files_dict).reset_index()

def boolean_color(v):
    return "color: green;" if v else "color: red;"

# Present existing files
A table presenting which encoder (AKA feature) has already been calculated and
chached, together with its hash code.

In [33]:
existing_files_for_gin_config().style.applymap(boolean_color, subset=['file_exists'])

  existing_files_for_gin_config().style.applymap(boolean_color, subset=['file_exists'])


Unnamed: 0,index,encoder,suffix,file_exists,cache id
0,0,catalog_earthquakes,_train,True,encoder_13f4c94a5ca65c5b77b02e9d276a618a663718c1_build_features_bf21a9e8fbc5a3846fb05b4fa0859e0917b2202f_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
1,0,catalog_earthquakes,_validation,True,encoder_13f4c94a5ca65c5b77b02e9d276a618a663718c1_build_features_bf21a9e8fbc5a3846fb05b4fa0859e0917b2202f_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
2,0,catalog_earthquakes,_test,True,encoder_13f4c94a5ca65c5b77b02e9d276a618a663718c1_build_features_bf21a9e8fbc5a3846fb05b4fa0859e0917b2202f_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
3,0,catalog_earthquakes,_scalers,True,encoder_13f4c94a5ca65c5b77b02e9d276a618a663718c1_build_features_bf21a9e8fbc5a3846fb05b4fa0859e0917b2202f_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
4,0,catalog_earthquakes,.enc,True,encoder_13f4c94a5ca65c5b77b02e9d276a618a663718c1_build_features_bf21a9e8fbc5a3846fb05b4fa0859e0917b2202f_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
5,0,recent_earthquakes,_train,True,encoder_4df3432ecee8adcf4acd6ab1616edb80fbcf836f_build_features_682afa5d6b3bc783332f46f785f4041cd23be814_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
6,0,recent_earthquakes,_validation,True,encoder_4df3432ecee8adcf4acd6ab1616edb80fbcf836f_build_features_682afa5d6b3bc783332f46f785f4041cd23be814_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
7,0,recent_earthquakes,_test,True,encoder_4df3432ecee8adcf4acd6ab1616edb80fbcf836f_build_features_682afa5d6b3bc783332f46f785f4041cd23be814_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
8,0,recent_earthquakes,_scalers,True,encoder_4df3432ecee8adcf4acd6ab1616edb80fbcf836f_build_features_682afa5d6b3bc783332f46f785f4041cd23be814_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
9,0,recent_earthquakes,_location_train,True,encoder_4df3432ecee8adcf4acd6ab1616edb80fbcf836f_build_features_682afa5d6b3bc783332f46f785f4041cd23be814_domain_0eea005eafeeb5894f51e655d29f4d38b79622cf
