This notebook sets up all essential inputs for the project, including folder paths, keywords, and model parameters. The input_path variable, which points to the folder where this notebook saves the compiled inputs, must be pasted at the start of every Python file in this project. Doing so ensures that all required inputs are loaded correctly, so that the code can run successfully.

Recommended Google Colab Runtime Type: CPU, as this notebook does not involve running machine learning models.

In [None]:
# Folder where this notebook saves the compiled inputs (0_input.json).
# Paste this assignment at the beginning of every file in the project so that
# each one knows where to find the saved inputs.
input_path = "/content/drive/My Drive/ImpactDataMining/Turkiye_Earthquake/Result"

In [None]:
# Input for 1a_preprocessing.ipynb
# The data and result folders share one project directory, so both paths are
# derived from a single prefix.
_project_dir = "/content/drive/My Drive/ImpactDataMining/Turkiye_Earthquake"
data_path = f"{_project_dir}/Data"
result_path = f"{_project_dir}/Result"  # same folder as input_path
file_name = "2023_Mw_7.8_Kahramanmaras,_Turkiye_Earthquake_Sequence_PVRR.docx"

In [None]:
# Input for 1b_preprocessing.ipynb
# Report headings to drop; each entry is the heading text exactly as written
# here (including capitalisation).
drop_headings = [
    "2023 Türkiye Earthquake Sequence",
    "JOINT PRELIMINARY VIRTUAL RECONNAISSANCE REPORT (PVRR)",
    "DEDICATION",
    "PREFACE",
    "COPYRIGHT DISCLAIMER",
    "ATTRIBUTION GUIDANCE",
    "ACKNOWLEDGMENTS",
    "TABLE OF CONTENTS",
    "Geotechnical Performance",
    "Recommended Response Strategy",
    "Appendix A. Road Closure Chronology",
    "References",
]

In [None]:
# Input for 2a_extracting_impact_sentences.ipynb and 2b_selecting_threshold.ipynb
# Keywords are declared one category at a time and merged into the combined
# lists at the bottom of this cell.
building_types = [
    "family housing damage", "school damage", "mobile home damage",
    "manufactured home damage", "commercial facility damage", "hospital damage",
    "religious infrastructure damage", "historical building damage",
    "industrial facility damage", "agricultural infrastructure damage",
    "fire station damage", "police station damage", "parking damage",
    "critical facility damage",
]
building_components = [
    "wall damage", "column damage", "slab damage", "beam damage", "footing damage",
    "cladding damage", "foundation damage", "connector damage", "floor damage",
    "frame damage", "diaphragm damage", "coupling beam damage", "brace damage",
    "panel damage", "ceiling damage", "strut damage", "stair damage",
    "seismic base isolator",
]
other_infrastructure = [
    "airport damage", "port damage", "bridge damage", "road damage",
    "causeway damage", "pavement damage", "water tank damage", "power plants damage",
]
structure_related_terms = [
    "structural behavior", "structural damage", "structural failure mechanisms",
    "building resilience", "structural connection damage", "building interior damage",
    "building exterior damage", "building envelope damage", "building damage",
    "lateral force resisting systems", "insufficient unbraced length", "loss of load-path",
    "insufficient building gap", "insufficient anchorage length", "ductile building",
    "non-ductile building", "plastic hinging", "brittle failure", "soft story",
    "insufficient confinement", "insufficient inelastic deformation", "shear failure",
    "crushing of unconfined concrete", "bond failure", "splice failure",
    "compression bars buckle", "sliding at base", "insufficient seismic hoops",
    "insufficient seismic hooks", "cover spalling", "drift failure", "P-delta effect",
    "structural irregularity", "vertical irregularity", "horizontal irregularity",
    "overturning", "accidental torsion", "non-compliance seismic detailing",
    "excessive lateral displacement", "collapsed building", "nonstructural damage",
    "degradation", "content damage", "drift compatibility", "good performance",
    "undamaged infrastructure",
]
community_damage = [
    "fatalities", "economic loss", "power outage", "healthcare disruption",
    "agricultural damage", "uninsured loss", "telecommunication system disruption",
    "transportation disruption", "utility outage", "water outage", "service loss",
    "supply halt", "injuries", "temporary medical facility", "transaction suspension",
]
non_impact_terms = [
    "building code", "research support", "ground motion intensity",
]

# Structural keywords: the four structure-related categories, in this order.
keywords_struct = [
    *building_types,
    *building_components,
    *other_infrastructure,
    *structure_related_terms,
]
# These two names refer to the very same list objects as their category lists.
keywords_comm = community_damage
keywords_non_impact = non_impact_terms

# Full keyword list: structural, then community, then non-impact terms.
keywords = [*keywords_struct, *keywords_comm, *keywords_non_impact]

In [None]:
# Input for 2a_extracting_impact_sentences.ipynb and 2b_selecting_threshold.ipynb
batch_size = 2  # sentences per batch for BART large MNLI
threshold = 0.9  # entailment threshold for extracting impact sentences
overlap_tokens = 200  # overlapping text between summary chunks for BART large CNN

The following section compiles all of the inputs defined above and saves them to 0_input.json. No edits are needed beyond this point.

In [None]:
# Gather every input defined in the cells above into one dictionary.
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# cell that writes 0_input.json reads this exact name.
input = dict(
    data_path=data_path,
    result_path=result_path,
    file_name=file_name,
    drop_headings=drop_headings,
    keywords_struct=keywords_struct,
    keywords_comm=keywords_comm,
    keywords_non_impact=keywords_non_impact,
    keywords=keywords,
    batch_size=batch_size,
    threshold=threshold,
    overlap_tokens=overlap_tokens,
)

In [None]:
import os
import json

from google.colab import drive

In [None]:
def current_path():
    """Print a heading, the current working directory, and a blank line."""
    # One print call; the empty string plus sep="\n" yields the trailing blank line.
    print("Current working directory", os.getcwd(), "", sep="\n")

# Report the working directory, mount Google Drive, move into input_path, and
# report the working directory again to confirm the change.
current_path()
drive.mount('/content/drive')  # must run before chdir: input_path lives under /content/drive
os.chdir(input_path)  # later relative file names (e.g. 0_input.json) resolve inside input_path
current_path()

Current working directory
/content

Mounted at /content/drive
Current working directory
/content/drive/My Drive/ResilienceDataMining/Turkiye_Earthquake/Result



In [None]:
# Serialize the inputs dictionary to 0_input.json in the current working
# directory (input_path, after the os.chdir above).
# The dictionary is written straight to the file instead of first rebinding
# `input` to its JSON text. Rebinding made this cell non-idempotent: running
# it a second time serialized the already-serialized string and overwrote the
# file with a single quoted JSON string rather than the inputs object.
with open('0_input.json', 'w', encoding='utf-8') as file:
    json.dump(input, file, indent=4)