This notebook sets up all essential inputs for the project, including folder paths, keywords, model parameters, and more. The input_path variable, which is the folder where the input file produced by this notebook (0_input.json) is saved, must be pasted at the start of every Python file in this project. Doing so ensures that all required inputs are loaded correctly, so that the code can run successfully.

Recommended Google Colab Runtime Type: CPU, as this notebook does not involve running machine learning models.

In [None]:
# Folder in which this notebook saves the input file.
# Paste this assignment at the beginning of every other file in the project.
input_path = "/content/drive/My Drive/ImpactDataMining/Hurricane_Ian/Result"

In [None]:
# Settings read by 1a_preprocessing.ipynb
# data_path:   folder path for the data
# result_path: folder path for the results
# file_name:   name of the .docx report file
data_path = "/content/drive/My Drive/ImpactDataMining/Hurricane_Ian/Data"
result_path = "/content/drive/My Drive/ImpactDataMining/Hurricane_Ian/Result"
file_name = "Hurricane Ian_PVRR.docx"

In [None]:
# Settings read by 1b_preprocessing.ipynb
# Headings listed here are the ones to drop (presumably together with their
# sections — confirm in 1b_preprocessing.ipynb).
drop_headings = [
    "PREFACE",
    "ATTRIBUTION GUIDANCE",
    "ACKNOWLEDGMENTS",
    "TABLE OF CONTENTS",
    "Recommended Response Strategy",
    "Appendix A. Loss Estimates",
    "Appendix B. Evacuation Orders",
    "Appendix C. Hazard Observations",
    "References",
]

In [None]:
# Settings read by 2a_extracting_impact_sentences.ipynb and 2b_selecting_threshold.ipynb
# Keywords are declared per category first and merged into combined lists at the end.

# Damage keywords by type of building
building_types = [
    "family housing damage",
    "school damage",
    "mobile home damage",
    "manufactured home damage",
    "commercial facility damage",
    "hospital damage",
    "religious infrastructure damage",
    "historical building damage",
    "industrial facility damage",
    "agricultural infrastructure damage",
    "fire station damage",
    "police station damage",
    "parking damage",
    "critical facility damage",
    "shelter damage",
]

# Keywords for individual building components
building_components = [
    "roof damage",
    "secondary water resistance",
    "roof deck attachment damage",
    "metal roof deck attachment damage",
    "roof-wall connection damage",
    "toenail damage",
    "hurricane strap damage",
    "shutter damage",
    "roof cover damage",
    "shingle damage",
    "built-up roofing damage",
    "single-ply membrane damage",
    "roof frame damage",
    "wood truss damage",
    "open web steel joist damage",
    "roof deck age damage",
    "joist spacing",
    "window area damage",
    "tie-downs damage",
    "foundation anchorage damage",
    "door damage",
]

# Damage keywords for infrastructure other than buildings
other_infrastructure = [
    "airport damage",
    "port damage",
    "bridge damage",
    "road damage",
    "causeway damage",
    "pavement damage",
    "seawall damage",
    "levee damage",
    "water tank damage",
    "power plants damage",
    "offshore structure damage",
]

# General structure-related terms
structure_related_terms = [
    "structural behavior",
    "structural damage",
    "structural failure mechanisms",
    "infrastructure resilience",
    "structural connection damage",
    "building interior damage",
    "building exterior damage",
    "building envelope damage",
    "building damage",
    "lateral force resisting systems",
    "undamaged infrastructure",
]

# Terrestrial damage keywords
terrestrial_damage = [
    "erosion",
    "washout",
    "wind-borne debris",
    "scour",
    "tree-related damage",
]

# Community-level damage keywords
community_damage = [
    "fatalities",
    "economic loss",
    "power outage",
    "healthcare disruption",
    "agricultural damage",
    "uninsured loss",
    "telecommunication system disruption",
    "transportation disruption",
    "utility outage",
    "water outage",
    "service loss",
    "supply halt",
    "temporary medical facility",
    "transaction suspension",
]

# Terms that are not impact-related
non_impact_terms = [
    "storm tracking",
    "building code",
    "research support",
]

# Structural keywords: all structure-related categories, in declaration order
keywords_struct = [
    *building_types,
    *building_components,
    *other_infrastructure,
    *structure_related_terms,
    *terrestrial_damage,
]
# These two names refer to the very same list objects as their category lists
keywords_comm = community_damage
keywords_non_impact = non_impact_terms

# Full keyword list: structural, then community, then non-impact
keywords = [*keywords_struct, *keywords_comm, *keywords_non_impact]

In [None]:
# Settings read by 2a_extracting_impact_sentences.ipynb and 2b_selecting_threshold.ipynb

# Number of sentences per batch for BART large MNLI
batch_size = 4
# Entailment threshold for extracting impact sentences
threshold = 0.9
# Overlap between summary chunks for BART large CNN
# (presumably counted in tokens, per the name — confirm where it is used)
overlap_tokens = 200

The following section compiles all previously defined inputs and saves them. Therefore, there is no need to make any edits beyond this point.

In [None]:
# Gather every setting defined above into one dictionary, keyed by variable name.
# NOTE: the name `input` shadows Python's built-in input(); it is kept as-is
# because the cell that saves the settings refers to this exact name.
input = {
    "data_path": data_path,
    "result_path": result_path,
    "file_name": file_name,
    "drop_headings": drop_headings,
    "keywords_struct": keywords_struct,
    "keywords_comm": keywords_comm,
    "keywords_non_impact": keywords_non_impact,
    "keywords": keywords,
    "batch_size": batch_size,
    "threshold": threshold,
    "overlap_tokens": overlap_tokens,
}

In [None]:
import os
import json

from google.colab import drive

In [None]:
def current_path():
    """Print a heading, the current working directory, and a blank line."""
    # One print call; the trailing empty string yields the blank separator line.
    print("Current working directory", os.getcwd(), "", sep="\n")

# Show the working directory before changing it.
current_path()
# Mount Google Drive at /content/drive; this has to happen before the chdir
# below because input_path is located under /content/drive.
drive.mount('/content/drive')
# Move into input_path so that files opened with a relative name are created there.
os.chdir(input_path)
# Show the working directory again to confirm the change.
current_path()

Current working directory
/content/drive/MyDrive/ResilienceDataMining/Hurricane_Ian/Result

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current working directory
/content/drive/My Drive/ResilienceDataMining/Hurricane_Ian/Result



In [None]:
# Convert the Python dictionary to a JSON string.
# The result is stored under a separate name so that `input` keeps referring to
# the dictionary. Rebinding `input` to the JSON text made this cell unsafe to
# re-run: a second run would JSON-encode the already-encoded string and
# overwrite 0_input.json with a quoted string instead of an object.
input_json = json.dumps(input, indent=4)

# Write the settings into the current working directory; the encoding is given
# explicitly so the file does not depend on the platform default.
with open('0_input.json', 'w', encoding='utf-8') as file:
    file.write(input_json)