This notebook selects impact-related sentences using the pre-defined threshold specified in 0_input.ipynb. It then separates the selected sentences into those describing structural impacts and those describing community impacts, preparing them for subsequent summary generation.

Please paste the input_path, which is the location of the 0_input.ipynb file, into the first code cell of this notebook. This is the only step required to load all the information needed to run the code.

Recommended Google Colab Runtime Type: CPU, as this notebook does not involve running machine learning models.

In [None]:
# Input folder path: the notebook changes into this folder and reads
# 0_input.json from it. Set it at the beginning of each notebook before running.
input_path =  "/content/drive/My Drive/ImpactDataMining/Hurricane_Ian/Result"

All the sections below automatically retrieve data from the 0_input.ipynb file, as well as results from previous notebooks in this series. The code is designed to run using this information, so no further edits are required beyond this point.

In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

from google.colab import drive
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, precision_score, recall_score,  f1_score

In [None]:
import time

# Record the wall-clock start time; the final cell subtracts it from the end
# time to report how long the notebook took to run.
start_time = time.time()

In [None]:
def current_path():
  """Print a heading, the current working directory, and a trailing blank line."""
  # One print call with a newline separator emits the same three lines as
  # three consecutive print calls (heading, path, empty line).
  print("Current working directory", os.getcwd(), "", sep="\n")

# Show the working directory before and after moving into the input folder.
current_path()
# Mount Google Drive at /content/drive so that Drive paths can be accessed.
drive.mount('/content/drive')
# Relative file names opened in later cells are resolved against input_path.
os.chdir(input_path)
current_path()

Current working directory
/content

Mounted at /content/drive
Current working directory
/content/drive/My Drive/ResilienceDataMining/Hurricane_Ian/Result



In [None]:
# Read the settings stored in 0_input.json (looked up in the current
# working directory).
with open('0_input.json', 'r') as file:
    data = json.load(file)

# Folder that holds the result files used by the following cells.
result_path = data['result_path']

# Keyword lists for the structural, community and non-impact categories.
labels_struct, labels_comm, labels_non_impact = (
    data[key] for key in ('keywords_struct', 'keywords_comm', 'keywords_non_impact')
)

# Cut-off applied to the prediction scores when selecting sentences.
threshold = data['threshold']

In [None]:
# Show the working directory before and after moving into the result folder.
current_path()
# Drive has already been mounted by an earlier cell, so this call only
# reports that the mount point is in use.
drive.mount('/content/drive')
# result_path was read from 0_input.json; result files are opened relative to it.
os.chdir(result_path)
current_path()

Current working directory
/content/drive/My Drive/ResilienceDataMining/Hurricane_Ian/Result

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current working directory
/content/drive/My Drive/ResilienceDataMining/Hurricane_Ian/Result



In [None]:
# Results stored in 1_results.json: body/table text and their index lists.
with open('1_results.json', 'r') as file:
    data = json.load(file)

text_body, text_table = data['text_body'], data['text_table']
idx_body, idx_table = data['idx_body'], data['idx_table']

# Results stored in 2a_results.json: the sentences, the label set, and the
# predicted score and label stored for each sentence.
with open('2a_results.json', 'r') as file:
    data = json.load(file)

sent_all = data['sent_all']
keywords = data['labels']
label_pred = data['result_probs']
result_labels = data['result_labels']

In [None]:
# Find every position whose predicted label is one of the non-impact
# keywords, then overwrite the score stored at that position with 0.
non_impact_positions = [
    pos for pos, label in enumerate(result_labels) if label in labels_non_impact
]
for pos in non_impact_positions:
  label_pred[pos] = 0

In [None]:
# Binary decision per sentence: 1 when the score reaches the threshold, else 0.
y_pred = [int(score >= threshold) for score in label_pred]

In [None]:
# Positions of the sentences that were marked 1 in y_pred.
idx_flat = [pos for pos, flag in enumerate(y_pred) if flag == 1]

# Table index lists first, then body index lists, kept as a list of lists.
idx_para_nested = idx_table + idx_body
# Number of entries in each inner list.
idx_len = list(map(len, idx_para_nested))
# The same indices flattened into one list, preserving order.
idx_para_flat = [idx for group in idx_para_nested for idx in group]
# Running position 0..N-1 for every entry of the flattened list.
idx_count = [*range(len(idx_para_flat))]

In [None]:
# Split the running positions 0..N-1 of idx_para_flat into groups. A new group
# starts whenever a stored index fails to increase relative to the previous
# one (presumably where the numbering restarts for the next paragraph or
# table -- TODO confirm against the notebook that produces idx_body/idx_table).
# The enumerate position is used directly: it is identical to the matching
# entry of idx_count, which is defined as range(len(idx_para_flat)).
idx_count_nested = []
current_group = []
for pos, idx in enumerate(idx_para_flat):
  if pos > 0 and idx <= idx_para_flat[pos - 1]:
    idx_count_nested.append(current_group)
    current_group = []
  current_group.append(pos)

# Flush the last group, which the loop never closes.
if current_group:
  idx_count_nested.append(current_group)

# Keep only the selected positions inside each group and drop groups that end
# up empty. A set makes each membership test constant-time; testing against
# the idx_flat list made this step quadratic in the number of sentences.
idx_flat_lookup = set(idx_flat)
idx_nested = []
for group in idx_count_nested:
  selected = [pos for pos in group if pos in idx_flat_lookup]
  if selected:
    idx_nested.append(selected)

In [None]:
# Replace every selected position by its sentence text, keeping the grouping.
sent_nested = [[sent_all[pos] for pos in group] for group in idx_nested]

In [None]:
# Predicted label of each selected sentence, in the same order as idx_flat.
pos_labels = [result_labels[pos] for pos in idx_flat]

In [None]:
# Selected positions whose predicted label is one of the structural keywords.
idx_struct_flat = [n for n, k in zip(idx_flat, pos_labels) if k in labels_struct]

# A set gives constant-time membership checks; testing against the list made
# the grouping below quadratic in the number of selected sentences.
idx_struct_lookup = set(idx_struct_flat)

# Keep the grouping of idx_nested, retaining only structural positions and
# dropping any group that ends up empty.
idx_struct_nested = []
for group in idx_nested:
  kept = [pos for pos in group if pos in idx_struct_lookup]
  if kept:
    idx_struct_nested.append(kept)

# Replace every retained position by its sentence text.
sent_struct_nested = [[sent_all[pos] for pos in group] for group in idx_struct_nested]

In [None]:
# Selected positions whose predicted label is one of the community keywords.
idx_comm_flat = [n for n, k in zip(idx_flat, pos_labels) if k in labels_comm]

# A set gives constant-time membership checks; testing against the list made
# the grouping below quadratic in the number of selected sentences.
idx_comm_lookup = set(idx_comm_flat)

# Keep the grouping of idx_nested, retaining only community positions and
# dropping any group that ends up empty.
idx_comm_nested = []
for group in idx_nested:
  kept = [pos for pos in group if pos in idx_comm_lookup]
  if kept:
    idx_comm_nested.append(kept)

# Replace every retained position by its sentence text.
sent_comm_nested = [[sent_all[pos] for pos in group] for group in idx_comm_nested]

In [None]:
# Saving results to a JSON file
with open('2b_results.json', 'w') as file:
    json.dump(
        {'sent_nested': sent_nested, 'sent_struct_nested': sent_struct_nested,
         'sent_comm_nested': sent_comm_nested}, file
        )

In [None]:
# Elapsed wall-clock time since start_time was recorded near the top.
end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time} seconds")

Execution time: 18.347049713134766 seconds
