## Importing Packages

In [36]:
import os
import json
import pandas as pd
import numpy as np
import cv2
import boto3
import glob
import matplotlib.pyplot as plt
from skimage import io
from shapely import wkt
from shapely.geometry import mapping
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

## Download images from AWS S3 bucket

In [37]:
# # S3 Bucket Name & Image Directory Location
# S3_Bucket_Name = 'hurricaneimagebucket'  
# Image_Directory = '../Images'

# # boto3 to initialize S3 client
# s3_client = boto3.client('s3')

# # S3 Bucket Download Images Function
# def S3_Download(S3_Bucket_Name, Image_Directory):
#     # Objects list within S3 Bucket
#     S3_Response = s3_client.list_objects_v2(Bucket = S3_Bucket_Name)

#     # Check if the bucket contains any objects
#     if 'Contents' in S3_Response:
#         for S3_Object in S3_Response['Contents']:
#             file_name = S3_Object['Key']

#             # Only download .png files; to accept other formats pass a tuple, e.g. file_name.endswith(('.png', '.jpeg'))
#             if file_name.endswith('.png'):  # Image format
#                 # Uncomment the line below to log each download as it starts
#                 # print(f'Downloading {file_name}...')

#                 # Join Path
#                 Image_path = os.path.join(Image_Directory, file_name)

#                 # Create any necessary directories
#                 os.makedirs(os.path.dirname(Image_path), exist_ok = True)

#                 # Download the image
#                 s3_client.download_file(S3_Bucket_Name, file_name, Image_path)
#                 # Uncomment the line below to log each completed download
#                 # print(f'Image downloaded {file_name}')
#     else:
#         print('No images found in the bucket')

# # Call the function
# S3_Download(S3_Bucket_Name, Image_Directory)

## Data Loading

In [38]:
# Function to read JSON label files
def extract_JSON_data(directory):
    """Load hurricane label files from ``directory`` into pre/post DataFrames.

    Every regular ``.json`` file directly inside ``directory`` is parsed.
    Records whose ``metadata.disaster`` value contains "hurricane"
    (case-insensitive) are kept and routed by ``metadata.img_name``: names
    containing "pre" go to the first frame, otherwise names containing
    "post" go to the second. Records matching neither are dropped.

    Parameters
    ----------
    directory : str or path-like
        Folder holding the label JSON files.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(hurricane_pre_df, hurricane_post_df)`` with one row per matching
        file, ordered by filename. Both frames always carry the same
        columns, even when no file matched.

    Raises
    ------
    json.JSONDecodeError
        If a ``.json`` file does not contain valid JSON.
    """
    # Fields copied verbatim from the "metadata" object (missing -> None).
    metadata_fields = (
        'sensor', 'provider_asset_type', 'gsd', 'capture_date',
        'off_nadir_angle', 'pan_resolution', 'sun_azimuth', 'sun_elevation',
        'target_azimuth', 'disaster', 'disaster_type', 'catalog_id',
        'original_width', 'original_height', 'width', 'height', 'id',
    )
    columns = ['img_name', 'xy', *metadata_fields]

    pre_data = []
    post_data = []

    # Sorted so the row order is reproducible across machines/filesystems.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)

        # Skip sub-directories (e.g. .ipynb_checkpoints) and non-JSON files,
        # which would otherwise crash open()/json.load().
        if not filename.lower().endswith('.json') or not os.path.isfile(file_path):
            continue

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = json.load(file)

        # A JSON file that is not an object cannot be a label record.
        if not isinstance(content, dict):
            continue

        metadata = content.get('metadata') or {}
        features = content.get('features') or {}

        # Filter by hurricane natural disaster only
        disaster = metadata.get('disaster')
        if not (disaster and "hurricane" in disaster.lower()):
            continue

        # `or ""` also covers an explicit null img_name in the file.
        img_name = metadata.get('img_name') or ""
        data = {'img_name': img_name, 'xy': features.get('xy')}
        for field in metadata_fields:
            data[field] = metadata.get(field)

        # Separate pre and post records ("pre" is checked first).
        lowered_name = img_name.lower()
        if "pre" in lowered_name:
            pre_data.append(data)
        elif "post" in lowered_name:
            post_data.append(data)

    # Passing `columns` keeps the schema stable even for an empty result.
    hurricane_pre_df = pd.DataFrame(pre_data, columns=columns)
    hurricane_post_df = pd.DataFrame(post_data, columns=columns)

    return hurricane_pre_df, hurricane_post_df

In [39]:
# Function to extract pre- and post-disaster images
def extract_images(image_folder) :
    """Collect the ``.png`` paths in ``image_folder``, split into pre/post lists.

    A file is classified by its *file name only* (case-insensitive): names
    containing "pre" are pre-disaster, otherwise names containing "post" are
    post-disaster. Files matching neither, and non-``.png`` files, are
    ignored.

    Parameters
    ----------
    image_folder : str or path-like
        Folder to scan (not recursive).

    Returns
    -------
    tuple of (list of str, list of str)
        ``(pre_images, post_images)``, each sorted by path.
    """
    # List to store pre- and post-hurricane images
    pre_images, post_images = [], []

    print("Retrieving pre and post disaster images from:", image_folder)

    # Sorted so the order is reproducible; glob yields in arbitrary order.
    for image in sorted(glob.iglob(f'{image_folder}/*')) :
        if not image.endswith(".png") :
            continue

        # Match against the file name only: testing the full path would file
        # every image under "pre" whenever a parent folder contains "pre"
        # (e.g. "../preprocessed/Images").
        file_name = os.path.basename(image).lower()
        if "pre" in file_name:
            pre_images.append(image)
        elif "post" in file_name:
            post_images.append(image)

    return pre_images, post_images

In [40]:
# Function to load images
def load_image(image_path) :
    """Read an image from disk and return it in RGB channel order.

    Parameters
    ----------
    image_path : str
        Path of the image file, passed to ``cv2.imread``.

    Returns
    -------
    The array returned by ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returned nothing and ``image_path`` is not an
        existing file.
    ValueError
        If the file exists but ``cv2.imread`` could not read it.
    """
    image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than inside cvtColor with an
    # opaque assertion message.
    if image is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not read image file: {image_path}")

    # Convert image from BGR to RGB format
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return rgb_image

In [42]:
def prepare_data(images_dir, json_dir):
    """Gather label metadata and image paths for the pre/post datasets.

    Calls ``extract_JSON_data(json_dir)`` and then
    ``extract_images(images_dir)``, prints how many pre- and post-disaster
    image paths were found, and returns all four results.

    Parameters
    ----------
    images_dir
        Forwarded to ``extract_images``.
    json_dir
        Forwarded to ``extract_JSON_data``.

    Returns
    -------
    tuple
        ``(pre_df, post_df, pre_image_paths, post_image_paths)`` exactly as
        produced by the two helper calls.
    """
    # Metadata is read before the images are listed.
    pre_df, post_df = extract_JSON_data(json_dir)
    pre_paths, post_paths = extract_images(images_dir)

    # Report how many image paths landed in each split.
    n_pre = len(pre_paths)
    n_post = len(post_paths)
    print(f"\nTotal pre-disaster images: {n_pre}")
    print(f"\nTotal post-disaster images: {n_post}")

    return pre_df, post_df, pre_paths, post_paths