In [1]:
# imports
import os
from shapely.geometry import Polygon
import numpy as np
import xml.etree.ElementTree as ET
import json
import cv2
import glob

from PIL import Image

import matplotlib.pyplot as plt

In [2]:
# Image alteration functions:
def apply_gamma_correction(image, gamma):
    # Normalize the image to [0, 1]
    image_float = image / 255.0
    # Apply gamma correction
    corrected_image = np.power(image_float, gamma)
    # Scale back to [0, 255] and convert to 8-bit
    corrected_image = np.clip(corrected_image * 255, 0, 255).astype(np.uint8)
    return corrected_image

def adjust_brightness(image, brightness_factor):
    adjusted_image = image + brightness_factor
    adjusted_image = np.clip(adjusted_image, 0, 255).astype(np.uint8)
    return adjusted_image

def adjust_contrast(image, contrast_factor):
    # Calculate the mean of the image for contrast adjustment
    mean = np.mean(image)
    adjusted_image = (image - mean) * contrast_factor + mean
    adjusted_image = np.clip(adjusted_image, 0, 255).astype(np.uint8)
    return adjusted_image

def histogram_stretching(image):
    # Convert to float to avoid issues during division
    img = image.astype('float')
    
    # Get minimum and maximum pixel values
    min_val = np.min(img)
    max_val = np.max(img)
    
    # Apply histogram stretching
    stretched_img = (img - min_val) / (max_val - min_val) * 255.0
    
    return stretched_img.astype('uint8')

def linear_stretch(image):
    # Get the minimum and maximum pixel values
    min_val = np.min(image)
    max_val = np.max(image)
    
    # Apply the linear stretch
    stretched_image = (image - min_val) * (255.0 / (max_val - min_val))
    
    # Clip the values to ensure they are within the valid range [0, 255]
    stretched_image = np.clip(stretched_image, 0, 255)
    
    # Convert the stretched image to uint8
    stretched_image = stretched_image.astype('uint8')
    
    return stretched_image

def percent_clip(image, percent):
    # Calculate the number of pixels to clip
    num_pixels = image.size
    num_to_clip = int(num_pixels * percent / 100)
    
    # Sort the pixel values
    sorted_pixels = np.sort(image, axis=None)
    
    # Clip the pixel values
    clipped_image = np.clip(image, sorted_pixels[num_to_clip], sorted_pixels[-num_to_clip - 1])
    
    return clipped_image

def histogram_equalize(image):
    # Initialize an empty array to store the equalized image
    equalized_image = np.zeros_like(image)

    # Iterate over each channel of the image
    for channel in range(image.shape[2]):
        # Flatten the channel to 1D array
        channel_values = image[:, :, channel].flatten()

        # Calculate the histogram of the channel
        hist, bins = np.histogram(channel_values, bins=256, range=(0, 256), density=True)

        # Calculate the cumulative distribution function (CDF)
        cdf = hist.cumsum()

        # Normalize the CDF to the range [0, 255]
        cdf_normalized = ((cdf - cdf.min()) / (cdf.max() - cdf.min())) * 255

        # Interpolate the CDF values to get the equalized values
        equalized_values = np.interp(channel_values, bins[:-1], cdf_normalized)

        # Reshape the equalized values back to the original shape of the channel
        equalized_channel = equalized_values.reshape(image[:, :, channel].shape)

        # Assign the equalized channel to the equalized image
        equalized_image[:, :, channel] = equalized_channel

    # Convert the equalized image to uint8
    equalized_image = equalized_image.astype('uint8')

    return equalized_image

In [19]:
# Dataset Paths:

###### UCSB Dataset ######
ucsb_parent_folder = '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24'
ucsb_full_images_numpy  = os.path.join(ucsb_parent_folder, 'image_patches')
ucsb_full_labels = os.path.join(ucsb_parent_folder, 'labels')

# create images (jpg) folder:
ucsb_full_images = os.path.join(ucsb_parent_folder, 'images')

# If it exists and has files inside, delete the folder and recreate it:
if os.path.exists(ucsb_full_images) and os.listdir(ucsb_full_images):
    print('Deleting and recreating the images folder...')
    os.system(f'rm -rf {ucsb_full_images}')
    os.mkdir(ucsb_full_images)

# number of image patches and labels:
ucsb_image_patches = os.listdir(ucsb_full_images_numpy)
ucsb_labels = os.listdir(ucsb_full_labels)

print(len(ucsb_image_patches), len(ucsb_labels))



Deleting and recreating the images folder...
1088 1063


# UCSB Data Manipulation and Visualisation

# Instance Segmentation XML Creation

In [20]:



# Converting one line of the above to coco format instance segmentation for the image:

# Example:

Source: https://docs.nvidia.com/tao/tao-toolkit/text/data_annotation_format.html#id9
annotation{
"id": int,
"image_id": int,
"category_id": int,
"segmentation": RLE or [polygon],
"area": float,
"bbox": [x,y,width,height],
"iscrowd": 0 or 1,
}

image{
"id": int,
"width": int,
"height": int,
"file_name": str,
"license": int,
"flickr_url": str,
"coco_url": str,
"date_captured": datetime,
}

categories[{
"id": int,
"name": str,
"supercategory": str,
}]
'''


##### Handling main dictionaries #####
# Preparing categories field:
categories = [
    {
        "id": 1,
        "name": "Landslide",
        "supercategory": "Natural_Disaster"
    }
]
# Setting up images and annotations fields:
images = []
annotations = []
landslide_count = 0

# Iterating through the images folder enumerating with number:
for image_index, image_name in enumerate(os.listdir(ucsb_full_images_numpy)):

    # Obtain image details:
    image_path = os.path.join(ucsb_full_images_numpy, image_name)

    # Reading image
    sample_image_numpy = np.load(image_path)

    # Obtain image height and width
    image_height = sample_image_numpy.shape[1]
    image_width = sample_image_numpy.shape[2]

    # save image as jpg in images folder
    image_jpg_name = image_name.split('.')[0] + '.jpg'
    image_jpg_path = os.path.join(ucsb_full_images, image_jpg_name)

    # Change the orientation from (C x H x W) to (H x W x C)
    sample_image_numpy = np.transpose(sample_image_numpy, (1, 2, 0))

    # Choose bands 2,1 and 0 for RGB
    sample_image_numpy = sample_image_numpy[:, :, [2, 1, 0]]

    # Normalising the image:
    img_reshaped = sample_image_numpy
    if img_reshaped.dtype == np.uint16:
        scale_factor = 65535 // 255
        img_reshaped = (img_reshaped / scale_factor).astype(np.uint8)
    elif img_reshaped.max() <= 1:  # float images in [0, 1] range
        img_reshaped = (img_reshaped* 255).astype(np.uint8)
        
    cv2.imwrite(image_jpg_path, img_reshaped)

    # Preparing the next set of fields for the current image:
    image = {
        "id": image_index,
        "width": image_width,
        "height": image_height,
        "file_name": image_jpg_name,
        "license": 1,
        "flickr_url": "http://www.flickr.com/",
        "coco_url": "http://www.coco.com/",
        "date_captured": "2021-08-06"
    }

    # Appending the image to the images list
    images.append(image)

    ### Find the corresponding label file
    # Get the name of the image
    image_name = image_name.split('.')[0]

    # Find the corresponding label file
    label_file = image_name + '.txt'

    # Get the full path of the label file
    label_file_path = os.path.join(ucsb_full_labels, label_file)

    # If label doesn't exist, we move on to the next image
    if not os.path.exists(label_file_path):
        continue

    # Read the label file and obtain line by line as a list
    with open(label_file_path, 'r') as f:
        lines = f.readlines()

    # iterating through all landslide annotations in the labels file (enumerate)
    for i, line in enumerate(lines):

        # obtaining segmentation polygon from line,
        full_annotation_polygon_normalised = lines[i].split(' ')[1:]

        # Convert polygon to non-normalised format by multiplying with image width (for 1,3,5... coordinates) and height (for 2,4,6... coordinates):
        x_coords = []
        y_coords = []
        for i, coord in enumerate(full_annotation_polygon_normalised):
            if i % 2 == 0:
                x_coords.append(float(coord) * image_width)
            else:
                y_coords.append(float(coord) * image_height)
        # Create a polygon object
        full_annotation_polygon = []
        for i in range(len(x_coords)):
            full_annotation_polygon.append(x_coords[i])
            full_annotation_polygon.append(y_coords[i])

        segmentation_polygon = [full_annotation_polygon]

        ### obtaining area of the polygon
        x = x_coords
        y = y_coords

        # creating a polygon object
        polygon = Polygon(zip(x, y))

        # obtaining area of the polygon
        area = polygon.area

        ### obtaining bounding box of the polygon (as x,y,width,height):
        x_min, y_min, x_max, y_max = polygon.bounds

        # obtaining bounding box of the polygon
        bbox = [x_min, y_min, x_max-x_min, y_max-y_min]

        # obtaining iscrowd field of the polygon
        iscrowd = 0

        # creating annotation dictionary
        annotation = {
            "id": landslide_count,
            "image_id": image_index,
            "category_id": 1,
            "segmentation": segmentation_polygon,
            "area": area,
            "bbox": bbox,
            "iscrowd": iscrowd
        }

        # incrementing landslide count
        landslide_count += 1

        # appending annotation to annotations list
        annotations.append(annotation)





[127.38476800000001, 2.4731840000000003, 10.543455999999992, 8.630944]
[52.743488, 5.097568, 8.733984, 4.900448]
[98.688128, 25.430272000000002, 5.182687999999985, 8.457343999999999]
[128.47139199999998, 36.603616, 8.504832000000022, 16.528511999999992]
[109.063136, 41.710592000000005, 3.852800000000002, 7.198463999999994]
[147.406336, 63.03696000000001, 8.737567999999982, 10.700703999999988]
[105.152992, 156.768416, 4.648223999999999, 9.018016000000017]
[118.34681599999999, 158.310208, 9.174592000000018, 9.8784]
[132.59120000000001, 128.49894400000002, 3.5927359999999737, 12.083007999999978]
[124.27094399999999, 125.366976, 4.964736000000016, 11.576544000000013]
[109.1832, 119.91839999999999, 3.0430399999999906, 10.589823999999993]
[98.576576, 116.420416, 4.532640000000001, 8.64752]
[77.830368, 70.66729600000001, 6.595007999999993, 13.699615999999992]
[76.336288, 32.481792, 7.679615999999996, 17.819648]
[71.96739199999999, 35.656991999999995, 21.28694400000002, 35.743007999999996]
[20

In [21]:

# Preparing the final xml file:

def create_xml(annotations, images, categories, output_file):
    # Create the root element
    root = ET.Element("dataset")
    
    # Add images element
    images_element = ET.SubElement(root, "images")
    for image in images:
        image_element = ET.SubElement(images_element, "image")
        for key, value in image.items():
            ET.SubElement(image_element, key).text = str(value)
    
    # Add annotations element
    annotations_element = ET.SubElement(root, "annotations")
    for annotation in annotations:
        annotation_element = ET.SubElement(annotations_element, "annotation")
        for key, value in annotation.items():
            if key == "bbox" and isinstance(value, list):
                bbox_element = ET.SubElement(annotation_element, "bbox")
                for bbox_value in value:
                    ET.SubElement(bbox_element, "value").text = str(bbox_value)
            else:
                ET.SubElement(annotation_element, key).text = str(value)
    
    # Add categories element
    categories_element = ET.SubElement(root, "categories")
    for category in categories:
        category_element = ET.SubElement(categories_element, "category")
        for key, value in category.items():
            ET.SubElement(category_element, key).text = str(value)
    
    # Create a tree from the root element
    tree = ET.ElementTree(root)
    
    # Write the tree to an XML file
    tree.write(output_file, encoding='utf-8', xml_declaration=True)


output_file = 'ucsb_full_coco_format_instance_segmentation.xml'

# If file already exists, delete it:
if os.path.exists(output_file):
    os.remove(output_file)

# Create the XML file
create_xml(annotations, images, categories, output_file)





# YOLO Labels to PASCAL VOC XML

In [12]:
img_path = '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/images'
label_path = '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/labels'
output_image_path = '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/data_images/'

# Make sure the output directory exists
if not os.path.exists(output_image_path):
    os.makedirs(output_image_path)

# Remove existing files in the output directory
files = [file for file in os.listdir(output_image_path) if os.path.isfile(os.path.join(output_image_path, file))]
for file in files:
    os.remove(os.path.join(output_image_path, file))

# Setting a threshold:
threshold = 1  # how the bbox area should be at least 10 pixels

# Ensure the output directory exists
if not os.path.exists(output_image_path):
    os.makedirs(output_image_path)

# Remove existing files in the output directory
files = [file for file in os.listdir(output_image_path) if os.path.isfile(os.path.join(output_image_path, file))]
for file in files:
    os.remove(os.path.join(output_image_path, file))

for filename_temp in glob.glob(img_path+'/*'):

    # Choose a file
    filename = filename_temp.split('/')[-1].split('.')[0]

    # Read the image for image dimensions
    img = cv2.imread(f'{img_path}/{filename}.jpg')
    image = Image.fromarray(img, 'RGB')
    
    # Get the corresponding label name
    label = f'{label_path}/{filename}.txt'

    # If the label file does not exist, skip the image
    if not os.path.exists(label):
        print(f'{filename} does not have a label file')
        continue

    with open(label, 'r') as file:
        file_content = file.read()
        file_content = file_content.split('\n')
    
    lst = [item.split(' ') for item in file_content]
    
    coords = []
    for i in lst:
        coords_temp = []
        for j in i[1:]:
            coords_temp.append(float(j))
        coords.append(np.array(coords_temp, dtype='object').reshape(-1,2))
    
    label_base='<annotation>\n<folder></folder>\n<filename>{}.jpg</filename>\n<path>{}.jpg</path>\n<source>\n<database></database>\n</source>\n<size>\n<width>{}</width>\n<height>{}</height>\n<depth>3</depth>\n</size>\n<segmented>0</segmented>\n'.format(filename,filename, img.shape[0], img.shape[1])
    label_base_end = '</annotation>'

    ct = 0
    for num_labels in range(len(coords)):
        if len(coords[num_labels]) != 0:
            big_x=np.where(coords[num_labels][:,0] == max(coords[num_labels][:,0]))[0][0] # big x
            small_x=np.where(coords[num_labels][:,0] == min(coords[num_labels][:,0]))[0][0] # small x
            big_y=np.where(coords[num_labels][:,1] == max(coords[num_labels][:,1]))[0][0] # big y
            small_y=np.where(coords[num_labels][:,1] == min(coords[num_labels][:,1]))[0][0]# small y
                
            xmin, ymin = [int(coords[num_labels][small_x][0]*img.shape[0]),
                          int((coords[num_labels][small_y][1])*img.shape[1])]
            xmax, ymax = [int(coords[num_labels][big_x][0]*img.shape[0]), 
                          int((coords[num_labels][big_y][1])*img.shape[1])]

            if xmin<0 or ymin<0 or xmax<0 or ymax<0: continue
            elif xmax-xmin<1 or ymax-ymin<1: continue
            elif (xmax-xmin) *  (ymax-ymin) < threshold: continue
            else:
                label_name = '<object>\n<name>landslides</name>\n<pose>Unspecified</pose>\n<truncated>0</truncated>\n<difficult>0</difficult>\n<occluded>0</occluded>\n<bndbox><xmin>{}</xmin><xmax>{}</xmax><ymin>{}</ymin><ymax>{}</ymax></bndbox>\n</object>\n'.format(xmin, xmax, ymin, ymax)
                label_base=label_base+label_name
                ct += 1

    if ct == 0:
        continue
       #print(filename, 'does not have any labels')
    else:
        print(filename, 'has', ct, 'labels')
        label_base = label_base+label_base_end

        image.save(output_image_path + str(filename) + '.jpg', 'JPEG')
        
        xml_label_path = output_image_path + str(filename) + '.txt'
        with open(xml_label_path, 'w') as file:
            file.write(label_base)
        
        new_file_name = xml_label_path.split('.')[0] + '.xml'
        
        os.rename(xml_label_path, new_file_name)

# Print the total number of images in the output folder with .jpg extension :
print(len([file for file in os.listdir(output_image_path) if file.endswith('.jpeg')]))


2011 sikkim earthquake_patch_8288_4704 has 2 labels
2010 Haiti_patch_4928_4480 has 4 labels
1987 Sichuan pre-earthquake_patch_7616_7168 has 1 labels
2011 sikkim earthquake_patch_9408_3360 has 5 labels
2011 sikkim earthquake_patch_9408_3360 has 5 labels
2011 sikkim earthquake_patch_8288_4704 has 2 labels
1987 Sichuan pre-earthquake_patch_7616_7168 has 1 labels
2010 Haiti_patch_4928_4480 has 4 labels
1999 chamoli earthquake_patch_8064_7840 has 1 labels
1987 Sichuan pre-earthquake_patch_4704_8512 has 13 labels
1987 Sichuan pre-earthquake_patch_2464_8960 has 1 labels
1987 Sichuan pre-earthquake_patch_7616_6048 has 24 labels
1987 Sichuan pre-earthquake_patch_2464_8960 has 1 labels
1987 Sichuan pre-earthquake_patch_7616_6048 has 24 labels
1987 Sichuan pre-earthquake_patch_4704_8512 has 13 labels
1999 chamoli earthquake_patch_8064_7840 has 1 labels
1987 Sichuan pre-earthquake_patch_2912_9856 has 3 labels
1987 Sichuan pre-earthquake_patch_8512_6944 has 1 labels
1999 chamoli earthquake_patch_71

In [3]:
# Splitting data in Pascal ucsb folder into train tesst and val:
path_ucsb_pascal = '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/data_images'
final_ucsb_pascal = '/Users/vihaan/Workspace/!Datasets/ucsb_pascal'


# Create the train, test, and val directories
train_dir = os.path.join(final_ucsb_pascal, 'train')
test_dir = os.path.join(final_ucsb_pascal, 'test')
val_dir = os.path.join(final_ucsb_pascal, 'val')

# Create the directories if they do not exist
if not os.path.exists(train_dir):
    os.makedirs(train_dir)

if not os.path.exists(test_dir):
    os.makedirs(test_dir)

if not os.path.exists(val_dir):
    os.makedirs(val_dir)

# Get the list of image files
image_files = [file for file in os.listdir(path_ucsb_pascal) if file.endswith('.jpg')]
# Shuffle the list of image files
np.random.shuffle(image_files)

# Split the image files into train, test, and val sets
train_files = image_files[:int(0.8 * len(image_files))]
test_files = image_files[int(0.8 * len(image_files)):int(0.9 * len(image_files))]
val_files = image_files[int(0.9 * len(image_files)):]

# Move the image files to the corresponding directories
for file in train_files:
    # copy the image file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file), os.path.join(train_dir, file))
    # Move the corresponding label file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file.replace('.jpg', '.xml')), os.path.join(train_dir, file.replace('.jpg', '.xml')))







Number of images in train: 1674
Number of images in test: 0
Number of images in val: 0


In [5]:
for file in test_files:
    # copy the image file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file), os.path.join(test_dir, file))
    # Move the corresponding label file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file.replace('.jpg', '.xml')), os.path.join(test_dir, file.replace('.jpg', '.xml')))

for file in val_files:
    # copy the image file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file), os.path.join(val_dir, file))
    # Move the corresponding label file to the corresponding directory
    os.rename(os.path.join(path_ucsb_pascal, file.replace('.jpg', '.xml')), os.path.join(val_dir, file.replace('.jpg', '.xml')))




FileNotFoundError: [Errno 2] No such file or directory: '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/data_images/1987 Sichuan pre-earthquake_patch_3136_11648.jpg' -> '/Users/vihaan/Workspace/!Datasets/ucsb_pascal/test/1987 Sichuan pre-earthquake_patch_3136_11648.jpg'

In [6]:
# Print the number of images in each directory
print('Number of images in train:', len(os.listdir(train_dir)))
print('Number of images in test:', len(os.listdir(test_dir)))
print('Number of images in val:', len(os.listdir(val_dir)))

Number of images in train: 1674
Number of images in test: 210
Number of images in val: 210


# SAR Dataset to PASCAL format

In [5]:
path_sar_unprocessed = '/Users/vihaan/Workspace/!Datasets/SAR Dataset Unprocessed'
regions_in_sar = os.listdir(path_sar_unprocessed)

output_file_path = '/Users/vihaan/Workspace/!Datasets/SAR Dataset Processed 1'

# If the output directory does not exist, create it
if not os.path.exists(output_file_path):
    os.makedirs(output_file_path)

# Remove existing files in the output directory
files = [file for file in os.listdir(output_file_path) if os.path.isfile(os.path.join(output_file_path, file))]
for file in files:
    os.remove(os.path.join(output_file_path, file))

# Remove .DS_Store if it exists
if '.DS_Store' in regions_in_sar:
    regions_in_sar.remove('.DS_Store')

# Ignore the shapefiles folder as well
if 'study areas shp' in regions_in_sar:
    regions_in_sar.remove('study areas shp')

# Setting a threshold
threshold = 1  # Bounding box area should be at least 1 pixel

# Iterate through each region and read the images and labels
for region in regions_in_sar:
    region_path = os.path.join(path_sar_unprocessed, region)
    
    images_path = os.path.join(region_path, 'img')
    images_list = os.listdir(images_path)
    
    labels_path = os.path.join(region_path, 'label')

    for image_tif in images_list:
        # Get the image name without the extension
        image_name = image_tif.split('.')[0]
        
        # Read the image
        image = cv2.imread(os.path.join(images_path, image_tif), cv2.IMREAD_UNCHANGED)
        
        # Get the image height and width
        image_height = image.shape[0]
        image_width = image.shape[1]
        
        # Save the image as a .jpg file
        cv2.imwrite(os.path.join(output_file_path, f'{image_name}.jpg'), image)
        
        # Read the corresponding label file
        label_file_path = os.path.join(labels_path, f'{image_name}.tif')
        
        # If the label file does not exist, skip the image
        if not os.path.exists(label_file_path):
            continue
        
        # Read the label file
        label_image = cv2.imread(label_file_path, cv2.IMREAD_UNCHANGED)
        
        # Extract the 0th band since all bands hold the same value
        band0 = label_image[:, :, 0]
        
        # Threshold the image to create a binary mask
        _, binary_mask = cv2.threshold(band0, 127, 255, cv2.THRESH_BINARY_INV)
        
        # Find contours in the binary mask
        contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        
        # Prepare XML content
        label_base = '<annotation>\n<folder></folder>\n<filename>{}.jpg</filename>\n<path>{}.jpg</path>\n<source>\n<database></database>\n</source>\n<size>\n<width>{}</width>\n<height>{}</height>\n<depth>3</depth>\n</size>\n<segmented>0</segmented>\n'.format(image_name, image_name, image_width, image_height)
        label_base_end = '</annotation>'
        
        ct = 0
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            xmin, ymin, xmax, ymax = x, y, x + w, y + h
            
            if xmin < 0 or ymin < 0 or xmax < 0 or ymax < 0:
                continue
            elif xmax - xmin < 1 or ymax - ymin < 1:
                continue
            elif (xmax - xmin) * (ymax - ymin) < threshold:
                continue
            else:
                if ymin < 0:
                    print(ymin)
                label_name = '<object>\n<name>landslides</name>\n<pose>Unspecified</pose>\n<truncated>0</truncated>\n<difficult>0</difficult>\n<occluded>0</occluded>\n<bndbox><xmin>{}</xmin><xmax>{}</xmax><ymin>{}</ymin><ymax>{}</ymax></bndbox>\n</object>\n'.format(xmin, xmax, ymin, ymax)
                label_base = label_base + label_name
                ct += 1
        
        if ct == 0:
            continue
        else:
            #print(image_name, 'has', ct, 'labels')
            label_base = label_base + label_base_end
            
            xml_label_path = os.path.join(output_file_path, f'{image_name}.xml')
            with open(xml_label_path, 'w') as file:
                file.write(label_base)
            
# Print the total number of images in the output folder with .jpg extension
print(len([file for file in os.listdir(output_file_path) if file.endswith('.jpg')]))

    

7422


In [8]:
# Split full SAR processed dataset into train test and val:

# Define the input and output directories
input_dir = '/Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1'
output_dir = '/Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1_Split'

# If the output directory does not exist, create it
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Remove existing files in the output directory
files = [file for file in os.listdir(output_dir) if os.path.isfile(os.path.join(output_dir, file))]

for file in files:
    os.remove(os.path.join(output_dir, file))

# Remove .DS_Store if it exists
if '.DS_Store' in os.listdir(input_dir):
    os.remove(os.path.join(input_dir, '.DS_Store'))

# Get the list of images in the input directory (images are in .jpg format)
images = [file for file in os.listdir(input_dir) if file.endswith('.jpg')]
print(len(images))

# Split the images into train, test, and validation sets
train_images = images[:int(0.8 * len(images))]
test_images = images[int(0.8 * len(images)):int(0.9 * len(images))]
val_images = images[int(0.9 * len(images)):]

# Create the train, test, and validation directories
train_dir = os.path.join(output_dir, 'train')
test_dir = os.path.join(output_dir, 'test')
val_dir = os.path.join(output_dir, 'val')

print(train_dir)

# Create the train, test, and validation directories if they do not exist
if not os.path.exists(train_dir):
    os.makedirs(train_dir)

if not os.path.exists(test_dir):
    os.makedirs(test_dir)

if not os.path.exists(val_dir):
    os.makedirs(val_dir)

# Copy the images as well as the .xml labels files to the train, test, and validation directories
for image in train_images:
    image_path = os.path.join(input_dir, image)
    xml_path = os.path.join(input_dir, image.split('.')[0] + '.xml')
    os.system(f'cp {image_path} {train_dir}')
    os.system(f'cp {xml_path} {train_dir}')

for image in test_images:
    image_path = os.path.join(input_dir, image)
    xml_path = os.path.join(input_dir, image.split('.')[0] + '.xml')
    os.system(f'cp {image_path} {test_dir}')
    os.system(f'cp {xml_path} {test_dir}')

for image in val_images:
    image_path = os.path.join(input_dir, image)
    xml_path = os.path.join(input_dir, image.split('.')[0] + '.xml')
    os.system(f'cp {image_path} {val_dir}')
    os.system(f'cp {xml_path} {val_dir}')

# Print the number of images in the train, test, and validation directories
print(len(os.listdir(train_dir)))
print(len(os.listdir(test_dir)))
print(len(os.listdir(val_dir)))


7422
/Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1_Split/train


cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti705.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti739.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti824.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti825.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti831.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti765.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti764.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti740.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti583.xml: No such file or directory
cp: /Users/vihaan/Workspace/!Datasets/SAR_Dataset_Processed_1/Haiti637.xml: No suc

11859
1484
1486


In [17]:
################################### Calculating total area of landslides per region:
path_sar_unprocessed = '/Users/vihaan/Workspace/!Datasets/SAR Dataset Unprocessed'
regions_in_sar = os.listdir(path_sar_unprocessed)

# Remove .DS_Store if it exists
if '.DS_Store' in regions_in_sar:
    regions_in_sar.remove('.DS_Store')

# Ignore the shapefiles folder as well
if 'study areas shp' in regions_in_sar:
    regions_in_sar.remove('study areas shp')

for region in regions_in_sar:
    # Get the resolution from the user when code runs:
    print(f'Enter the resolution for the region {region}: ')
    resolution = float(input())

    pixel_to_meter = resolution*resolution

    # Open the label path:
    labels_path = os.path.join(path_sar_unprocessed, region, 'label')

    # Get the list of label files
    label_files = os.listdir(labels_path)

    # Initialize the total area of landslides
    total_landslide_area = 0

    # number of labels:
    print('Total number of labels for region:', region, 'is:', len(label_files))

    # Iterate through each label file
    for label_file in label_files:
        # Read the label file
        label_image = cv2.imread(os.path.join(labels_path, label_file), cv2.IMREAD_UNCHANGED)

        # Extract the 0th band since all bands hold the same value
        band0 = label_image[:, :, 0]

        # Count the number of pixels with landslide
        landslide_pixels = np.count_nonzero(band0 == 0)

        #print(landslide_pixels/band0.size)

        # Add the area to the total area
        total_landslide_area += landslide_pixels

    print(f'Total area of landslides in {region}: {total_landslide_area * pixel_to_meter / 1000000} square kms')
    print(f'Total image area in {region}: {len(label_files) * band0.size * pixel_to_meter / 1000000} square kms')

Enter the resolution for the region Lombok: 
Total number of labels for region: Lombok is: 436
Total area of landslides in Lombok: 87.585025 square kms
Total image area in Lombok: 2857.3696 square kms
Enter the resolution for the region Mengdong Township: 
Total number of labels for region: Mengdong Township is: 1155
Total area of landslides in Mengdong Township: 8.289362 square kms
Total image area in Mengdong Township: 75.69408 square kms
Enter the resolution for the region Wenchuan: 
Total number of labels for region: Wenchuan is: 178
Total area of landslides in Wenchuan: 53.08375 square kms
Total image area in Wenchuan: 1166.5408 square kms
Enter the resolution for the region Moxitaidi (SAT): 
Total number of labels for region: Moxitaidi (SAT) is: 652
Total area of landslides in Moxitaidi (SAT): 4.3708518 square kms
Total image area in Moxitaidi (SAT): 61.53043968 square kms
Enter the resolution for the region palu: 
Total number of labels for region: palu is: 817
Total area of lan

In [5]:
################################### Calculating total area of landslides fpr l4s:
path_l4s_full = '/Users/vihaan/Workspace/!Datasets/data_land4sensor/l4s_complete_xml'

# Get list of labels:
labels = os.listdir(path_l4s_full)
print(len(labels))

# Initialize the total area of landslides
total_landslide_area = 0

# Iterate through each label file
for label in labels:
    # Read the label file
    tree = ET.parse(os.path.join(path_l4s_full, label))
    root = tree.getroot()

    # Get the image width and height
    image_width = int(root.find('size').find('width').text)
    image_height = int(root.find('size').find('height').text)

    # Get the bounding box coordinates
    xmin = int(root.find('object').find('bndbox').find('xmin').text)
    xmax = int(root.find('object').find('bndbox').find('xmax').text)
    ymin = int(root.find('object').find('bndbox').find('ymin').text)
    ymax = int(root.find('object').find('bndbox').find('ymax').text)

    # Calculate the area of the bounding box
    area = (xmax - xmin) * (ymax - ymin)

    # sanity check
    print(area/(image_width*image_height))

    # Add the area to the total area
    total_landslide_area += area

total_image_area = len(labels) * image_width * image_height
pixel_sq_to_m_sq = 10 * 10

print(f'Total area of landslides in l4s: {total_landslide_area * pixel_sq_to_m_sq / 1000000} square kms')
print(f'Total image area in l4s: {total_image_area * pixel_sq_to_m_sq / 1000000} square kms')

2057
0.0166015625
0.01348876953125
0.03717041015625
0.056396484375
0.0225830078125
0.1016845703125
0.19793701171875
0.00732421875
0.056396484375
0.05615234375
0.02435302734375
0.0166015625
0.0087890625
0.0186767578125
0.0263671875
0.015380859375
0.0111083984375
0.00732421875
0.0946044921875
0.0115966796875
0.01971435546875
0.099365234375
0.0093994140625
0.0487060546875
0.05810546875
0.59765625
0.017578125
0.543701171875
0.0164794921875
0.015380859375
0.066650390625
0.0166015625
0.03228759765625
0.01507568359375
0.0311279296875
0.0333251953125
0.09759521484375
0.0162353515625
0.034912109375
0.01513671875
0.0087890625
0.0087890625
0.10894775390625
0.048828125
0.0076904296875
0.006591796875
2.46844482421875
0.0146484375
0.07000732421875
0.72802734375
0.041015625
0.22430419921875
0.02490234375
0.032958984375
0.0589599609375
0.89556884765625
0.015625
0.0076904296875
0.1309814453125
0.201416015625
0.07305908203125
0.00726318359375
0.0238037109375
0.015380859375
0.04742431640625
0.12036132812