In [92]:
import os
import cv2
import argparse
import json
import numpy as np
import pandas as pd
import shutil

from tqdm import tqdm
from os import path, walk, makedirs
from skimage.io import imread
from shapely import wkt
from shapely.geometry import mapping, Polygon
from cv2 import fillPoly, imwrite

In [4]:
print("Current Dir",os.getcwd())
os.chdir('../..')
print("Current Dir",os.getcwd())

Current Dir /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/src
Current Dir /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio


In [37]:
ROOT_DIR=os.getcwd()
BASE_DATA_DIR = os.path.join(ROOT_DIR, 'data')
print("BASE_DIR : ", BASE_DATA_DIR)
DATA_DIR = os.path.join(BASE_DATA_DIR, 'xview_building_damage/train/sample')
print("DATA_DIR : ", DATA_DIR)
SAM_CKPT_DIR=os.path.join(BASE_DATA_DIR, 'utils')
print("SAM_CKPT_DIR : ", SAM_CKPT_DIR)
metadata_csv_path=os.path.join(BASE_DATA_DIR, 'xview_building_damage/train/xview2_processed.csv')
print("Metadata File path : ",metadata_csv_path )
xbd_train = os.path.join(BASE_DATA_DIR, 'xview_building_damage', 'train', 'clean_meta_csv')

bd_dir=os.path.join(BASE_DATA_DIR, 'xview_building_damage')
challeng_dir = os.path.join(bd_dir, 'challenge')
train_out_dir=os.path.join(challeng_dir, 'train', 'disaster')
test_out_dir=os.path.join(challeng_dir, 'test', 'disaster')
hold_out_dir=os.path.join(challeng_dir, 'hold', 'disaster')

BASE_DIR :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data
DATA_DIR :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/train/sample
SAM_CKPT_DIR :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/utils
Metadata File path :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/train/xview2_processed.csv


### Metafiles Cleanup Code

In [38]:
def get_df_with_class_numeric_labels(df_name):
    df_name['damage'].fillna('pre', inplace=True)
    df_name['damage_class']=df_name['damage']
    keys=list(df_name['damage_class'].value_counts().keys())
    df_name['damage_class']=df_name['damage_class'].apply(keys.index)
    df_name['damage_class'].value_counts()
    return df_name

In [58]:
def pre_process_meta_data(meta_df, out_dir, disaster_name ='hurricane-florence', dataset='train'):
    hc_disaster = meta_df[meta_df['disaster']==disaster_name]
    hc_train = hc_disaster[hc_disaster['dataset']== dataset]
    df_disaster = hc_train[hc_train['image_polygon'].notna()]
    df_disaster['mask_file_names'] = df_disaster['img_name'].str.replace('.png', '_')+df_disaster['building_id']+'.png'
    df_disaster_class_labels = get_df_with_class_numeric_labels(df_disaster)
    df_disaster_class_labels.to_csv(out_dir+'/hc_'+dataset+"_"+disaster_name+".csv")

In [42]:
df_all = pd.read_csv(metadata_csv_path)

In [None]:
### Michael
pre_process_meta_data(df_all, train_out_dir, 'hurricane-michael', 'train')
pre_process_meta_data(df_all, test_out_dir, 'hurricane-michael', 'test')
pre_process_meta_data(df_all, hold_out_dir, 'hurricane-michael', 'hold')

In [None]:


### Florence
pre_process_meta_data(df_all, xbd_train, 'hurricane-florence', 'train')
pre_process_meta_data(df_all, xbd_train, 'hurricane-florence', 'test')
pre_process_meta_data(df_all, xbd_train, 'hurricane-florence', 'hold')

### Matthew
pre_process_meta_data(df_all, xbd_train, 'hurricane-matthew', 'train')
pre_process_meta_data(df_all, xbd_train, 'hurricane-matthew', 'test')
pre_process_meta_data(df_all, xbd_train, 'hurricane-matthew', 'hold')

### Harvey
pre_process_meta_data(df_all, xbd_train, 'hurricane-harvey', 'train')
pre_process_meta_data(df_all, xbd_train, 'hurricane-harvey', 'test')
pre_process_meta_data(df_all, xbd_train, 'hurricane-harvey', 'hold')

In [60]:
df_hmc_hold=pd.read_csv(os.path.join(hold_out_dir, 'hc_hold_hurricane-michael.csv'))
df_hmc_train=pd.read_csv(os.path.join(train_out_dir, 'hc_train_hurricane-michael.csv'))
df_hmc_test=pd.read_csv(os.path.join(test_out_dir, 'hc_test_hurricane-michael.csv'))

### Spliting the images by disaster

#### Train

In [5]:
!python src/utils/mask_generators/split_into_disasters.py --input data/xview_building_damage/challenge/train/raw --output data/xview_building_damage/challenge/train/disaster

#### Hold

In [6]:
!python src/utils/mask_generators/split_into_disasters.py --input data/xview_building_damage/challenge/hold/raw --output data/xview_building_damage/challenge/hold/disaster

#### Test

In [7]:
!python src/utils/mask_generators/split_into_disasters.py --input data/xview_building_damage/challenge/test/raw --output data/xview_building_damage/challenge/test/disaster

### Creating Masks using Challenge Baseline Script

This code is adapted from the original challenge baseline code base for generating masks and been modified slightly to generate the masks. The link for the original code is https://github.com/DIUx-xView/xView2_baseline/blob/master/utils/mask_polygons.py

In [29]:
#!python src/utils/mask_generators/mask_polygons.py --input data/xview_building_damage/challenge/output --single-file --border 2
### Adapting the contents of the mask_polygons.py below
def get_dimensions(file_path):
    """
    :param file_path: The path of the file 
    :return: returns (width,height,channels)
    """
    # Open the image we are going to mask
    pil_img = imread(file_path)
    img = np.array(pil_img)
    w, h, c = img.shape
    return (w, h, c)

def mask_polygons_separately(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    masked_polys = {}

    for u in shapes:
        sh = shapes[u]
        mask_img = np.zeros(size, np.uint8)
        i = fillPoly(mask_img, [sh], (255, 255, 255))
        masked_polys[u] = i

    return masked_polys

def mask_polygons_together(size, shapes):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: A numpy array with the polygons filled 255s where there's a building and 0 where not 
    """
    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        poly = shapes[u] 
        fillPoly(blank, [poly], (1, 1, 1))
        mask_img += blank
    
    # Here we are taking the overlap (+=) and squashing it back to 0
    mask_img[mask_img > 1] = 0

    # Finally we are taking all 1s and making it pure white (255)
    mask_img[mask_img == 1] = 255

    return mask_img

def mask_polygons_together_with_border(size, shapes, border):
    """
    :param size: A tuple of the (width,height,channels)
    :param shapes: A list of points in the polygon from get_feature_info
    :returns: a dict of masked polygons with the shapes filled in from cv2.fillPoly
    """

    # For each WKT polygon, read the WKT format and fill the polygon as an image
    mask_img = np.zeros(size, np.uint8)

    for u in shapes:
        blank =  np.zeros(size, np.uint8)
        # Each polygon stored in shapes is a np.ndarray
        poly = shapes[u]
        
        # Creating a shapely polygon object out of the numpy array 
        polygon = Polygon(poly)

        # Getting the center points from the polygon and the polygon points
        (poly_center_x, poly_center_y) = polygon.centroid.coords[0]
        polygon_points = polygon.exterior.coords

        # Setting a new polygon with each X,Y manipulated based off the center point
        shrunk_polygon = []
        for (x,y) in polygon_points:
            if x < poly_center_x:
                x += border
            elif x > poly_center_x:
                x -= border

            if y < poly_center_y:
                y += border
            elif y > poly_center_y:
                y -= border

            shrunk_polygon.append([x,y])
        
        # Transforming the polygon back to a np.ndarray
        ns_poly = np.array(shrunk_polygon, np.int32)
  
        # Filling the shrunken polygon to add a border between close polygons
        fillPoly(blank, [ns_poly], (1, 1, 1))
        mask_img += blank
    
    mask_img[mask_img > 1] = 0
    mask_img[mask_img == 1] = 255
    return mask_img

def save_masks(masks, output_path, mask_file_name):
    """
    :param masks: dictionary of UID:masked polygons from mask_polygons_separately()
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write out a separate file, increasing the name
    for m in masks:
        final_out = path.join(output_path,
                              mask_file_name + '_{}.png'.format(m))
        imwrite(final_out, masks[m])

def save_one_mask(masks, output_path, mask_file_name):
    """
    :param masks: list of masked polygons from the mask_polygons_separately function 
    :param output_path: path to save the masks
    :param mask_file_name: the file name the masks should have 
    """
    # For each filled polygon, write the mask shape out to the file per image
    mask_file_name = path.join(output_path, mask_file_name + '.png')
    imwrite(mask_file_name, masks)
    

def read_json(json_path):
    """
    :param json_path: path to load json from
    :returns: a python dictionary of json features
    """
    annotations = json.load(open(json_path))
    return annotations


def get_feature_info(feature):
    """
    :param feature: a python dictionary of json labels
    :returns: a list mapping of polygons contained in the image 
    """
    # Getting each polygon points from the json file and adding it to a dictionary of uid:polygons
    props = {}

    for feat in feature['features']['xy']:
        feat_shape = wkt.loads(feat['wkt'])
        coords = list(mapping(feat_shape)['coordinates'][0])
        props[feat['properties']['uid']] = (np.array(coords, np.int32))

    return props


def mask_chips(json_path, images_directory, output_directory, single_file, border, pre_post='_post'):
    """
    :param json_path: path to find multiple json files for the chips
    :param images_directory: path to the directory containing the images to be masked
    :param output_directory: path to the directory where masks are to be saved
    :param single_file: a boolean value to see if masks should be saved a single file or multiple
    """
    # For each feature in the json we will create a separate mask
    # Getting all files in the directory provided for jsons
    jsons = [j for j in next(walk(json_path))[2] if pre_post in j]

    # After removing non-json items in dir (if any)
    for j in tqdm([j for j in jsons if j.endswith('json')],
                  unit='poly',
                  leave=False):
        # Our chips start off in life as PNGs
        chip_image_id = path.splitext(j)[0] + '.png'
        mask_file = path.splitext(j)[0]

        # Loading the per chip json
        j_full_path = path.join(json_path, j)
        chip_json = read_json(j_full_path)

        # Getting the full chip path, and loading the size dimensions
        chip_file = path.join(images_directory, chip_image_id)
        chip_size = get_dimensions(chip_file)

        # Reading in the polygons from the json file
        polys = get_feature_info(chip_json)

        # Getting a list of the polygons and saving masks as separate or single image files
        if len(polys) > 0:
            if single_file:
                if border > 0:
                    masked_polys = mask_polygons_together_with_border(chip_size, polys, border)
                else:
                    masked_polys = mask_polygons_together(chip_size, polys)
                save_one_mask(masked_polys, output_directory, mask_file)
            else:
                masked_polys = mask_polygons_separately(chip_size, polys)
                save_masks(masked_polys, output_directory, mask_file)

In [130]:
def create_masks(dataSplit='train', pre_post_str="post", single_file=True):
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description=
        """mask_polygons.py: Takes in xBD dataset and masks polygons in the image\n\n
        WARNING: This could lead to hundreds of output images per input\n""")
    
    parser.add_argument('--input',
                        required=True,
                        metavar="/path/to/xBD/",
                        help='Path to parent dataset directory "xBD"')
    parser.add_argument('--single-file', 
                        action='store_true',
                        help='use to save all masked polygon instances to a single file rather than one polygon per mask file')
    parser.add_argument('--border',
                        default=0,
                        type=int,
                        metavar="positive integer for pixel border (e.g. 1)",
                        help='Positive integer used to shrink the polygon by')
    
    if single_file: ### Creates all the masks in single file
        args = parser.parse_args(['--input', 'data/xview_building_damage/challenge/output' , '--single-file', '--border', '2'])
    else:
        args = parser.parse_args(['--input', 'data/xview_building_damage/challenge/'+dataSplit+'/disaster' , '--border', '2'])
    
    # Getting the list of the disaster types under the xBD directory
    disasters = next(walk(args.input))[1]
    
    for disaster in tqdm(disasters, desc='Masking', unit='disaster'):
        # Create the full path to the images, labels, and mask output directories
            print("disaster ", disaster)
            image_dir = path.join(args.input, disaster, 'images')
            json_dir = path.join(args.input, disaster, 'labels')
            output_dir = path.join(args.input, disaster, 'masks', pre_post_str)
        
            if not path.isdir(image_dir):
                print(
                    "Error, could not find image files in {}.\n\n"
                    .format(image_dir),
                    file=stderr)
                exit(2)
        
            if not path.isdir(json_dir):
                print(
                    "Error, could not find labels in {}.\n\n"
                    .format(json_dir),
                    file=stderr)
                exit(3)
        
            if not path.isdir(output_dir):
                makedirs(output_dir)
        
            mask_chips(json_dir, image_dir, output_dir, args.single_file, args.border, '_' + pre_post_str)

In [None]:
create_masks('train', "post", single_file=False)

In [None]:
create_masks('train', "post", single_file=False)  ## Completed Locally
create_masks('train', "pre", single_file=False)

In [None]:
create_masks('hold', "pre", single_file=False)
create_masks('hold', "post", single_file=False)

In [None]:
create_masks('test', "pre", single_file=False)
create_masks('test', "post", single_file=False)

### Prepare for Pytorch Masks for Models

In [129]:
def sort_masks_by_class(top_dir, meta_df, disaster_name='hurricane-michael', post=True):
    root_path=os.path.join(top_dir, disaster_name)
    disas_post_mask= os.path.join(root_path,'masks', 'post') #source
    print("Source root : ", disas_post_mask)
    disas_class_path=os.path.join(root_path, 'class', 'post')
    print("Destination root : ", disas_class_path)
    if post :
        df = meta_df[meta_df['is_post_image'] == post]
    else:
        df = meta_df[meta_df['is_pre_image'] != post]
    print("Started moving the mask files to class folder for ", disaster_name)
    for idx, file_name in enumerate(df['mask_file_names']):
        source = os.path.join(disas_post_mask, df.iloc[idx]['mask_file_names'])
        destination = os.path.join(disas_class_path, df.iloc[idx]['damage'])
        if os.path.exists(destination):
            pass
        else:
            print( "Creating dir for " , df.iloc[idx]['damage'])
            os.mkdir(destination)
        
        if os.path.exists(source):
            shutil.copy(source, destination)
    print("Finshed moving the mask files to class folder for ", disaster_name)

In [109]:
df_hmc_train['dataset'].value_counts()
df_hmc_train['is_post_image'].value_counts()

mask_file_names
hurricane-michael_00000000_pre_disaster_dc6cb95a-4105-4cff-ae51-562fe8b949e5.png     1
hurricane-michael_00000406_post_disaster_193ea56b-13a9-411b-bf88-b1fd467385e7.png    1
hurricane-michael_00000406_post_disaster_92cd376a-8b4f-495b-997c-7da00e8da079.png    1
hurricane-michael_00000406_post_disaster_cda45860-d351-449c-936f-d0cd97fd4bc6.png    1
hurricane-michael_00000406_post_disaster_b773e571-00a3-4c89-9138-2d1c54bbf785.png    1
                                                                                    ..
hurricane-michael_00000214_post_disaster_b58e6d77-f171-435e-a91e-395b1e669647.png    1
hurricane-michael_00000214_post_disaster_1b0540b4-8ed2-4ef3-b00d-b7707fb20944.png    1
hurricane-michael_00000214_post_disaster_cae1b198-0727-47d8-b67b-b71e8b15c764.png    1
hurricane-michael_00000214_post_disaster_46a9778c-3287-42a8-bfe2-16f5e126d378.png    1
hurricane-michael_00000549_pre_disaster_e89ad57f-2670-495e-8fd4-30467a3fb371.png     1
Name: count, Length: 45372,

In [63]:
df_hmc_hold['dataset'].value_counts()

dataset
hold    14316
Name: count, dtype: int64

In [64]:
df_hmc_test['dataset'].value_counts()

dataset
test    11314
Name: count, dtype: int64

In [123]:
sort_masks_by_class(train_out_dir, df_hmc_train, 'hurricane-michael')

Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/train/disaster/hurricane-michael/masks/post
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/train/disaster/hurricane-michael/class/post
Started moving the mask files to class folder for  hurricane-michael
Finshed moving the mask files to class folder for  hurricane-michael


In [125]:
sort_masks_by_class(hold_out_dir, df_hmc_hold, 'hurricane-michael')
#

Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/hold/disaster/hurricane-michael/masks/post
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/hold/disaster/hurricane-michael/class/post
Started moving the mask files to class folder for  hurricane-michael
Creating dir for  no-damage
Creating dir for  un-classified
Creating dir for  major-damage
Creating dir for  destroyed
Creating dir for  minor-damage
Finshed moving the mask files to class folder for  hurricane-michael


In [126]:
sort_masks_by_class(test_out_dir, df_hmc_test, 'hurricane-michael')

Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/test/disaster/hurricane-michael/masks/post
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/test/disaster/hurricane-michael/class/post
Started moving the mask files to class folder for  hurricane-michael
Creating dir for  major-damage
Creating dir for  no-damage
Creating dir for  minor-damage
Creating dir for  destroyed
Creating dir for  un-classified
Finshed moving the mask files to class folder for  hurricane-michael


In [128]:
sort_masks_by_class(train_out_dir, df_hmc_train, 'hurricane-michael', post=False)
sort_masks_by_class(hold_out_dir, df_hmc_hold, 'hurricane-michael', post=False)
sort_masks_by_class(test_out_dir, df_hmc_test, 'hurricane-michael', post=False)

Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/train/disaster/hurricane-michael/masks/post
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/train/disaster/hurricane-michael/class/post
Started moving the mask files to class folder for  hurricane-michael
Creating dir for  pre
Finshed moving the mask files to class folder for  hurricane-michael
Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/hold/disaster/hurricane-michael/masks/post
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/challenge/hold/disaster/hurricane-michael/class/post
Started moving the mask files to class folder for  hurricane-michael
Creating dir for  pre
Finshed moving the mask files to class folder