In [1]:
import os
import json
import pandas as pd

In [2]:
# Directory holding the per-image JSON label files
directory = 'train_images_labels_targets/train/labels'

wind_count = 0

# Count the label files whose metadata marks them as a wind disaster.
for file_name in os.listdir(directory):
    file_path = os.path.join(directory, file_name)
    # os.listdir() also returns sub-directories and non-JSON entries
    # (e.g. Jupyter's `.ipynb_checkpoints`); opening or parsing those
    # would abort the whole loop, so skip them up front.
    if not (file_name.lower().endswith('.json') and os.path.isfile(file_path)):
        continue

    # JSON text is UTF-8; do not rely on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Files without a metadata block simply do not count as wind.
    if data.get('metadata', {}).get('disaster_type') == 'wind':
        wind_count += 1

print(f"\nTotal number of files 'wind': {wind_count}")


Total number of files 'wind': 1162


In [3]:
def read_json_files(directory, disaster_type="wind"):
    """Flatten the label files of one disaster type into a feature-level table.

    Every ``*.json`` file directly inside ``directory`` is parsed. Files whose
    ``metadata.disaster_type`` equals ``disaster_type`` contribute one row per
    entry of ``features.lng_lat``; all other files are ignored.

    Parameters
    ----------
    directory : str
        Folder containing the JSON label files.
    disaster_type : str, default "wind"
        Value of ``metadata.disaster_type`` to keep.

    Returns
    -------
    pandas.DataFrame
        One row per feature with the columns ``disaster_type``, ``status``,
        ``filename``, ``feature_type``, ``subtype``, ``uid``, ``wkt``, ``gsd``,
        ``capture_date``, ``off_nadir_angle``, ``pan_resolution``,
        ``sun_azimuth``, ``sun_elevation``, ``target_azimuth``, ``disaster``,
        ``catalog_id``, ``id`` and ``img_name``. Missing optional keys become
        ``None``. The frame has no columns when nothing matched.

    Raises
    ------
    FileNotFoundError
        If ``directory`` does not exist.
    json.JSONDecodeError
        If a ``.json`` file is not valid JSON.
    KeyError
        If a feature has no ``properties`` or ``wkt`` entry.
    """
    records = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the DataFrame index) reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)

        # Skip sub-directories and non-JSON entries (e.g. `.ipynb_checkpoints`)
        # instead of crashing in open()/json.load().
        if not (filename.lower().endswith('.json') and os.path.isfile(file_path)):
            continue

        # JSON text is UTF-8; do not rely on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = json.load(file)

        # A file without a metadata block cannot match and is skipped rather
        # than raising KeyError.
        metadata = content.get('metadata', {})
        if metadata.get('disaster_type') != disaster_type:
            continue

        # Separate Post and Pre by the file name.
        status = 'Post' if 'post' in filename.lower() else 'Pre'
        # Drop the extension and the last '_'-separated token of the stem.
        # For "<disaster>_<number>_post_disaster.json" this yields
        # "<disaster>_<number>_post", i.e. the pre/post tag is still included.
        record_id = "_".join(filename.split('.')[0].split('_')[:-1])

        # One output row per feature: feature properties plus file metadata.
        # (sensor, provider_asset_type and the width/height metadata keys are
        # intentionally not exported.)
        for feature in content.get('features', {}).get('lng_lat', []):
            properties = feature['properties']
            records.append({
                'disaster_type': metadata.get('disaster_type'),
                'status': status,
                'filename': record_id,
                'feature_type': properties.get('feature_type'),
                'subtype': properties.get('subtype'),
                'uid': properties.get('uid'),
                'wkt': feature['wkt'],
                'gsd': metadata.get('gsd'),
                'capture_date': metadata.get('capture_date'),
                'off_nadir_angle': metadata.get('off_nadir_angle'),
                'pan_resolution': metadata.get('pan_resolution'),
                'sun_azimuth': metadata.get('sun_azimuth'),
                'sun_elevation': metadata.get('sun_elevation'),
                'target_azimuth': metadata.get('target_azimuth'),
                'disaster': metadata.get('disaster'),
                'catalog_id': metadata.get('catalog_id'),
                'id': metadata.get('id'),
                'img_name': metadata.get('img_name'),
            })

    return pd.DataFrame(records)


In [4]:
# Build the feature-level table (one row per labelled feature) from the
# wind-disaster label files found in `directory`.
hurricane_df = read_json_files(directory)

In [5]:
# Preview the first rows to sanity-check the flattened columns.
hurricane_df.head()

Unnamed: 0,disaster_type,status,filename,feature_type,subtype,uid,wkt,gsd,capture_date,off_nadir_angle,pan_resolution,sun_azimuth,sun_elevation,target_azimuth,disaster,catalog_id,id,img_name
0,wind,Post,hurricane-matthew_00000000_post,building,minor-damage,6dd06e25-353d-4ca2-8a93-bbdd44b10435,POLYGON ((-73.73999821958201 18.19645168390706...,2.773366,2016-10-09T15:32:03.000Z,36.067112,0.693665,143.505142,59.831169,199.950531,hurricane-matthew,103001005E6EF000,MjU4NjA5NA.LBIab2DEJgS_Qmx3XLqUPmlPU4E,hurricane-matthew_00000000_post_disaster.png
1,wind,Post,hurricane-matthew_00000000_post,building,minor-damage,29ea07d6-33b5-4c85-9a80-20eea786ab2f,POLYGON ((-73.74012695415038 18.19649906723468...,2.773366,2016-10-09T15:32:03.000Z,36.067112,0.693665,143.505142,59.831169,199.950531,hurricane-matthew,103001005E6EF000,MjU4NjA5NA.LBIab2DEJgS_Qmx3XLqUPmlPU4E,hurricane-matthew_00000000_post_disaster.png
2,wind,Post,hurricane-matthew_00000000_post,building,minor-damage,25e32d78-3761-443d-8afb-e4b1d575ad30,"POLYGON ((-73.74014403688365 18.1965502302046,...",2.773366,2016-10-09T15:32:03.000Z,36.067112,0.693665,143.505142,59.831169,199.950531,hurricane-matthew,103001005E6EF000,MjU4NjA5NA.LBIab2DEJgS_Qmx3XLqUPmlPU4E,hurricane-matthew_00000000_post_disaster.png
3,wind,Post,hurricane-matthew_00000000_post,building,minor-damage,63591a27-a002-4c86-a02c-e5c4282339cc,POLYGON ((-73.74006042030388 18.19652266297088...,2.773366,2016-10-09T15:32:03.000Z,36.067112,0.693665,143.505142,59.831169,199.950531,hurricane-matthew,103001005E6EF000,MjU4NjA5NA.LBIab2DEJgS_Qmx3XLqUPmlPU4E,hurricane-matthew_00000000_post_disaster.png
4,wind,Post,hurricane-matthew_00000000_post,building,minor-damage,5416393e-158e-4bee-9f93-b70df29eab02,POLYGON ((-73.74005471031927 18.19658868660606...,2.773366,2016-10-09T15:32:03.000Z,36.067112,0.693665,143.505142,59.831169,199.950531,hurricane-matthew,103001005E6EF000,MjU4NjA5NA.LBIab2DEJgS_Qmx3XLqUPmlPU4E,hurricane-matthew_00000000_post_disaster.png


In [6]:
# (rows, columns) of the flattened table; each row is one feature record.
hurricane_df.shape

(73250, 18)

In [7]:
# Column dtypes and non-null counts. In the recorded run `subtype` is
# non-null for only 36625 of the 73250 rows.
hurricane_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73250 entries, 0 to 73249
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   disaster_type    73250 non-null  object 
 1   status           73250 non-null  object 
 2   filename         73250 non-null  object 
 3   feature_type     73250 non-null  object 
 4   subtype          36625 non-null  object 
 5   uid              73250 non-null  object 
 6   wkt              73250 non-null  object 
 7   gsd              73250 non-null  float64
 8   capture_date     73250 non-null  object 
 9   off_nadir_angle  73250 non-null  float64
 10  pan_resolution   73250 non-null  float64
 11  sun_azimuth      73250 non-null  float64
 12  sun_elevation    73250 non-null  float64
 13  target_azimuth   73250 non-null  float64
 14  disaster         73250 non-null  object 
 15  catalog_id       73250 non-null  object 
 16  id               73250 non-null  object 
 17  img_name    

In [10]:
# Spot-check a single feature uid. In the recorded output it matches two rows:
# one from the Post file (with a damage subtype) and one from the Pre file
# (subtype missing).
hurricane_df[hurricane_df['uid']=='97cbad54-550e-4b99-8c36-2eb2c47229c6']

Unnamed: 0,disaster_type,status,filename,feature_type,subtype,uid,wkt,gsd,capture_date,off_nadir_angle,pan_resolution,sun_azimuth,sun_elevation,target_azimuth,disaster,catalog_id,id,img_name
22318,wind,Post,hurricane-matthew_00000329_post,building,minor-damage,97cbad54-550e-4b99-8c36-2eb2c47229c6,POLYGON ((-74.13475790257469 18.64278474590648...,2.722003,2016-10-11T15:59:14.035Z,35.466908,0.678957,156.503387,62.046967,103.106628,hurricane-matthew,103001005F15AA00,MjU4NjQyMw.k4PLtBoCwyZ-_0JDeD4uw8wGK9k,hurricane-matthew_00000329_post_disaster.png
22349,wind,Pre,hurricane-matthew_00000329_pre,building,,97cbad54-550e-4b99-8c36-2eb2c47229c6,POLYGON ((-74.13475790257465 18.64278474590649...,1.957535,2013-06-09T15:51:17.851Z,14.371258,0.488291,69.879898,74.259109,251.317047,hurricane-matthew,103001002468D900,MjU4NTExOQ.ujjqlEdXOVzT5nJeLVJqtGljsk0,hurricane-matthew_00000329_pre_disaster.png


In [14]:
# Number of distinct images that contributed at least one feature row.
# NOTE(review): the recorded value (1114) is lower than the 1162 wind files
# counted earlier — presumably files with no `lng_lat` features; confirm.
hurricane_df['img_name'].nunique()

1114