## Imports

Import everything we need for this notebook.

In [98]:
import json
import pandas as pd
import numpy as np

## Read and Format Data

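We need to read the labels created when we first downloaded the data, as well as the IBTrACS data. The IBTrACS data needs to be formatted so that blank (`' '`) entries become `NaN` and the `WMO_WIND` and `WMO_PRES` columns are numeric, which the interpolation step below relies on.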

In [100]:
# Input locations: the image labels written when the imagery was downloaded,
# and the IBTrACS track table (CSV).
original_labels_path = '/Users/dylanwhite/Documents/Projects/tropical-cv/data/training/image_data.json'
ibtracs_path = '/Users/dylanwhite/Documents/Projects/tropical-cv/data/ibtracs/ibtracs_goes_east.csv'

# Load the image labels (JSON).
with open(original_labels_path, 'r') as labels_file:
    labels_data = json.load(labels_file)

# Load IBTrACS, turn the single-space placeholder into NaN, and cast the WMO
# wind/pressure columns to float so they can be interpolated later.
ibtracs_df = (
    pd.read_csv(ibtracs_path)
    .replace(' ', np.nan)
    .astype({'WMO_PRES': float, 'WMO_WIND': float})
)

## Interpolate Values

IBTrACS data is reported every 3 hours, but observational data such as wind speed and pressure are only provided for hours 00, 06, 12, and 18. We downloaded the imagery without concern for this, but we want to assign additional data like wind speed to the image labels, some of which fall on the odd-numbered hours (03, 09, 15, 21) in the dataset. To fix this, we can group the records by storm (using the unique `SID` key) and linearly interpolate the missing values within each storm. This should be more than good enough for our purposes.

In [101]:
# Fill the wind/pressure gaps on the odd-numbered hours by interpolating
# linearly within each storm (one group per SID), so values from different
# storms are never blended together.
# NOTE(review): with pandas' default interpolate settings, NaNs that follow a
# storm's last valid observation are filled with that last value rather than
# left missing -- confirm this is acceptable for the labels.
groups = [
    storm_track.assign(
        **storm_track[['WMO_WIND', 'WMO_PRES']].interpolate(method='linear', axis=0)
    )
    for _sid, storm_track in ibtracs_df.groupby('SID')
]
ibtracs_df = pd.concat(groups)

# Display the result with the NaNs that could not be interpolated shown as ' '.
# fillna returns a new frame that is not assigned, so ibtracs_df itself keeps
# NaN in those cells.
ibtracs_df.fillna(' ')

Unnamed: 0.1,Unnamed: 0,index,SID,SEASON,NUMBER,NAME,ISO_TIME,NATURE,LAT,LON,WMO_WIND,WMO_PRES,TRACK_TYPE,DIST2LAND,LANDFALL,IFLAG,STORM_SPEED,STORM_DIR
0,0,666805,2017106N36310,2017,20,ARLENE,2017-04-16 06:00:00,ET,35.8,-50.3,55.0,992.0,main,1225,1225,O______________,10,135
1,1,666806,2017106N36310,2017,20,ARLENE,2017-04-16 09:00:00,ET,35.5,-49.9,55.0,990.5,main,1265,1265,P______________,10,135
2,2,666807,2017106N36310,2017,20,ARLENE,2017-04-16 12:00:00,ET,35.1,-49.5,55.0,989.0,main,1316,1316,O______________,10,135
3,3,666808,2017106N36310,2017,20,ARLENE,2017-04-16 15:00:00,ET,34.7,-49.1,55.0,987.5,main,1367,1367,P______________,10,135
4,4,666809,2017106N36310,2017,20,ARLENE,2017-04-16 18:00:00,ET,34.4,-48.7,55.0,986.0,main,1408,1408,O______________,10,135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18391,19057,721857,2025296N10255,2025,96,SONIA,2025-10-28 18:00:00,TS,15.2,-123.9,,,PROVISIONAL,1625,1625,O______________,9,285
18392,19058,721858,2025296N10255,2025,96,SONIA,2025-10-28 21:00:00,TS,15.3,-124.3,,,PROVISIONAL,1649,1649,P______________,9,285
18393,19059,721859,2025296N10255,2025,96,SONIA,2025-10-29 00:00:00,TS,15.4,-124.8,,,PROVISIONAL,1682,1682,O______________,8,275
18394,19060,721860,2025296N10255,2025,96,SONIA,2025-10-29 03:00:00,TS,15.4,-125.2,,,PROVISIONAL,1714,1714,P______________,8,265


In [108]:
# Tabular view of the per-image label records loaded from the labels JSON.
pd.DataFrame(data=labels_data['images'])

Unnamed: 0,category,file_name,id,width,height,band,original_file,original_ul,track_coordinates,date,df_index
0,positive,/Users/dylanwhite/Documents/Projects/tropical-...,0,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20222611500205_e202...,"[1176, 2555]","[-66.5, 17.4]",2022-09-18 15:00:00,8466
1,positive,/Users/dylanwhite/Documents/Projects/tropical-...,1,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20222611500205_e202...,"[1153, 574]","[-106.9, 18.3]",2022-09-18 15:00:00,8415
2,negative,/Users/dylanwhite/Documents/Projects/tropical-...,2,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20222611500205_e202...,"[3036, 1670]","[-9999, -9999]",2022-09-18 15:00:00,-9999
3,negative,/Users/dylanwhite/Documents/Projects/tropical-...,3,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20222611500205_e202...,"[3779, 4009]","[-9999, -9999]",2022-09-18 15:00:00,-9999
4,positive,/Users/dylanwhite/Documents/Projects/tropical-...,4,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20213090000204_e202...,"[131, 3395]","[-39.0, 42.3]",2021-11-05 00:00:00,7595
...,...,...,...,...,...,...,...,...,...,...,...
984,negative,/Users/dylanwhite/Documents/Projects/tropical-...,984,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20192471800150_e201...,"[596, 1904]","[-9999, -9999]",2019-09-04 18:00:00,-9999
985,positive,/Users/dylanwhite/Documents/Projects/tropical-...,985,1200,1200,13,OR_ABI-L1b-RadF-M3C13_G16_s20171980900382_e201...,"[1417, 194]","[-130.1, 13.3]",2017-07-17 09:00:00,214
986,negative,/Users/dylanwhite/Documents/Projects/tropical-...,986,1200,1200,13,OR_ABI-L1b-RadF-M3C13_G16_s20171980900382_e201...,"[663, 3395]","[-9999, -9999]",2017-07-17 09:00:00,-9999
987,positive,/Users/dylanwhite/Documents/Projects/tropical-...,987,1200,1200,13,OR_ABI-L1b-RadF-M6C13_G16_s20231711200218_e202...,"[1486, 3568]","[-46.3, 11.7]",2023-06-20 12:00:00,8976


In [109]:
# Build one observation record per positive image by looking up the IBTrACS row
# with the same track coordinates (stored as [lon, lat]) and timestamp.
# Images whose category is not 'positive' are skipped.
tc_observation_labels = []
for image in labels_data['images']:
    if image['category'] != 'positive':
        continue

    track_lon = image['track_coordinates'][0]
    track_lat = image['track_coordinates'][1]
    row_matches = (
        (ibtracs_df['LON'] == track_lon) &
        (ibtracs_df['LAT'] == track_lat) &
        (ibtracs_df['ISO_TIME'] == image['date'])
    )
    ibtracs_row = ibtracs_df.loc[row_matches]

    # .item() below only works on exactly one row; fail early with a message
    # that identifies the image instead of pandas' generic size error.
    if len(ibtracs_row) != 1:
        raise ValueError(
            f"Expected exactly one IBTrACS row for image {image['id']} "
            f"(lon={track_lon}, lat={track_lat}, time={image['date']}), "
            f"found {len(ibtracs_row)}"
        )

    # .item() converts each single-element column to a plain Python scalar.
    tc_observation_labels.append({
        'image_id': image['id'],
        'nature': ibtracs_row['NATURE'].item(),
        'wind_speed': ibtracs_row['WMO_WIND'].item(),
        'pressure': ibtracs_row['WMO_PRES'].item(),
        'storm_speed': ibtracs_row['STORM_SPEED'].item(),
        'storm_dir': ibtracs_row['STORM_DIR'].item(),
    })

In [110]:
# Tabular view of the observation records collected for the positive images.
pd.DataFrame(data=tc_observation_labels)

Unnamed: 0,image_id,nature,wind_speed,pressure,storm_speed,storm_dir
0,0,TS,70.0,988.0,9,310
1,1,TS,40.0,997.0,7,335
2,4,TS,45.0,992.0,5,90
3,6,TS,90.0,972.0,11,280
4,8,TS,45.0,1001.0,10,295
...,...,...,...,...,...,...
490,979,TS,30.0,1007.0,9,285
491,981,TS,52.5,997.0,6,290
492,983,TS,85.0,972.0,7,285
493,985,TS,107.5,958.0,9,295
