In [1]:
import xarray as xr
import pandas as pd

# Open the IBTrACS NetCDF file with xarray and print the dataset's text summary.
# Alternative file name kept from an earlier version of this cell: 'IBTrACS.nc'
IBTRACS_FILE = 'IBTrACS.since1980.v04r01.nc'
ds = xr.open_dataset(IBTRACS_FILE)
print(ds)

<xarray.Dataset> Size: 1GB
Dimensions:           (storm: 4827, date_time: 360, quadrant: 4)
Coordinates:
    time              (storm, date_time) datetime64[ns] 14MB ...
    lat               (storm, date_time) float32 7MB ...
    lon               (storm, date_time) float32 7MB ...
Dimensions without coordinates: storm, date_time, quadrant
Data variables: (12/159)
    numobs            (storm) float32 19kB ...
    sid               (storm) |S13 63kB ...
    season            (storm) float32 19kB ...
    number            (storm) int16 10kB ...
    basin             (storm, date_time) |S2 3MB ...
    subbasin          (storm, date_time) |S2 3MB ...
    ...                ...
    reunion_gust      (storm, date_time) float32 7MB ...
    reunion_gust_per  (storm, date_time) float32 7MB ...
    usa_seahgt        (storm, date_time) float32 7MB ...
    usa_searad        (storm, date_time, quadrant) float32 28MB ...
    storm_speed       (storm, date_time) float32 7MB ...
    storm_dir       

In [2]:
# Flatten the selected variables into one row per (storm, date_time) pair,
# keep only rows whose basin is b'NA' and whose name is not b'UNNAMED',
# and add a calendar-date column derived from `time`.
# The whole pipeline is a single chain ending in .assign so the new column is
# created on a fresh frame rather than written into a filtered slice (the
# pattern that triggers pandas' SettingWithCopyWarning).
df0 = (
    ds[["sid", "basin", "name", "wmo_pres"]]
    .to_dataframe()
    .reset_index()
    .query("basin==b'NA' & name!=b'UNNAMED'")
    .assign(date=lambda frame: frame["time"].dt.date)
)
df0

Unnamed: 0,storm,date_time,sid,basin,name,wmo_pres,time,lat,lon,date
19440,54,0,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 12:00:00.000040448,11.000000,-30.000000,1980-07-31
19441,54,1,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 15:00:00.000040448,11.000000,-31.100000,1980-07-31
19442,54,2,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 18:00:00.000040448,10.900000,-32.200001,1980-07-31
19443,54,3,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 21:00:00.000040448,10.800000,-33.299999,1980-07-31
19444,54,4,b'1980214N11330',b'NA',b'ALLEN',1010.0,1980-08-01 00:00:00.000040448,10.800000,-34.299999,1980-08-01
...,...,...,...,...,...,...,...,...,...,...
1737034,4825,34,b'2025184N26277',b'NA',b'CHANTAL',,2025-07-07 12:00:00.000039936,37.200001,-77.000000,2025-07-07
1737035,4825,35,b'2025184N26277',b'NA',b'CHANTAL',,2025-07-07 15:00:00.000039936,37.700001,-76.300003,2025-07-07
1737036,4825,36,b'2025184N26277',b'NA',b'CHANTAL',,2025-07-07 18:00:00.000039936,38.299999,-75.699997,2025-07-07
1737037,4825,37,b'2025184N26277',b'NA',b'CHANTAL',,2025-07-07 21:00:00.000039936,38.799999,-75.199997,2025-07-07


In [7]:
# A storm is selected when its lowest recorded wmo_pres is strictly below this
# value. (The previous comment said 990 while the code used 980; the code's
# value is kept here so the selection itself is unchanged.)
PRESSURE_CUTOFF = 980

# Lowest wmo_pres per storm id. min() skips NaN readings; a storm with no
# readings at all gets NaN, which fails the `<` comparison below and is dropped.
min_pressure_per_storm = df0.groupby("sid")["wmo_pres"].min()

# Filter for storms with a minimum pressure lower than PRESSURE_CUTOFF
storms_to_select = min_pressure_per_storm[min_pressure_per_storm < PRESSURE_CUTOFF].index

# Select the rows from the original DataFrame for the filtered storms.
# .copy() gives later cells an independent frame they can add columns to.
df = df0[df0["sid"].isin(storms_to_select)].copy()
df

Unnamed: 0,storm,date_time,sid,basin,name,wmo_pres,time,lat,lon,date
19440,54,0,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 12:00:00.000040448,11.000000,-30.000000,1980-07-31
19441,54,1,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 15:00:00.000040448,11.000000,-31.100000,1980-07-31
19442,54,2,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 18:00:00.000040448,10.900000,-32.200001,1980-07-31
19443,54,3,b'1980214N11330',b'NA',b'ALLEN',,1980-07-31 21:00:00.000040448,10.800000,-33.299999,1980-07-31
19444,54,4,b'1980214N11330',b'NA',b'ALLEN',1010.0,1980-08-01 00:00:00.000040448,10.800000,-34.299999,1980-08-01
...,...,...,...,...,...,...,...,...,...,...
1719051,4775,51,b'2024309N13283',b'NA',b'RAFAEL',1001.0,2024-11-10 00:00:00.000039936,25.600000,-91.699997,2024-11-10
1719052,4775,52,b'2024309N13283',b'NA',b'RAFAEL',,2024-11-10 03:00:00.000039936,25.799999,-91.800003,2024-11-10
1719053,4775,53,b'2024309N13283',b'NA',b'RAFAEL',1003.0,2024-11-10 06:00:00.000039936,25.900000,-91.900002,2024-11-10
1719054,4775,54,b'2024309N13283',b'NA',b'RAFAEL',,2024-11-10 09:00:00.000039936,26.000000,-91.900002,2024-11-10


In [8]:
# Parameters of the per-storm pressure window. A track point is kept when
#     wmo_pres <= p_min + WINDOW_FRACTION * (REFERENCE_PRESSURE - p_min)
# where p_min is that storm's minimum wmo_pres.
REFERENCE_PRESSURE = 1000
WINDOW_FRACTION = 0.85


def _to_text(value):
    """Return bytes decoded as UTF-8; return any other value unchanged.

    Passing non-bytes through keeps this cell safe to re-run after `sid` and
    `name` have already been decoded.
    """
    return value.decode("utf-8") if isinstance(value, bytes) else value


# Step 1: Get minimum pressure per storm
min_pressures = df.groupby("sid")["wmo_pres"].min()

# Step 2: Compute pressure threshold for each storm
thresholds = min_pressures + WINDOW_FRACTION * (REFERENCE_PRESSURE - min_pressures)

# Step 3: Map thresholds back to the per-observation rows by storm id
df = df.assign(
    min_pressure=df["sid"].map(min_pressures),
    threshold=df["sid"].map(thresholds),
)

# Step 4: Filter rows where pressure ≤ threshold. Rows with a missing wmo_pres
# fail the comparison and are dropped. .copy() makes the result an independent
# frame so the column assignments below do not raise SettingWithCopyWarning.
df = df[df["wmo_pres"] <= df["threshold"]].copy()

# Step 5: Store storm id and name as text instead of bytes
df["sid"] = df["sid"].map(_to_text)
df["name"] = df["name"].map(_to_text)

# Step 6: Write the filtered observations to disk
df.to_csv("storm_info.csv", sep=",", index=False)

In [9]:
# One row per storm: name, minimum pressure, and the first/last timestamps
# among the rows that remain in `df`.
t = (
    df.groupby("sid")
    .agg(
        name=("name", "first"),
        pres=("wmo_pres", "min"),
        start_time=("time", "min"),
        end_time=("time", "max"),
    )
    .reset_index()
)

# The decoded timestamps carry a sub-millisecond artifact (e.g.
# 12:00:00.000040448). Without rounding first, ceil('6h') would push an end
# time that sits on a 6-hour mark to the NEXT mark and inflate forecast_lead by
# 6 hours; an artifact just below a mark would do the same to floor('6h').
# Rounding to whole seconds removes the artifact before snapping.
t["start_time"] = t["start_time"].dt.round("s").dt.floor("6h")
t["end_time"] = t["end_time"].dt.round("s").dt.ceil("6h")

# Whole hours between the snapped start and end times.
t["forecast_lead"] = ((t["end_time"] - t["start_time"]).dt.total_seconds() // 3600).astype(int)

# Start date and time as separate text columns for the output file.
t["date"] = t["start_time"].dt.strftime('%Y%m%d')
t["time"] = t["start_time"].dt.strftime('%H%M')

# Space-separated file with one line per storm.
t[["name", "date", "time", "forecast_lead"]].to_csv("model_input.csv", sep=' ', index=False)