In [2]:
import pandas as pd

# Load the three raw inputs: fire detections, topography and weather.
fire = pd.read_csv("fire_data.csv")
topo = pd.read_csv("topo_data.csv")
weather = pd.read_csv("weather_data.csv")

# --- clean date/time formats ---
# acq_time is left-padded to 4 characters so it can be parsed as HHMM
# (e.g. "930" -> "0930"). errors='coerce' turns anything that does not match
# the format into NaT instead of raising.
fire['acq_datetime'] = pd.to_datetime(
    fire['acq_date'] + ' ' + fire['acq_time'].astype(str).str.zfill(4),
    format='%d-%m-%Y %H%M',
    errors='coerce',
)
weather['time'] = pd.to_datetime(weather['time'], format='%d-%m-%Y %H:%M', errors='coerce')

# --- drop duplicates / missing values ---
fire = fire.dropna(subset=['latitude', 'longitude'])
topo = topo.drop_duplicates(subset=['latitude', 'longitude'])

# Hourly bucket (rounded down) used as the join key against the weather table.
# Lowercase 'h' is required: the uppercase 'H' alias is deprecated in recent
# pandas and emits a FutureWarning on every run.
fire['time'] = fire['acq_datetime'].dt.floor('h')


  fire['time'] = fire['acq_datetime'].dt.floor('H')


In [3]:
# Attach the nearest topo point to every fire detection, matching on BOTH
# coordinates. The previous merge_asof(on='latitude') was a 1-D nearest match:
# it paired a detection with whichever topo row had the closest latitude, no
# matter how far away it was in longitude.
from scipy.spatial import cKDTree

MAX_MATCH_DEG = 0.01  # same tolerance as before, now applied to the 2-D distance (in degrees)
coord_cols = ['latitude', 'longitude']

fire_pts = fire.reset_index(drop=True)
# The KD-tree cannot be built from NaN coordinates, so drop such topo rows first.
topo_pts = topo.dropna(subset=coord_cols).reset_index(drop=True)

topo_tree = cKDTree(topo_pts[coord_cols].to_numpy())
match_dist, nearest_idx = topo_tree.query(
    fire_pts[coord_cols].to_numpy(),
    k=1,
    distance_upper_bound=MAX_MATCH_DEG,
)

# query() marks "no neighbour within the bound" with index == len(topo_pts).
# That label does not exist in topo_pts' 0..n-1 index, so reindex() produces an
# all-NaN row for those detections (the same outcome as an unmatched left join).
topo_attrs = (
    topo_pts.drop(columns=coord_cols)
    .reindex(nearest_idx)
    .reset_index(drop=True)
)

# Row order follows `fire`; the fire coordinates are kept as the single
# latitude/longitude pair. Any other column name shared with topo gets a
# '_topo' suffix on the topo side.
fire_topo = fire_pts.join(topo_attrs, rsuffix='_topo')


In [6]:
# Combine date and time safely: pad acq_time to 4 characters so it parses as HHMM
fire['acq_time'] = fire['acq_time'].astype(str).str.zfill(4)
fire['acq_datetime'] = pd.to_datetime(
    fire['acq_date'] + ' ' + fire['acq_time'],
    format='%d-%m-%Y %H%M',
    errors='coerce'
)

# Drop rows where we still couldn't parse (errors='coerce' left them as NaT)
fire = fire.dropna(subset=['acq_datetime'])

# Floor (round DOWN) to the hour, e.g. 10:02 -> 10:00.
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
# recent pandas and emits a FutureWarning.
fire['time'] = fire['acq_datetime'].dt.floor('h')


  fire['time'] = fire['acq_datetime'].dt.floor('H')


In [7]:
# Number of detections whose hourly timestamp is missing (NaT)
fire['time'].isnull().sum()


np.int64(0)

In [8]:
import pandas as pd

# Reload the raw fire detections so the parsing below starts from the file,
# not from whatever earlier cells left in `fire`.
fire = pd.read_csv("fire_data.csv")

# Ensure both are strings
fire['acq_date'] = fire['acq_date'].astype(str)
fire['acq_time'] = fire['acq_time'].astype(str).str.zfill(4)  # pad with leading zeros -> HHMM

# Combine into one proper datetime; anything not matching the format becomes NaT
fire['acq_datetime'] = pd.to_datetime(
    fire['acq_date'] + ' ' + fire['acq_time'],
    format='%d-%m-%Y %H%M',
    errors='coerce'
)

# Check how many failed
print("Unparsed:", fire['acq_datetime'].isna().sum())

# Create the hourly time column (rounded down to the hour).
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
# recent pandas and emits a FutureWarning.
fire['time'] = fire['acq_datetime'].dt.floor('h')

print("Parsed example:")
print(fire[['acq_date','acq_time','acq_datetime','time']].head())


Unparsed: 0
Parsed example:
     acq_date acq_time        acq_datetime                time
0  01-01-2020     1002 2020-01-01 10:02:00 2020-01-01 10:00:00
1  01-01-2020     1002 2020-01-01 10:02:00 2020-01-01 10:00:00
2  01-01-2020     1002 2020-01-01 10:02:00 2020-01-01 10:00:00
3  01-01-2020     1002 2020-01-01 10:02:00 2020-01-01 10:00:00
4  01-01-2020     1002 2020-01-01 10:02:00 2020-01-01 10:00:00


  fire['time'] = fire['acq_datetime'].dt.floor('H')


In [12]:
import pandas as pd

weather = pd.read_csv("weather_data.csv")

# Force datetime parsing; strings that do not match the format become NaT
weather['time'] = pd.to_datetime(weather['time'], format='%d-%m-%Y %H:%M', errors='coerce')

# Count parse failures BEFORE dropping them. Checking for NaT after dropna()
# is always True and would hide how many rows were lost.
n_unparsed = int(weather['time'].isna().sum())

# Drop rows where it failed
weather = weather.dropna(subset=['time'])

# Sort by time (required by merge_asof)
weather = weather.sort_values('time').reset_index(drop=True)

print("✅ Weather times parsed:", n_unparsed == 0)
if n_unparsed:
    print(f"Dropped {n_unparsed} weather rows with an unparseable time")
print(weather.head())


✅ Weather times parsed: True
                 time  temperature_2m  relative_humidity_2m  wind_speed_10m  \
0 2020-01-01 00:00:00            17.2                    41             3.2   
1 2020-01-01 01:00:00            12.9                    58             2.9   
2 2020-01-01 02:00:00             9.0                    61             7.1   
3 2020-01-01 03:00:00             8.0                    58             4.5   
4 2020-01-01 04:00:00             8.9                    54             5.7   

   wind_direction_10m  precipitation  
0                 207            0.0  
1                  30            0.0  
2                  30            0.0  
3                  29            0.0  
4                  35            0.0  


In [13]:
# Make sure the join key is a real datetime, then order the detections
# chronologically with a fresh 0..n-1 index.
fire['time'] = pd.to_datetime(fire['time'])
fire = fire.sort_values(by='time', ignore_index=True)

# Keep the first topo row for each (latitude, longitude) pair.
topo = topo[~topo.duplicated(subset=['latitude', 'longitude'])]


In [14]:
# Attach topo attributes by exact coordinate equality. Detections whose
# (latitude, longitude) pair has no identical topo row keep NaN topo columns.
fire_topo = pd.merge(fire, topo, on=['latitude', 'longitude'], how='left')

# merge_asof requires both inputs to be sorted on the key. Sort here rather
# than relying on ordering done in another cell; mergesort is stable, so rows
# that are already in time order keep their relative order.
combined = pd.merge_asof(
    fire_topo.sort_values('time', kind='mergesort'),
    weather.sort_values('time', kind='mergesort'),
    on='time',
    direction='nearest'
)


In [15]:
# Column dtypes / non-null counts, followed by a preview of the first rows
combined.info()
combined.head(n=5)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29208 entries, 0 to 29207
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   latitude                29208 non-null  float64       
 1   longitude               29208 non-null  float64       
 2   brightness              29208 non-null  float64       
 3   scan                    29208 non-null  float64       
 4   track                   29208 non-null  float64       
 5   acq_date                29208 non-null  object        
 6   acq_time                29208 non-null  object        
 7   satellite               29208 non-null  object        
 8   instrument              29208 non-null  object        
 9   confidence              29208 non-null  object        
 10  version                 29208 non-null  int64         
 11  bright_t31              29208 non-null  float64       
 12  frp                     29208 non-null  float6

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,...,aspect,vegetation_cover,vegetation_type,fuel_vegetation_cover,fuel_vegetation_height,temperature_2m,relative_humidity_2m,wind_speed_10m,wind_direction_10m,precipitation
0,34.33472,-118.52109,298.29,0.52,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
1,33.77607,-118.22866,299.05,0.55,0.43,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
2,34.15387,-118.19527,306.91,0.55,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
3,34.03726,-118.10534,295.22,0.55,0.43,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
4,33.85507,-118.33278,297.68,0.54,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0


In [16]:
# Schema overview
combined.info()

# A bare expression is only displayed when it is the LAST line of a cell, so
# the summary statistics must be printed explicitly or they are discarded.
print(combined.describe())

# Ten columns with the most missing values
combined.isna().sum().sort_values(ascending=False).head(10)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29208 entries, 0 to 29207
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   latitude                29208 non-null  float64       
 1   longitude               29208 non-null  float64       
 2   brightness              29208 non-null  float64       
 3   scan                    29208 non-null  float64       
 4   track                   29208 non-null  float64       
 5   acq_date                29208 non-null  object        
 6   acq_time                29208 non-null  object        
 7   satellite               29208 non-null  object        
 8   instrument              29208 non-null  object        
 9   confidence              29208 non-null  object        
 10  version                 29208 non-null  int64         
 11  bright_t31              29208 non-null  float64       
 12  frp                     29208 non-null  float6

fuel_vegetation_cover     29208
elevation                 29208
slope                     29208
aspect                    29208
vegetation_cover          29208
fuel_vegetation_height    29208
vegetation_type           29208
acq_date                      0
latitude                      0
longitude                     0
dtype: int64

In [18]:
# Snap both coordinate sets to a common grid before joining, since the exact
# float equality join left every topo column empty.
COORD_DECIMALS = 3  # 3 decimals = 0.001 degree grid; use 4 if the topo grid is finer
key_cols = ['latitude', 'longitude']

fire[key_cols] = fire[key_cols].round(COORD_DECIMALS)
topo[key_cols] = topo[key_cols].round(COORD_DECIMALS)

# Rounding can collapse several distinct topo points onto the same key.
# Keep one row per key so the left merge cannot duplicate fire rows.
topo_unique = topo.drop_duplicates(subset=key_cols)

fire_topo = pd.merge(fire, topo_unique, on=key_cols, how='left')


In [19]:
# How many detections are still missing each terrain attribute after the join
fire_topo.loc[:, ['elevation', 'slope', 'aspect']].isnull().sum()


elevation    28978
slope        28978
aspect       28978
dtype: int64

In [20]:
# Pair every detection with the weather record closest in time.
# Each side is sorted on the key first because merge_asof requires it.
combined = pd.merge_asof(
    left=fire_topo.sort_values(by='time'),
    right=weather.sort_values(by='time'),
    on='time',
    direction='nearest',
)


In [21]:
# Column dtypes / non-null counts, followed by a preview of the first rows
combined.info()
combined.head(n=5)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29208 entries, 0 to 29207
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   latitude                29208 non-null  float64       
 1   longitude               29208 non-null  float64       
 2   brightness              29208 non-null  float64       
 3   scan                    29208 non-null  float64       
 4   track                   29208 non-null  float64       
 5   acq_date                29208 non-null  object        
 6   acq_time                29208 non-null  object        
 7   satellite               29208 non-null  object        
 8   instrument              29208 non-null  object        
 9   confidence              29208 non-null  object        
 10  version                 29208 non-null  int64         
 11  bright_t31              29208 non-null  float64       
 12  frp                     29208 non-null  float6

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,...,aspect,vegetation_cover,vegetation_type,fuel_vegetation_cover,fuel_vegetation_height,temperature_2m,relative_humidity_2m,wind_speed_10m,wind_direction_10m,precipitation
0,34.335,-118.521,298.29,0.52,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
1,33.776,-118.229,299.05,0.55,0.43,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
2,34.154,-118.195,306.91,0.55,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
3,34.037,-118.105,295.22,0.55,0.43,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
4,33.855,-118.333,297.68,0.54,0.42,01-01-2020,1002,N20,VIIRS,n,...,,,,,,14.7,34,7.4,313,0.0
