In [2]:
# Force a full garbage-collection pass before the heavy cells run.
# gc.collect() returns the number of unreachable objects it found; because it is
# the last expression in the cell, that integer is what the cell displays.
import gc
gc.collect()

0

In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Read the merged, cleaned parquet table named below into `features`.
features_path = '../Clean_Data/Weather_Data_w_Veg_Filter_FOD_FPA_Fire_12KM_Merged_Cleaned_Completed_LogDensity_2001_2020.parquet'
features = pd.read_parquet(features_path)

# Give the raster column 'Band1' its descriptive name (renamed in place).
column_renames = {'Band1': 'population_density'}
features.rename(columns=column_renames, inplace=True)

# Month-day key ('MM-DD') derived from the datetime column 'day'; it is the
# calendar-day grouping key used when the climatology is aggregated.
day_timestamps = features['day']
features['date'] = day_timestamps.dt.strftime('%m-%d')
# features['date'] = features['day'].astype(str).str[5:10]

In [4]:
# Display the dtype of every column in `features`.
features.dtypes

lon                                                 float64
lat                                                 float64
day                                          datetime64[ns]
dead_fuel_moisture_1000hr                           float32
dead_fuel_moisture_100hr                            float32
pdsi_pre_interpolated                               float32
pdsi_class                                          float32
max_air_temperature                                 float64
max_relative_humidity                               float32
max_wind_speed                                      float32
min_air_temperature                                 float64
min_relative_humidity                               float32
precipitation_amount                                float32
specific_humidity                                   float32
surface_downwelling_shortwave_flux_in_air           float32
wind_from_direction                                 float32
wind_speed                              

In [5]:
# Columns that are averaged into the climatology, in output order.
#
# Candidates that were considered but are switched off:
#   max_wind_speed, wind_direction_category, population_density,
#   population_density_log, Band1, LAI (leaf area index), IS_FIRE,
#   NWCG_CAUSE_CLASSIFICATION, min_FIRE_SIZE, max_FIRE_SIZE, Year,
#   veg_type_details, fire_attribute, veg, slope_avg, slope_max,
#   road_density_km_km2, road_density_km_km2_log
weather_features = [
    # dead fuel moisture
    'dead_fuel_moisture_1000hr',
    'dead_fuel_moisture_100hr',
    # temperature and humidity extremes
    'max_air_temperature',
    'max_relative_humidity',
    'min_air_temperature',
    'min_relative_humidity',
    # moisture and radiation
    'precipitation_amount',
    'specific_humidity',
    'surface_downwelling_shortwave_flux_in_air',
    # wind
    'wind_from_direction',
    'wind_speed',
    # snow water equivalent
    'SWE',
    'pdsi',
]

In [6]:
# Display the full list of column names in `features`.
features.columns

Index(['lon', 'lat', 'day', 'dead_fuel_moisture_1000hr',
       'dead_fuel_moisture_100hr', 'pdsi_pre_interpolated', 'pdsi_class',
       'max_air_temperature', 'max_relative_humidity', 'max_wind_speed',
       'min_air_temperature', 'min_relative_humidity', 'precipitation_amount',
       'specific_humidity', 'surface_downwelling_shortwave_flux_in_air',
       'wind_from_direction', 'wind_speed', 'wind_direction_category', 'SWE',
       'population_density', 'LAI', 'pdsi', 'population_density_log', 'date'],
      dtype='object')

In [7]:
# Count NaNs one column at a time (no wide intermediate frame), then report
# one line per selected feature.
missing_by_column = {
    column_name: features[column_name].isna().sum()
    for column_name in weather_features
}
for column_name, n_missing in missing_by_column.items():
    print(f"Missing values in {column_name}: {n_missing}")

Missing values in dead_fuel_moisture_1000hr: 0
Missing values in dead_fuel_moisture_100hr: 0
Missing values in max_air_temperature: 0
Missing values in max_relative_humidity: 0
Missing values in min_air_temperature: 0
Missing values in min_relative_humidity: 0
Missing values in precipitation_amount: 0
Missing values in specific_humidity: 0
Missing values in surface_downwelling_shortwave_flux_in_air: 0
Missing values in wind_from_direction: 0
Missing values in wind_speed: 0
Missing values in SWE: 0
Missing values in pdsi: 0


In [8]:
# Build the calendar-day climatology: one row per (lon, lat, 'MM-DD') holding the
# mean over all rows that share that key, for every column in weather_features.
group_keys = ['lon', 'lat', 'date']
features_grouped = features.groupby(group_keys)[weather_features].mean()

# wind_from_direction is a compass bearing in degrees, so an arithmetic mean is
# wrong across the 0/360 wrap (350 and 10 would average to 180 instead of 0).
# Average the unit-vector components instead and convert back to a bearing.
if 'wind_from_direction' in weather_features:
    wind_rad = np.deg2rad(features['wind_from_direction'])
    key_series = [features[key] for key in group_keys]
    sin_mean = np.sin(wind_rad).groupby(key_series).mean()
    cos_mean = np.cos(wind_rad).groupby(key_series).mean()
    wind_mean_deg = np.rad2deg(np.arctan2(sin_mean, cos_mean)) % 360
    # Floating-point rounding of a tiny negative angle can land exactly on 360;
    # fold that back to 0 so the result stays in [0, 360). NaNs are left as NaN.
    wind_mean_deg = wind_mean_deg.mask(wind_mean_deg >= 360, 0.0)
    # Both results are indexed by the same (lon, lat, date) groups, so the
    # assignment aligns row for row.
    features_grouped['wind_from_direction'] = wind_mean_deg

features_grouped = features_grouped.reset_index()

In [10]:
# Directory that receives the climatology output.
save_path = '../Clean_Data/climatology'
# exist_ok=True creates the directory (and any missing parents) and is a no-op
# when it already exists, so the cell is idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(save_path, exist_ok=True)

In [11]:
# Write the grouped climatology next to the other cleaned data; the frame's
# positional index carries no information, so it is not stored.
climatology_file = os.path.join(save_path, 'climatology_2001_2020.parquet')
features_grouped.to_parquet(climatology_file, index=False)

check data

Wind direction categories (not in use): the cells below bin `wind_from_direction` into eight compass sectors.

In [None]:
# Eight 45-degree compass sectors as (lower, upper) bounds in degrees.
# Sector k is centred on 45*k degrees and spans +/- 22.5 degrees around it; the
# modulo makes the lower bound of 'N' wrap to 337.5, so for 'N' lower > upper.
wind_direction_ranges = {
    label: ((45.0 * sector - 22.5) % 360, 45.0 * sector + 22.5)
    for sector, label in enumerate(('N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW'))
}

In [None]:
# Pull the bearings out once as a plain array so every sector test below is a
# vectorised comparison.
wind_from_direction = features_grouped['wind_from_direction'].values

# Start with every row unlabelled; rows matching no sector (e.g. NaN) stay None.
features_grouped['wind_direction_category'] = None

# North straddles the 0/360 wrap, so it is the union of two half-open ranges
# rather than a single lower <= x < upper interval.
wraps_north = (wind_from_direction < 22.5) | (wind_from_direction >= 337.5)
features_grouped.loc[wraps_north, 'wind_direction_category'] = 'N'

# Every remaining sector is an ordinary half-open interval [lower, upper).
for label, (lower, upper) in wind_direction_ranges.items():
    if label == 'N':
        continue
    in_sector = (lower <= wind_from_direction) & (wind_from_direction < upper)
    features_grouped.loc[in_sector, 'wind_direction_category'] = label

In [None]:
# Turn the sector labels into an ordered categorical running clockwise from N
# to NW, so sorting and plotting follow compass order instead of alphabetical.
wind_direction_order = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
features_grouped['wind_direction_category'] = features_grouped['wind_direction_category'].astype(
    pd.CategoricalDtype(categories=wind_direction_order, ordered=True)
)