In [1]:
# The neccessary packages
## In-house packages
import lonlat_interp
from lonlat_interp import test_reproduce_smooth_grid, Coord_to_index, Interpolator
from saveload import save,load
## Pacakages from anywhere else
import os
from os import path as osp 
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.warp import reproject, Resampling
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
import matplotlib.patches as mpatches
from dbfread import DBF
import pygrib
import pickle
from joblib import Parallel, delayed
import time
import psutil
from datetime import datetime, timedelta

In [19]:
# The file paths
## Define the base directory (main)
main = osp.join('C:/', 'Users', 'T-Spe', 'OneDrive', 'School', "Fall '25", "Master's Project", 'test')
tifdata = osp.join(main, 'tifdata')

## topography paths
slope_path = osp.join(main, 'LH20_SlpP_220.tif')
elevation_path = osp.join(main, 'LH20_Elev_220.tif')
aspect_path = osp.join(main, 'LH20_Asp_220.tif')

## vegetation paths
fuelmod_path = osp.join(main, 'LH20_F13_200.tif')
fuelvat_path = osp. join(main, 'LH20_F13_200.tif.vat.dbf')

## fire detection (red pixel detection....)
fire_path = osp.join(tifdata, 'ml_data')

# meteorology paths (wrf outputs)
wrfout_path = osp.join(main, 'wrfout_d02_2020-08-29_14-00-00')
#wrfout_path = osp.join(main, 'processed_output-sample.nc')

In [20]:
# Extraction of the data from the files
wrfdata = nc.Dataset(wrfout_path)

In [28]:
#print(wrfdata.variables)

In [23]:
## spatial
lon_grid = wrfdata.variables['XLONG'][0, :, :]
lat_grid = wrfdata.variables['XLAT'][0, :, :]

## temporal
date_raw = wrfdata.variables['Times'][:]
# Convert byte array to string
date_str = ''.join(date_raw[0].astype(str))  # Join the byte elements as a single string
# Parse the string into a datetime object
date = datetime.strptime(date_str, '%Y-%m-%d_%H:%M:%S')

## meteorology
rain_c = wrfdata.variables['RAINC'][0, :, :] # 'RAINC', from convective (deep) thunderstorms
rain_sh = wrfdata.variables['RAINSH'][0, :, :] # 'RAINSH', from shallow convective processes
rain_nc = wrfdata.variables['RAINNC'][0, :, :] # 'RAINNC', from large-scale, non-convective (stratiform) processes
rain = rain_c + rain_sh + rain_nc  # Note then observed rain at t will be rain[t] - rain[t-1] where t is a timestep
temp = wrfdata.variables['T2'][0, :, :] #'T2', the measured temp 2m above the surface
vapor = wrfdata.variables['Q2'][0, :, :] # 'Q2', the water-vapor mixing ratio 2m above the surface
wind_u = wrfdata.variables['U10'][0, :, :]
wind_v = wrfdata.variables['V10'][0, :, :]
#

#### CHECK THE CRS OF THE FILES #####
## topography
elevation_dataset = rasterio.open(elevation_path)
elevation = elevation_dataset.read(1)
aspect_dataset = rasterio.open(aspect_path)
aspect = aspect_dataset.read(1)
slope_dataset = rasterio.open(slope_path)
slope = slope_dataset.read(1)

## vegetation
fuelmod_dataset = rasterio.open(fuelmod_path)
fuelmod = fuelmod_dataset.read(1)

### Read the VAT file
fuel_vat = DBF(fuelvat_path)
fuel_vat_df = pd.DataFrame(iter(fuel_vat))

### Sort the VAT DataFrame by VALUE
fuel_vat_df_sorted = fuel_vat_df.sort_values(by='VALUE').reset_index(drop=True)
# Create a mapping from pixel values to class names
fuel_value_to_class = dict(zip(fuel_vat_df['VALUE'], fuel_vat_df['FBFM13']))

### Map the Fuel Model data to class names
fuelmod = np.vectorize(fuel_value_to_class.get)(fuelmod)

## fire detection (red pixel detection....)
X, y, c, basetime = load(fire_path) # X is a matrix of lon, lat and time (since base_time), y is fire dectections, c is confidence
lon_grid_fire = X[:, 0]
lat_grid_fire = X[:, 1]
time_in_days = X[:, 2]
dates_fire_actual = basetime + pd.to_timedelta(time_in_days, unit='D')
dates_fire =  dates_fire_actual.floor("h")

In [24]:
# Build the interpolator
interp = Coord_to_index(degree = 2)
interp.build(lon_grid, lat_grid) 

In [25]:
# Define the function to interpolate continuous features for each coordinate
def interpolate_data(lon, lat):
    ia,ja = interp.evaluate(lon,lat)
    i, j = np.round(ia).astype(int), np.round(ja).astype(int)
    data_dict = {'date': date, 'lon': lon, 'lat': lat, 'temp': temp[i, j], 'rain': rain[i, j], 'vapor' :vapor[i, j], 'wind':(wind_u[i, j]**2 + wind_v[i, j]**2)**(1/2)}
    return data_dict

In [26]:
# Start timing and resource monitoring
start_time = time.time()
process = psutil.Process(os.getpid())
start_cpu = process.cpu_percent(interval=None)
start_mem = process.memory_info().rss  # in bytes

# Run parallel interpolation
data_interp = Parallel(n_jobs=-3)(
    delayed(interpolate_data)(lon, lat) for lon, lat in zip(lon_grid_fire, lat_grid_fire)
)
# Convert the list of dictionaries to a DataFrame for easy handling
df = pd.DataFrame(data_interp)

# End timing and resource monitoring
end_time = time.time()
end_cpu = process.cpu_percent(interval=None)
end_mem = process.memory_info().rss  # in bytes

# Calculate the differences
total_time = end_time - start_time
cpu_usage = end_cpu - start_cpu
memory_usage = end_mem - start_mem

print(f"Script runtime: {total_time:.2f} seconds")
print(f"CPU usage change: {cpu_usage:.2f}%")
print(f"Memory usage change: {memory_usage / (1024 ** 2):.2f} MB")

Script runtime: 80.17 seconds
CPU usage change: 19.10%
Memory usage change: 786.11 MB


In [27]:
print(df.head)

<bound method NDFrame.head of                        date         lon        lat        temp      rain  \
0       2020-08-29 14:00:00 -154.797897  18.801624  298.946625  0.000000   
1       2020-08-29 14:00:00 -154.800247  18.813318  298.945709  0.000000   
2       2020-08-29 14:00:00 -154.817444  18.810272  298.937683  0.000000   
3       2020-08-29 14:00:00 -154.834686  18.807215  298.937683  0.000000   
4       2020-08-29 14:00:00 -154.851990  18.804148  298.936340  0.000000   
...                     ...         ...        ...         ...       ...   
1343276 2020-08-29 14:00:00 -155.147598  19.370657  290.472290  0.324480   
1343277 2020-08-29 14:00:00 -155.141022  19.368284  290.472290  0.324480   
1343278 2020-08-29 14:00:00 -155.142105  19.377956  290.102875  0.584075   
1343279 2020-08-29 14:00:00 -155.147919  19.376198  290.102875  0.584075   
1343280 2020-08-29 14:00:00 -155.145981  19.372068  290.472290  0.324480   

            vapor      wind  
0        0.015052  8.416578

In [16]:
# Start timing and resource monitoring
start_time = time.time()
process = psutil.Process(os.getpid())
start_cpu = process.cpu_percent(interval=None)
start_mem = process.memory_info().rss  # in bytes

# Run interpolation in a simple loop
data_interp = []
for lon, lat in zip(lon_grid_fire, lat_grid_fire):
    data_interp.append(interpolate_data(lon, lat))

# Convert the list of dictionaries to a DataFrame for easy handling
df = pd.DataFrame(data_interp)

# End timing and resource monitoring
end_time = time.time()
end_cpu = process.cpu_percent(interval=None)
end_mem = process.memory_info().rss  # in bytes

# Calculate the differences
total_time = end_time - start_time
cpu_usage = end_cpu - start_cpu
memory_usage = end_mem - start_mem

print(f"Script runtime: {total_time:.2f} seconds")
print(f"CPU usage change: {cpu_usage:.2f}%")
print(f"Memory usage change: {memory_usage / (1024 ** 2):.2f} MB")

Script runtime: 933.06 seconds
CPU usage change: 19.70%
Memory usage change: 561.68 MB


In [None]:
# Feature engineering of meteorology variables (i.e. averaging to determine effect on fuel moisture)
