# CALFIRE | NOAA NCEI Data Integration
---

In [79]:
# Dependencies
import pandas as pd
import geopandas as gpd

## 1 Set Variables
---

In [80]:
# Set Variables
yr = 2023

## 2 Load Data
---

### 2.1 Load CALFIRE Fire Data
The geometry of the fires provided by CALFIRE will be used to determine what loal precipitation data will be aggregated for comparative analysis.

In [81]:
fire_gdf = gpd.read_file(f'../outputs/calfire-geojson/ca_fire_{yr}.geojson')
fire_gdf.tail(1)

Unnamed: 0,ALARM_DATE,CONT_DATE,CAUSE_ID,CAUSE,GIS_ACRES,AGENCY,FIRE_NAME,geometry
283,2023-12-22,2023-12-22,14,Structure,38.60516,CDF,BORDER 42,"POLYGON ((-116.76955 32.58624, -116.76953 32.5..."


### 2.2 Load NOAA NCEI Precipitation Data
The precipitation data will be filtered using proximity of weather station to fire geometry to be aggregated later for analysis.

In [82]:
# Load NOAA-NCEI data
prcp_df = pd.read_csv(f'../outputs/noaa-ncei/CA_{yr}_prcp_data.csv')
# Create NOAA-NCEI geometry dataframe
stn_gdf = gpd.GeoDataFrame(prcp_df, geometry=gpd.points_from_xy(prcp_df.lon, prcp_df.lat), crs=4326)
stn_gdf.columns

Index(['station', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
       '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08', '2023-01-09',
       ...
       '2023-12-27', '2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31',
       'lat', 'lon', 'elev', 'name', 'geometry'],
      dtype='object', length=371)

## 3 Filter and Merge Data by Geometry
---

### 3.1 Create GeoDatabase to store data

In [83]:
# Setup geodataframe to store data integration
gdf = fire_gdf

### 3.2 Create Integrated GeoDataframe
Resulting GeoDataFrame will have daily precipitation values aggregated by month, that are filtered by fire location using `sjoin`. Each year of fire data will be associated with the current and previoius precipitation data.

In [84]:
# Loop through years to get precipitation data
for y in [yr-1,yr]:
    # Load NOAA-NCEI and create geometry dataframe
    df = pd.read_csv(f'../outputs/noaa-ncei/CA_{y}_prcp_data.csv')
    # Aggregate NOAA-NCEI precipitation data by month
    # Strip dataframe to precipitation data and transpose to do groupby
    prcp_df = df.drop(columns=['lat','lon','elev','name']).set_index('station').T
    # Convert date index to %Y-%m
    prcp_df.index = prcp_df.index.str.slice(0,7)
    # Perform aggregation using groupby on index and transpose back
    prcp_df = prcp_df.groupby(level=0).sum().round(1).T
    # Create geodataframe
    prcp_gdf = gpd.GeoDataFrame(prcp_df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
    # Filter NOAA-NCEI data with sjoin
    sjoin = fire_gdf.to_crs(32610).sjoin_nearest(prcp_gdf.to_crs(32610)).to_crs(4326).rename(columns={'station':f'STN{y}'})
    gdf = pd.concat( [gdf, sjoin.drop(columns=['ALARM_DATE','CONT_DATE','CAUSE_ID','CAUSE',
        'GIS_ACRES','AGENCY','FIRE_NAME','geometry','index_right'])],axis=1)
# Check geodataframe
gdf.tail(1)

Unnamed: 0,ALARM_DATE,CONT_DATE,CAUSE_ID,CAUSE,GIS_ACRES,AGENCY,FIRE_NAME,geometry,2022-01,2022-02,...,2023-03,2023-04,2023-05,2023-06,2023-07,2023-08,2023-09,2023-10,2023-11,2023-12
283,2023-12-22,2023-12-22,14,Structure,38.60516,CDF,BORDER 42,"POLYGON ((-116.76955 32.58624, -116.76953 32.5...",0.0,34.3,...,45.0,2.6,7.1,9.7,0.0,51.6,1.3,12.5,29.6,41.1


## 4 Export Data
---

In [85]:
gdf.to_file(f'../outputs/calfire-geojson/CA_prcp_{yr}.geojson')