<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 40px; margin-top: 0;">
    <div style="flex: 0 0 auto; margin-left: 0; margin-bottom: 0; margin-top: 0;">
        <img src="./pics/UCSD Logo.png" alt="UCSD Logo" style="width: 179px; margin-bottom: 0px; margin-top: 20px;">
    </div>
    <div style="flex: 0 0 auto; margin-left: auto; margin-bottom: 0; margin-top: 20px;">
        <img src="./pics/LANL-logo.png" alt="LANL Logo" style="width: 200px; margin-bottom: 0px;">
    </div>
    <div style="flex: 0 0 auto; margin-left: auto; margin-bottom: 0; margin-top: 20px;">
        <img src="./pics/prowess.png" alt="Prowess Logo" style="width: 200px; margin-bottom: 0px;">
    </div>
    <div style="flex: 0 0 auto; margin-left: auto; margin-bottom: 0; margin-top: 20px;">
        <img src="./pics/wildfire.png" alt="WildFire Logo" width="100"/>
    </div>
</div>

<h1 style="text-align: center; font-size: 48px; margin-top: 0;">Fire-Ready Forests Data Challenge</h1>


# Exploring Field Data

## Setting up the environment

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import cartopy.crs as ccrs
import cartopy.feature as cfeature


## Downloading and opening data

**NOTE:** Before running the following cell, download the *Plot Identification* and *Tree Inventory* files from the *Observed Field Data* dataset and place them in the `data` folder (the cell reads `data/01_plot_identification.csv` and `data/03_tree.csv`).

In [None]:
# Plot-level table, read from the Plot Identification CSV in the local data folder.
df_plots = pd.read_csv("data/01_plot_identification.csv")
# Tree-level table (presumably the Tree Inventory download -- confirm the file name matches).
df_trees = pd.read_csv("data/03_tree.csv")

## Inspecting the columns of the dataframes

In [None]:
def print_column_names(df, cols=4, width=30):
    """Print the column names of ``df`` as a fixed-width grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``df.columns`` are listed.
    cols : int, default 4
        Number of names printed per output row.
    width : int, default 30
        Minimum field width each name is padded to.

    Raises
    ------
    ValueError
        If ``cols`` is smaller than 1.

    Notes
    -----
    Every output row, including a final partially filled one, is terminated
    by a newline so that whatever is printed next starts on a fresh line.
    An empty frame prints nothing.
    """
    if cols < 1:
        raise ValueError("cols must be a positive integer")

    names = list(df.columns)
    # Walk the names in chunks of `cols`; each chunk becomes one printed row.
    for start in range(0, len(names), cols):
        row = names[start:start + cols]
        print(" ".join(f"{name:{width}}" for name in row))

In [None]:
# List every column name of the plots table in a compact grid.
print_column_names(df_plots)

In [None]:
# List every column name of the trees table in a compact grid.
print_column_names(df_trees)

## Inspecting the rows of the dataframes

In [None]:
#
# That's a lot of columns.
# Let's look at the entries for a few of them.
#

In [None]:
#
# Plots dataframe: a handful of identifying columns
#
cols = [
    "inventory_id",
    "plot_coord_x",
    "plot_coord_y",
    "site_name",
    "site_name_label",
    "inventory_date",
]
# Widen the text display only for this printout.
with pd.option_context("display.width", 120):
    print(df_plots.loc[:, cols])

In [None]:
#
# Trees dataframe: a handful of identifying columns
#
# tree_dbh holds the DBH (Diameter at Breast Height)
cols = [
    "inventory_id",
    "tree_id",
    "tree_sp_scientific_name",
    "tree_status",
    "tree_dbh",
]
# Widen the text display only for this printout.
with pd.option_context("display.width", 120):
    print(df_trees.loc[:, cols])

## Where are the sites?

In [None]:
#
# Let's convert the plot x, y coordinates into latitude and longitude and plot them on a map.
# First, look at the raw coordinates next to the SRS column they are expressed in.
#
print(df_plots[["site_name_label", "plot_coord_x", "plot_coord_y", "plot_coord_srs"]])

In [None]:
#
# We can use x, y, and srs (Spatial Reference System) to derive latitude and longitude.
# Rows are handled one SRS code at a time: each group is built in its own CRS
# and then reprojected to EPSG:4326, whose x/y are read below as longitude/latitude.
#
gdfs = []
for crs in df_plots["plot_coord_srs"].unique():
    df_crs = df_plots[df_plots["plot_coord_srs"] == crs]
    # Vectorized point construction instead of one Python-level Point per row.
    geometry = gpd.points_from_xy(df_crs["plot_coord_x"], df_crs["plot_coord_y"])
    # int() normalizes codes that pandas parsed as floats (e.g. 4326.0), which
    # would otherwise be formatted as "EPSG:4326.0" rather than a plain EPSG code.
    gdf_crs = gpd.GeoDataFrame(df_crs, geometry=geometry, crs=f"EPSG:{int(crs)}")
    gdfs.append(gdf_crs.to_crs(epsg=4326))

# Recombine the per-CRS GeoDataFrames (rows stay grouped by SRS code,
# but keep their original index labels).
gdf = pd.concat(gdfs)
gdf["longitude"] = gdf.geometry.x
gdf["latitude"] = gdf.geometry.y
with pd.option_context("display.width", 140):
    print(gdf[["site_name_label", "latitude", "longitude"]])

In [None]:
# Latitude/longitude are drawn on a Plate Carree map; one projection object
# is shared by the axes, the extent and the scatter transform.
plate_carree = ccrs.PlateCarree()
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": plate_carree})

# Focus on California: [lon_min, lon_max, lat_min, lat_max]
california_extent = [-125, -114, 32, 43]

# Add background features, then zoom to the region of interest
ax.add_feature(cfeature.STATES, edgecolor="black")
ax.add_feature(cfeature.LAND, facecolor="lightgray")
ax.set_extent(california_extent, crs=plate_carree)

# One red marker per plot
ax.scatter(
    gdf["longitude"],
    gdf["latitude"],
    s=50,
    color="red",
    label="Plot location",
    transform=plate_carree,
)

ax.legend(fontsize=18)


## How many plots are at each site?

In [None]:
col = "site_name_label"
site_labels = df_plots[col]
# For every distinct site label, count the plots carrying that label.
for label in site_labels.unique():
    nplots = site_labels.eq(label).sum()
    print(f"{nplots} plots from {label}")

## Adding site name to the tree DataFrame via `merge`

In [None]:
col = "site_name_label"
# The guard keeps this cell idempotent: merging again once `col` already exists
# in df_trees would produce suffixed `_x`/`_y` columns instead of a plain `col`.
# NOTE(review): assumes inventory_id is unique in df_plots; duplicated ids would
# duplicate tree rows -- consider validate="many_to_one" once confirmed.
if col not in df_trees.columns:
    df_trees = df_trees.merge(df_plots[["inventory_id", col]], on="inventory_id", how="left")

print(df_trees[col])
print()
print("Unique site names:")
# A left merge leaves NaN for trees whose inventory_id has no matching plot, and
# str.join() only accepts strings, so every label is converted with str() first
# (unmatched trees then show up as "nan" instead of raising a TypeError).
print("\n".join(map(str, df_trees[col].unique())))

## A tree-level distribution: height vs diameter
Note the units of the field data: height is in meters and diameter is in centimeters.

In [None]:
#
# 2D view: let's compare tree height against diameter
#
fig, ax = plt.subplots(figsize=(8, 6))

# Reversed "winter" colormap; cmin is passed to hist2d as its minimum-count threshold
cmap = plt.cm.winter_r
cmin = 0.5

# Bin edges in steps of 2: diameter edges 0..98, height edges 0..58
dbh_edges = np.arange(0, 100, step=2)
ht_edges = np.arange(0, 60, step=2)
bins = (dbh_edges, ht_edges)

# hist2d returns (counts, xedges, yedges, image); only the image is needed
_, _, _, im = ax.hist2d(
    df_trees["tree_dbh"],
    df_trees["tree_ht"],
    bins=bins,
    cmap=cmap,
    cmin=cmin,
)

cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Number of trees")

ax.tick_params(top=True, right=True)
ax.set_xlabel("Diameter at breast height (DBH) [cm]")
ax.set_ylabel("Height (HT) [m]")
ax.set_title("2D histogram of tree height and diameter")