In [None]:
import json

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.img_tiles as cimgt
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rasterio.mask
from matplotlib.colors import LogNorm, LinearSegmentedColormap
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from pyspark.sql.functions import udf, col
from pyspark.sql.types import IntegerType
from rasterio.plot import show
from shapely.geometry import box

In [None]:
from src.data import spark_postgis
from src import constants

In [None]:
# Obtain the Spark session from the project's spark_postgis helper; the
# spark.read / spark.sql calls in the cells below all go through it.
spark = spark_postgis.get_spark()

In [None]:
# Register the "gedi_neighbors_nau_l24a" results as a temporary SQL view so
# the WKB geometry columns can be decoded with ST_GeomFromWKB.
neighbors_path = (constants.RESULTS_PATH / "gedi_neighbors_nau_l24a").as_posix()
spark.read.parquet(neighbors_path).createOrReplaceTempView("shots_table")

# Keep every original column, add decoded t1_geom / t2_geom, then drop the
# raw WKB columns they were decoded from.
sdf_all = spark.sql(
    "SELECT *, ST_GeomFromWKB(t1_geometry) AS t1_geom, ST_GeomFromWKB(t2_geometry) AS t2_geom FROM shots_table"
).drop("t1_geometry", "t2_geometry")

# Quick sanity check: row count and a peek at the first three rows.
print(sdf_all.count())
sdf_all.show(3)

In [None]:
@udf(returnType=IntegerType())
def get_days(time_delta):
    """Spark UDF returning the ``.days`` attribute of a time difference.

    Args:
        time_delta: Object exposing a ``.days`` attribute, or ``None``.
            Presumably a ``datetime.timedelta`` produced by subtracting two
            timestamp columns -- confirm against the input schema.

    Returns:
        ``time_delta.days``, or ``None`` when the input is null.
    """
    # A null difference (either timestamp missing) arrives here as None;
    # without this guard `None.days` raises and fails the whole Spark job.
    if time_delta is None:
        return None
    return time_delta.days


def add_time_diff(_sdf):
    """Return ``_sdf`` with an integer ``time_diff`` column in days.

    The column is first filled with ``t2_absolute_time - t1_absolute_time``
    and then overwritten with the result of the ``get_days`` UDF applied to
    that difference.

    Args:
        _sdf: Spark DataFrame with ``t1_absolute_time`` and
            ``t2_absolute_time`` columns.

    Returns:
        A new Spark DataFrame; the input is not modified.
    """
    elapsed = _sdf["t2_absolute_time"] - _sdf["t1_absolute_time"]
    with_interval = _sdf.withColumn("time_diff", elapsed)
    return with_interval.withColumn("time_diff", get_days(col("time_diff")))

In [None]:
# Attach the integer "time_diff" column (days from the t1 shot to the t2
# shot) to every shot pair.
sdf_all = add_time_diff(sdf_all)

In [None]:

# Bring a 10% random subsample of the shot pairs to the driver as a
# GeoDataFrame (first-shot geometry plus time difference) for plotting.
# The seed is fixed so the subsample, and the map drawn from it, does not
# change between kernel restarts.
SAMPLE_SEED = 42
sdf_filt = gpd.GeoDataFrame(
    sdf_all.sample(withReplacement=False, fraction=0.1, seed=SAMPLE_SEED)
    .select("t1_geom", "time_diff")
    .toPandas(),
    geometry="t1_geom",
    crs=constants.WGS84,
)


In [None]:
# Occasionally, we get sets of three shots with a disturbance between.
# Sometimes it is valid to count these as separate samples
# (e.g. s1a -- disturbance -- s1b -- s2,
# where the pair s1a-s2 is a treatment sample and s1b-s2 is a control sample).
# But other times, it's really two measurements of the same sample
# (e.g. s1a -- s1b -- disturbance -- s2, where s1a-s2 and s1b-s2 are both
# measurements of the same disturbance event).
# Just to be on the safe side, we keep only the first pair for each repeated
# shot number (s1 and s2 alike) and drop the rest.
# This function should be run on the control and treatment sets separately.


def remove_duplicates(df):
    """Drop shot pairs that reuse an s1 or s2 shot, keeping the first pair.

    Both duplicate counts are reported against the frame as passed in, i.e.
    before any rows are removed. Rows repeating a ``t1_shot_number`` are
    dropped first, then rows repeating a ``t2_shot_number`` among what is
    left.

    Args:
        df: DataFrame with ``t1_shot_number`` and ``t2_shot_number`` columns.

    Returns:
        A de-duplicated frame; ``df`` itself is not modified.
    """
    s1_repeats = df.duplicated(subset=["t1_shot_number"])
    s2_repeats = df.duplicated(subset=["t2_shot_number"])
    print("Found {} s1 duplicates".format(int(s1_repeats.sum())))
    print("Found {} s2 duplicates".format(int(s2_repeats.sum())))

    unique_s1 = df.drop_duplicates(subset=["t1_shot_number"], keep="first")
    return unique_s1.drop_duplicates(subset=["t2_shot_number"], keep="first")

In [None]:
# Load the "gedi_degradation_glad_0d" shot-pair results.
degrade_sdf = spark.read.parquet(
    (constants.RESULTS_PATH / "gedi_degradation_glad_0d").as_posix()
)

# Reuse add_time_diff (defined earlier in this notebook) instead of
# re-importing pyspark names, redefining the get_days UDF and repeating the
# same two withColumn calls here.
degrade_sdf = add_time_diff(degrade_sdf)
glad_df = gpd.GeoDataFrame(degrade_sdf.toPandas(), geometry="t2_geom").copy()

# Assign each pair to a sample group. Rows matching neither condition keep a
# missing sample_grp and drop out when the groups are selected below.
glad_df.loc[glad_df.control_disturbance > 0, "sample_grp"] = "control"
# Note: points may have a control disturbance as well as a measured disturbance.
# In that case, we include them in the treatment group; we don't care that they
# were also disturbed at another, unmeasured time.
glad_df.loc[glad_df.measured_disturbance > 0, "sample_grp"] = "treatment"


def _print_group_sizes(df):
    """Print the total, treatment and control row counts of ``df``."""
    print(len(df))
    print(len(df[df["sample_grp"] == "treatment"]))
    print(len(df[df["sample_grp"] == "control"]))


# Group sizes before de-duplication.
_print_group_sizes(glad_df)

# De-duplicate each group separately. The rows already carry the right
# sample_grp label, so it is not re-assigned on the filtered frames (doing so
# is redundant and can trigger pandas' SettingWithCopyWarning).
control_df = remove_duplicates(glad_df[glad_df["sample_grp"] == "control"])
treatment_df = remove_duplicates(glad_df[glad_df["sample_grp"] == "treatment"])
glad_df = pd.concat([control_df, treatment_df])

# Group sizes after de-duplication.
_print_group_sizes(glad_df)
control_n = len(glad_df[glad_df["sample_grp"] == "control"])

## Spatial distribution of shot pairs

In [None]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a new colormap from the ``[minval, maxval]`` slice of ``cmap``.

    Args:
        cmap: Colormap to sample; must be callable on an array of positions
            and expose a ``name`` attribute.
        minval: Start of the sampled range, passed to ``np.linspace``.
        maxval: End of the sampled range, passed to ``np.linspace``.
        n: Number of evenly spaced positions sampled from ``cmap``.

    Returns:
        A ``LinearSegmentedColormap`` named ``trunc(<name>,<minval>,<maxval>)``
        built from the sampled colours.
    """
    positions = np.linspace(minval, maxval, n)
    truncated_name = "trunc({n},{a:.2f},{b:.2f})".format(
        n=cmap.name, a=minval, b=maxval
    )
    return LinearSegmentedColormap.from_list(truncated_name, cmap(positions))

In [None]:

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
# cartopy extents are ordered [x0, x1, y0, y1], i.e. [west, east, south, north].
extent = [-85, -40, -20, 10]
ax.set_extent(extent, crs=ccrs.PlateCarree())

# Bounding box used to crop the raster. shapely's box() takes
# (minx, miny, maxx, maxy), so the cartopy-ordered extent must be reordered;
# unpacking it directly (box(*extent)) yields lon [-85, -20] x lat [-40, 10]
# instead of the map extent.
bbox = gpd.GeoDataFrame(
    geometry=[box(extent[0], extent[2], extent[1], extent[3])],
    crs=constants.WGS84,
)
region = gpd.read_file(
    "/home/ah2174/shapefiles/Amazon_rainforest_shapefile.zip"
)
# Get the bounding-box geometry in the GeoJSON-like format rasterio wants
coords = [json.loads(bbox.to_json())["features"][0]["geometry"]]
# Open the reprojected raster
with rasterio.open(
    "/maps/forecol/data/GEDI/NAU/rasters/rh-98_6000m_raster_reprojected.tif"
) as src:
    # Crop to the bounding box and blank out nodata values.
    # NOTE(review): assigning NaN assumes a floating-point raster -- confirm.
    img, transform = rasterio.mask.mask(src, coords, crop=True)
    img[img == src.nodata] = np.nan

cmap = truncate_colormap(plt.get_cmap("gist_earth"), 0.0, 0.9)
# Plot a hidden image without a proper transform so we can get the colorbar:
# rasterio's show() result is not used as the colorbar mappable here.
image_hidden = ax.imshow(img[0], cmap=cmap, vmin=0, vmax=40)
# Plot the 0th band of the raster (mean rh 98)
show(
    img[0],
    transform=transform,
    ax=ax,
    cmap=cmap,
    vmin=0,
    vmax=40,
)
# Add the colorbar in a small inset at the lower left (loc=3)
cbaxes = inset_axes(ax, width="30%", height="2%", loc=3)
cbar = fig.colorbar(image_hidden, cax=cbaxes, orientation="horizontal")
cbaxes.xaxis.set_ticks_position("top")
cbaxes.set_title("Forest height (m)", fontsize=12, loc="left")


# Longitude and latitude labels on the axes
gl = ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)
gl.xlabel_style = {"size": 15}
gl.ylabel_style = {"size": 15}

# Outline of the study region, drawn on top of the raster
region.plot(ax=ax, color="none", edgecolor="black", linewidth=2, zorder=10)


plt.show()

In [None]:
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
extent = [-85, -40, -20, 10]
ax.set_extent(extent, crs=ccrs.PlateCarree())

# Basemap: land background plus the study region as a filled polygon.
region = gpd.read_file("/home/ah2174/shapefiles/Amazon_rainforest_shapefile.zip")
ax.add_feature(cfeature.LAND)
region.plot(
    ax=ax,
    zorder=10,
    color="darkseagreen",
    edgecolor="black",
    linewidth=1,
)

# Sampled shot pairs coloured by the number of days between the two shots.
# The colorbar is drawn by GeoDataFrame.plot into a small inset axes at the
# lower left (loc=3).
cmap = truncate_colormap(plt.get_cmap("plasma"), 0.0, 0.6)
cbaxes = inset_axes(ax, width="30%", height="2%", loc=3)
cbar_orientation = "horizontal"
sdf_filt.plot(
    ax=ax,
    cax=cbaxes,
    column="time_diff",
    cmap=cmap,
    markersize=1,
    zorder=10,
    legend=True,
    legend_kwds=dict(orientation=cbar_orientation),
)

# Colorbar cosmetics: ticks above the bar, title aligned to its left edge.
cbaxes.xaxis.set_ticks_position("top")
cbaxes.tick_params(labelsize=13)
cbaxes.set_title("Time between shots (days)", fontsize=15, loc="left")

# Longitude and latitude labels on the axes
gl = ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)
gl.xlabel_style = dict(size=15)
gl.ylabel_style = dict(size=15)

plt.show()

In [None]:
# Split the de-duplicated GLAD shot pairs into the two sample groups.
degrade_gdf = gpd.GeoDataFrame(glad_df, geometry="t2_geom", crs=constants.WGS84)
treatment_gdf = degrade_gdf.loc[degrade_gdf["sample_grp"] == "treatment"]
control_gdf = degrade_gdf.loc[degrade_gdf["sample_grp"] == "control"]

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
extent = [-85, -40, -20, 10]
ax.set_extent(extent, crs=ccrs.PlateCarree())

region = gpd.read_file("/home/ah2174/shapefiles/Amazon_rainforest_shapefile.zip")
# GeoJSON-like geometry of the FIRST feature of the region file, in the form
# rasterio.mask expects. NOTE(review): any further features in the shapefile
# are not used for the clip -- confirm the file holds a single feature.
coords = [json.loads(region.to_json())["features"][0]["geometry"]]

# Clip the 12 km forest-height raster to that geometry and blank out nodata.
with rasterio.open(
    "/maps/forecol/data/GEDI/NAU/rasters/rh-98_12000m_raster_reprojected.tif"
) as src:
    img, transform = rasterio.mask.mask(src, coords, crop=True)
    img[img == src.nodata] = np.nan

cmap = "summer"
# Hidden image drawn without the raster transform; it only serves as the
# mappable for the colorbar below.
image_hidden = ax.imshow(img[0], cmap=cmap, vmin=0, vmax=40)
# The image that is actually seen: band 0 of the raster (mean rh 98),
# positioned with its transform.
show(img[0], transform=transform, ax=ax, cmap=cmap, vmin=0, vmax=40)
ax.add_feature(cfeature.LAND)

# Colorbar in a small inset at the lower left (loc=3).
cbaxes = inset_axes(ax, width="30%", height="2%", loc=3)
cbar = fig.colorbar(image_hidden, cax=cbaxes, orientation="horizontal")
cbaxes.xaxis.set_ticks_position("top")
cbaxes.set_title("Forest height (m)", fontsize=12, loc="left")

# Treatment points go on second so they stay visible over control points.
control_gdf.plot(ax=ax, color="mediumblue", markersize=1)
treatment_gdf.plot(ax=ax, color="red", markersize=1)

# Outline of the study region on top of everything else.
region.plot(ax=ax, color="none", edgecolor="black", linewidth=1, zorder=10)

# Lon/lat grid labels.
gl = ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)
gl.xlabel_style = dict(size=15)
gl.ylabel_style = dict(size=15)

# Proxy handles for the legend: the points are drawn at markersize=1, so
# larger stand-in markers are used to keep the legend readable.
legend_elements = [
    Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label=group_label,
        markerfacecolor=group_colour,
        markersize=10,
    )
    for group_label, group_colour in (
        ("Control", "mediumblue"),
        ("Treatment\n(Intervening disturbance)", "red"),
    )
]
ax.legend(handles=legend_elements, loc="upper right", fontsize=15)

plt.show()