In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from pylr2 import regress2

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
from src.data import spark_postgis
from src import constants
# Correlation method handed as `method=` to every `.corr(...)` call in this
# notebook (both the Spark DataFrame calls and the pandas Series calls).
CORR_METHOD="pearson"

In [None]:
# Spark session obtained from the project helper; every `spark.read` and
# `spark.sql` call further down goes through this object.
# NOTE(review): presumably this session has the spatial SQL functions
# (e.g. ST_GeomFromWKB) registered -- confirm in src.data.spark_postgis.
spark = spark_postgis.get_spark()

In [None]:
# Occasionally, we get sets of three shots with a disturbance between.
# Sometimes it is valid to count these as separate samples
# (e.g. s1a -- disturbance -- s1b -- s2,
# where the pair s1a-s2 is a treatment sample and s1b-s2 is a control sample).
# But other times, it's really two measurements of the same sample
# (e.g. s1a -- s1b -- disturbance -- s2, where s1a-s2 and s1b-s2 are both
# measurements of the same disturbance event).
# Just to be on the safe side, we can remove all the duplicates.
# This function should be run on the control and treatment sets separately.


def remove_duplicates(df):
    """Keep only the first row for every t1 shot and for every t2 shot.

    Prints how many rows of the input repeat an earlier ``t1_shot_number``
    and how many repeat an earlier ``t2_shot_number`` (both counts are taken
    on the input, before anything is removed), then drops repeated t1 shots
    followed by repeated t2 shots.

    Args:
        df: DataFrame with ``t1_shot_number`` and ``t2_shot_number`` columns.

    Returns:
        A new DataFrame without the repeated rows; ``df`` itself is untouched.
    """
    t1_key = ["t1_shot_number"]
    t2_key = ["t2_shot_number"]

    # Count on the unmodified input so the report reflects the raw data.
    n_t1_repeats = int(df.duplicated(subset=t1_key).sum())
    n_t2_repeats = int(df.duplicated(subset=t2_key).sum())
    print("Found {} s1 duplicates".format(n_t1_repeats))
    print("Found {} s2 duplicates".format(n_t2_repeats))

    # Order matters: t2 repeats are evaluated on what survives the t1 pass.
    without_t1_repeats = df.drop_duplicates(subset=t1_key, keep="first")
    return without_t1_repeats.drop_duplicates(subset=t2_key, keep="first")

## A. Setup: Intact control

In [None]:
# Read the near-coincident shot pairs and expose them to Spark SQL.
neighbors_path = (constants.RESULTS_PATH / "gedi_neighbors_nau_test").as_posix()
spark.read.parquet(neighbors_path).createOrReplaceTempView("shots_table")

# Decode both WKB geometry columns, then discard the raw WKB versions.
sdf = spark.sql(
    "SELECT *, ST_GeomFromWKB(t1_geometry) AS t1_geom, ST_GeomFromWKB(t2_geometry) AS t2_geom FROM shots_table"
).drop("t1_geometry", "t2_geometry")

print(sdf.count())
sdf.show(3)

In [None]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import udf, col


@udf(returnType=IntegerType())
def get_days(time_delta):
    """Return the `.days` attribute of the value Spark hands to the UDF."""
    return time_delta.days


# Gap between the two shots of each pair, stored in `time_diff` as the
# integer produced by get_days.
elapsed = sdf["t2_absolute_time"] - sdf["t1_absolute_time"]
sdf = sdf.withColumn("time_diff", get_days(elapsed))

In [None]:
from pyspark.sql.functions import col, mean

# Share of the filtered shot pairs pulled into pandas for plotting, and the
# seed that keeps that subsample stable between runs (given the same input
# partitioning).
SAMPLE_FRACTION = 0.1
SAMPLE_SEED = 42


def _spark_pair_stats(pairs_sdf, t1_name, t2_name):
    """Summarise agreement between two columns of a Spark DataFrame.

    Args:
        pairs_sdf: Spark DataFrame holding both columns.
        t1_name: Name of the first-shot column.
        t2_name: Name of the second-shot column.

    Returns:
        Tuple ``(correlation, bias, reldiff)`` where ``correlation`` comes from
        ``DataFrame.corr`` with ``CORR_METHOD``, ``bias`` is the mean of
        ``t2 - t1`` and ``reldiff`` is the mean of ``(t2 - t1) / (t1 + t2)``.
    """
    # Columns are resolved by name on pairs_sdf itself rather than borrowed
    # from another DataFrame object.
    t1, t2 = col(t1_name), col(t2_name)
    correlation = pairs_sdf.corr(t1_name, t2_name, method=CORR_METHOD)
    # Both means come back in a single row, read by position so nothing
    # depends on Spark's auto-generated "avg(...)" column names.
    bias, reldiff = pairs_sdf.select(
        mean(t2 - t1).alias("bias"),
        mean((t2 - t1) / (t1 + t2)).alias("reldiff"),
    ).first()
    return correlation, bias, reldiff


# Pairs whose two shots have a zero day difference are excluded.
sdf_filtered = sdf.filter(sdf.time_diff != 0)

# One seeded subsample feeds all three plotting frames, so the figures are
# reproducible and every metric is drawn from the same shot pairs.
sampled_pairs = (
    sdf_filtered.sample(withReplacement=False, fraction=SAMPLE_FRACTION, seed=SAMPLE_SEED)
    .select(
        "t1_agbd_a0", "t2_agbd_a0",
        "t1_rh_98_a0", "t2_rh_98_a0",
        "t1_rh_50_a0", "t2_rh_50_a0",
        "time_diff",
    )
    .toPandas()
)
# Independent copies: later cells add columns to each frame separately.
agbd_df = sampled_pairs[["t1_agbd_a0", "t2_agbd_a0", "time_diff"]].copy()
rh98_df = sampled_pairs[["t1_rh_98_a0", "t2_rh_98_a0", "time_diff"]].copy()
rh50_df = sampled_pairs[["t1_rh_50_a0", "t2_rh_50_a0", "time_diff"]].copy()

n = sdf_filtered.count()
print(n)

# Statistics are computed on the full filtered set, not on the subsample.
corr_agbd, bias_agbd, reldiff_agbd = _spark_pair_stats(sdf_filtered, "t1_agbd_a0", "t2_agbd_a0")
corr_rh98, bias_rh98, reldiff_rh98 = _spark_pair_stats(sdf_filtered, "t1_rh_98_a0", "t2_rh_98_a0")
corr_rh50, bias_rh50, reldiff_rh50 = _spark_pair_stats(sdf_filtered, "t1_rh_50_a0", "t2_rh_50_a0")

# Retained from the original cell so `sdf` still ends up with an `agbd_diff`
# column. NOTE(review): nothing visible in this notebook reads it -- confirm
# before removing.
sdf = sdf.withColumn("agbd_diff", (sdf.t2_agbd_a0 - sdf.t1_agbd_a0))

print("Control AGBD bias, corr, reldiff: {},{},{}".format( bias_agbd, corr_agbd, reldiff_agbd))
print("Control RH98 bias, corr, reldiff: {},{},{}".format( bias_rh98, corr_rh98, reldiff_rh98))
print("Control RH50 bias, corr, reldiff: {},{},{}".format( bias_rh50, corr_rh50, reldiff_rh50))

## B. Setup: Disturbance (GLAD)

In [None]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import udf, col


@udf(returnType=IntegerType())
def get_days(time_delta):
    """Return the `.days` attribute of the value Spark hands to the UDF."""
    return time_delta.days


# GLAD shot pairs, with the t1 -> t2 gap stored in `time_diff`.
glad_path = (constants.RESULTS_PATH / "gedi_degradation_glad_0d").as_posix()
degrade_sdf = spark.read.parquet(glad_path)
elapsed = degrade_sdf["t2_absolute_time"] - degrade_sdf["t1_absolute_time"]
degrade_sdf = degrade_sdf.withColumn("time_diff", get_days(elapsed))

glad_df = gpd.GeoDataFrame(degrade_sdf.toPandas(), geometry="t2_geom").copy()

# Assignment order matters. A point may have a control disturbance as well as
# a measured one; writing "treatment" second puts such points in the
# treatment group -- we don't care that they were also disturbed at another,
# unmeasured time.
glad_df.loc[glad_df.control_disturbance > 0, "sample_grp"] = "control"
glad_df.loc[glad_df.measured_disturbance > 0, "sample_grp"] = "treatment"

# Sizes before de-duplication: all rows, then treatment, then control.
print(len(glad_df))
for grp in ("treatment", "control"):
    print(len(glad_df[glad_df["sample_grp"] == grp]))

# De-duplicate each group separately, then stitch them back together. Rows
# whose sample_grp is neither value are not carried over.
control_df = remove_duplicates(glad_df[glad_df["sample_grp"] == "control"])
control_df["sample_grp"] = "control"
treatment_df = remove_duplicates(glad_df[glad_df["sample_grp"] == "treatment"])
treatment_df["sample_grp"] = "treatment"
glad_df = pd.concat([control_df, treatment_df])

# Sizes after de-duplication, in the same order as above.
print(len(glad_df))
for grp in ("treatment", "control"):
    print(len(glad_df[glad_df["sample_grp"] == grp]))
control_n = len(glad_df[glad_df["sample_grp"] == "control"])

In [None]:
control_df = glad_df[glad_df["sample_grp"] == "control"]

# Paired first-shot / second-shot series for each metric.
agbd_t1, agbd_t2 = control_df.t1_agbd_a0, control_df.t2_agbd_a0
rh98_t1, rh98_t2 = control_df.t1_rh_98_a0, control_df.t2_rh_98_a0
rh50_t1, rh50_t2 = control_df.t1_rh_50_a0, control_df.t2_rh_50_a0

# AGBD: correlation, mean change, mean relative change.
agbd_change = agbd_t2 - agbd_t1
glad_agbd_corr = agbd_t1.corr(agbd_t2, method=CORR_METHOD)
glad_agbd_bias = agbd_change.mean()
glad_agbd_reldiff = (agbd_change / (agbd_t1 + agbd_t2)).mean()

# RH98: same three statistics.
rh98_change = rh98_t2 - rh98_t1
glad_rh98_corr = rh98_t1.corr(rh98_t2, method=CORR_METHOD)
glad_rh98_bias = rh98_change.mean()
glad_rh98_reldiff = (rh98_change / (rh98_t1 + rh98_t2)).mean()

# RH50: as above, except infinite ratios are discarded before averaging.
rh50_change = rh50_t2 - rh50_t1
glad_rh50_corr = rh50_t1.corr(rh50_t2, method=CORR_METHOD)
glad_rh50_bias = rh50_change.mean()
rh50_ratio = rh50_change / (rh50_t1 + rh50_t2)
glad_rh50_reldiff = rh50_ratio.replace([np.inf, -np.inf], np.nan).dropna().mean()

mean_t1_agbd = agbd_t1.mean()
print("Control AGBD bias, corr, reldiff: {},{},{}".format(glad_agbd_bias, glad_agbd_corr, glad_agbd_reldiff))
print("Control RH98 bias, corr, reldiff: {},{},{}".format(glad_rh98_bias, glad_rh98_corr, glad_rh98_reldiff))
print("Control RH50 bias, corr, reldiff: {},{},{}".format(glad_rh50_bias, glad_rh50_corr, glad_rh50_reldiff))
print("Mean t1 AGBD: {}".format(mean_t1_agbd))
print("Percent AGBD bias: {:.1f}%".format(glad_agbd_bias / mean_t1_agbd * 100))

# Release the control subset together with every series derived from it.
del control_df
del agbd_t1, agbd_t2, rh98_t1, rh98_t2, rh50_t1, rh50_t2
del agbd_change, rh98_change, rh50_change, rh50_ratio

## C. Setup: Disturbance (AFC)

In [None]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import udf, col


@udf(returnType=IntegerType())
def get_days(time_delta):
    """Return the `.days` attribute of the value Spark hands to the UDF."""
    return time_delta.days


# AFC shot pairs, with the t1 -> t2 gap stored in `time_diff`.
afc_path = (constants.RESULTS_PATH / "gedi_degradation_afc_2022").as_posix()
degrade_sdf = spark.read.parquet(afc_path)
elapsed = degrade_sdf["t2_absolute_time"] - degrade_sdf["t1_absolute_time"]
degrade_sdf = degrade_sdf.withColumn("time_diff", get_days(elapsed))

afc_df = gpd.GeoDataFrame(degrade_sdf.toPandas(), geometry="t2_geom").copy()

# Assignment order matters. A point may have a control disturbance as well as
# a measured one; writing "treatment" second puts such points in the
# treatment group -- we don't care that they were also disturbed at another,
# unmeasured time.
afc_df.loc[afc_df.control_disturbance > 0, "sample_grp"] = "control"
afc_df.loc[afc_df.measured_disturbance > 0, "sample_grp"] = "treatment"

# Sizes before de-duplication: all rows, then treatment, then control.
print(len(afc_df))
for grp in ("treatment", "control"):
    print(len(afc_df[afc_df["sample_grp"] == grp]))

# De-duplicate each group separately, then stitch them back together. Rows
# whose sample_grp is neither value are not carried over.
control_df = remove_duplicates(afc_df[afc_df["sample_grp"] == "control"])
control_df["sample_grp"] = "control"
treatment_df = remove_duplicates(afc_df[afc_df["sample_grp"] == "treatment"])
treatment_df["sample_grp"] = "treatment"
afc_df = pd.concat([control_df, treatment_df])

# Sizes after de-duplication, in the same order as above.
print(len(afc_df))
for grp in ("treatment", "control"):
    print(len(afc_df[afc_df["sample_grp"] == grp]))


In [None]:
control_df = afc_df[afc_df["sample_grp"] == "control"]

# Paired first-shot / second-shot series for each metric.
agbd_t1, agbd_t2 = control_df.t1_agbd_a0, control_df.t2_agbd_a0
rh98_t1, rh98_t2 = control_df.t1_rh_98_a0, control_df.t2_rh_98_a0
rh50_t1, rh50_t2 = control_df.t1_rh_50_a0, control_df.t2_rh_50_a0

# AGBD: correlation, mean change, mean relative change.
agbd_change = agbd_t2 - agbd_t1
afc_agbd_corr = agbd_t1.corr(agbd_t2, method=CORR_METHOD)
afc_agbd_bias = agbd_change.mean()
afc_agbd_reldiff = (agbd_change / (agbd_t1 + agbd_t2)).mean()

# RH98: same three statistics.
rh98_change = rh98_t2 - rh98_t1
afc_rh98_corr = rh98_t1.corr(rh98_t2, method=CORR_METHOD)
afc_rh98_bias = rh98_change.mean()
afc_rh98_reldiff = (rh98_change / (rh98_t1 + rh98_t2)).mean()

# RH50: as above, except infinite ratios are discarded before averaging.
rh50_change = rh50_t2 - rh50_t1
afc_rh50_corr = rh50_t1.corr(rh50_t2, method=CORR_METHOD)
afc_rh50_bias = rh50_change.mean()
rh50_ratio = rh50_change / (rh50_t1 + rh50_t2)
afc_rh50_reldiff = rh50_ratio.replace([np.inf, -np.inf], np.nan).dropna().mean()

mean_t1_agbd = agbd_t1.mean()
print("Control AGBD bias, corr, reldiff: {},{},{}".format(afc_agbd_bias, afc_agbd_corr, afc_agbd_reldiff))
print("Control RH98 bias, corr, reldiff: {},{},{}".format(afc_rh98_bias, afc_rh98_corr, afc_rh98_reldiff))
print("Control RH50 bias, corr, reldiff: {},{},{}".format(afc_rh50_bias, afc_rh50_corr, afc_rh50_reldiff))
print("Mean t1 AGBD: {}".format(mean_t1_agbd))
print("Percent AGBD bias: {:.1f}%".format(afc_agbd_bias / mean_t1_agbd * 100))

# Release the control subset together with every series derived from it.
del control_df
del agbd_t1, agbd_t2, rh98_t1, rh98_t2, rh50_t1, rh50_t2
del agbd_change, rh98_change, rh50_change, rh50_ratio

## 1. Control groups

In [None]:
plt.rcParams['xtick.labelsize'] = 20
plt.rcParams['ytick.labelsize'] = 20
fig = plt.figure(layout='constrained', figsize=(20, 20))
subfigs = fig.subfigures(2, 1, hspace=0.05, height_ratios=[0.85, 1])
axs_top = subfigs[0].subplots(1, 2)
axs_bottom = subfigs[1].subplots(1, 2)


def _draw_pair_panel(ax, t1_vals, t2_vals, limit, gridsize, vmax, mincnt,
                     xlabel, ylabel, corr, bias):
    """Draw one t1-vs-t2 density panel and return its hexbin collection.

    The panel holds a hexbin of the pairs, a dashed 1:1 line, the
    regress2 "reduced major axis" fit in green, and a text box reporting the
    supplied correlation and bias together with the fitted line.

    Args:
        ax: Axes to draw on.
        t1_vals, t2_vals: First-shot and second-shot values.
        limit: Upper bound used for both axes (lower bound is 0).
        gridsize: hexbin gridsize; increase for smaller hexagons.
        vmax, mincnt: Colour-scale maximum and minimum count per hexagon.
        xlabel, ylabel: Axis labels.
        corr, bias: Precomputed statistics shown in the text box.
    """
    hexes = ax.hexbin(t1_vals, t2_vals, gridsize=gridsize, cmap='magma', vmin=0, vmax=vmax, mincnt=mincnt)
    ax.axis([0, limit, 0, limit])
    ax.plot([0, limit], [0, limit], color='black', linestyle='dashed', linewidth=3)

    fit = regress2(t1_vals, t2_vals, _method_type_2="reduced major axis")
    line_xs = np.arange(limit)
    ax.plot(line_xs, fit["intercept"] + fit["slope"] * line_xs, color='green', linewidth=3)

    ax.set_xlabel(xlabel, fontsize=22)
    ax.set_ylabel(ylabel, fontsize=22)
    summary = f"Correlation: {corr:.2f}\nBias: {bias:.2f}\nSMA: y = {fit['intercept']:.2f} + {fit['slope']:.2f}x"
    ax.text(0.05, 0.95, summary, transform=ax.transAxes, fontsize=22,
            verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))
    return hexes


# Top row: subsample of all near-coincident pairs, annotated with the
# statistics computed earlier for the full filtered set.
_draw_pair_panel(
    axs_top[0], rh98_df.t1_rh_98_a0, rh98_df.t2_rh_98_a0,
    limit=50, gridsize=100, vmax=1200, mincnt=150,
    xlabel="RH 98 1 (m)", ylabel="RH 98 2 (m)",
    corr=corr_rh98, bias=bias_rh98,
)
_draw_pair_panel(
    axs_top[1], agbd_df.t1_agbd_a0, agbd_df.t2_agbd_a0,
    limit=500, gridsize=300, vmax=1200, mincnt=150,
    xlabel="AGBD 1 (Mg/ha)", ylabel="AGBD 2 (Mg/ha)",
    corr=corr_agbd, bias=bias_agbd,
)
subfigs[0].suptitle(f"All near-coincident footprints ({n:,} shot pairs)", fontsize=24)

# Bottom row: GLAD control pairs.
glad_control = glad_df[glad_df["sample_grp"] == "control"]
bottom_hexes = _draw_pair_panel(
    axs_bottom[0], glad_control.t1_rh_98_a0, glad_control.t2_rh_98_a0,
    limit=50, gridsize=100, vmax=120, mincnt=6,
    xlabel="RH 98 1 (m)", ylabel="RH 98 2 (m)",
    corr=glad_rh98_corr, bias=glad_rh98_bias,
)
# One shared colorbar for the bottom row, built from the RH 98 panel.
cb = fig.colorbar(bottom_hexes, ax=axs_bottom.ravel().tolist(), orientation='horizontal', ticks=[0, 120], shrink=0.5)
cb.ax.set_xticklabels(['Few pairs', 'Many pairs'])
_draw_pair_panel(
    axs_bottom[1], glad_control.t1_agbd_a0, glad_control.t2_agbd_a0,
    limit=500, gridsize=300, vmax=120, mincnt=6,
    xlabel="AGBD 1 (Mg/ha)", ylabel="AGBD 2 (Mg/ha)",
    corr=glad_agbd_corr, bias=glad_agbd_bias,
)
subfigs[1].suptitle(f"Disturbed forest control ({control_n:,} shot pairs)", fontsize=24)

In [None]:
quantity = "rel_diff"


def _add_change_columns(pairs, t1_name, t2_name, metric_label):
    """Add `rel_diff`, `pct_diff` and `Metric` columns to `pairs` in place.

    `rel_diff` is (t2 - t1) / (t1 + t2); `pct_diff` is
    (t2 - t1) / (t1 + 0.1) * 100. Returns the same frame for convenience.
    """
    t1, t2 = pairs[t1_name], pairs[t2_name]
    pairs["rel_diff"] = (t2 - t1) / (t1 + t2)
    pairs["pct_diff"] = (t2 - t1) / (t1 + 0.1) * 100
    pairs["Metric"] = metric_label
    return pairs


agbd_cols = ["t1_agbd_a0", "t2_agbd_a0", "Control Group"]
rh98_cols = ["t1_rh_98_a0", "t2_rh_98_a0", "Control Group"]
rh50_cols = ["t1_rh_50_a0", "t2_rh_50_a0", "Control Group"]

# Redo the above graph, but color by control group and have metric on the x-axis
for all_pairs in (agbd_df, rh98_df, rh50_df):
    all_pairs["Control Group"] = "All pairs"

glad_df["Control Group"] = "Disturbed forest (GLAD)"
glad_control = glad_df[glad_df.sample_grp == "control"]
glad_df_agbd = glad_control[agbd_cols].copy()
glad_df_rh98 = glad_control[rh98_cols].copy()
glad_df_rh50 = glad_control[rh50_cols].copy()
for subset in (glad_df_agbd, glad_df_rh98, glad_df_rh50):
    print(len(subset))

afc_df["Control Group"] = "Disturbed forest (AFC)"
afc_control = afc_df[afc_df.sample_grp == "control"]
afc_df_agbd = afc_control[agbd_cols].copy()
afc_df_rh98 = afc_control[rh98_cols].copy()
afc_df_rh50 = afc_control[rh50_cols].copy()
for subset in (afc_df_agbd, afc_df_rh98, afc_df_rh50):
    print(len(subset))

# Stack the three control groups per metric, then derive the change columns.
big_agbd_df = _add_change_columns(
    pd.concat([agbd_df, glad_df_agbd, afc_df_agbd]), "t1_agbd_a0", "t2_agbd_a0", "AGBD"
)
big_rh98_df = _add_change_columns(
    pd.concat([rh98_df, glad_df_rh98, afc_df_rh98]), "t1_rh_98_a0", "t2_rh_98_a0", "RH 98"
)
big_rh50_df = _add_change_columns(
    pd.concat([rh50_df, glad_df_rh50, afc_df_rh50]), "t1_rh_50_a0", "t2_rh_50_a0", "RH 50"
)

# Long-form frame for plotting; only the column named by `quantity` is kept.
plot_cols = ["Metric", "Control Group", quantity]
big_df = pd.concat([big_rh98_df[plot_cols], big_rh50_df[plot_cols], big_agbd_df[plot_cols]])

print(big_df)

In [None]:
import seaborn as sns

# Explicit category orders: the quartile labels below are positioned from
# these lists, so the boxes must be drawn in exactly the same order.
metric_order = ["RH 98", "RH 50", "AGBD"]
group_order = ["All pairs", "Disturbed forest (GLAD)", "Disturbed forest (AFC)"]

fig, axs = plt.subplots(1, 1, figsize=(12, 10))
sns.boxplot(
    x='Metric', y=quantity, data=big_df, hue='Control Group',
    order=metric_order, hue_order=group_order, showfliers=False, ax=axs,
)

# 25th / 75th percentile of `quantity` for every (control group, metric) pair.
iqrs = big_df.groupby(['Control Group','Metric']).describe()[quantity][['25%','75%']]
print(iqrs)

# Add labels to the IQR
label_offset = 0.275     # unfortunately this needs to be adjusted manually
for (group_name, metric_name), quartiles in iqrs.iterrows():
    # Metric picks the x tick; the group shifts the label left (first hue),
    # leaves it centred (second) or shifts it right (third). An unexpected
    # label raises ValueError instead of silently reusing a stale position.
    x_pos = metric_order.index(metric_name) + (group_order.index(group_name) - 1) * label_offset
    # Read the quartiles by label: positional integer access on a
    # string-labelled Series is deprecated in pandas.
    for quartile in (quartiles['25%'], quartiles['75%']):
        axs.text(x_pos, quartile, str(round(quartile, 2)), color='white',
                 bbox=dict(facecolor='black', alpha=0.5, edgecolor='black'), ha='center')

# Dashed zero line spanning all three metric slots.
axs.plot([-0.5, 2.5], [0, 0], color='black', linestyle='dashed', alpha = 0.5, linewidth = 2)
axs.set_xticklabels(axs.get_xticklabels(), fontsize=14)
if quantity == "rel_diff":
    axs.set_ylabel("Relative difference", fontsize=18)
elif quantity == "pct_diff":
    axs.set_ylabel("Percent difference", fontsize=18)
axs.set_xlabel("Metric", fontsize=18)
plt.setp(axs.get_legend().get_texts(), fontsize='16') # for legend text
plt.setp(axs.get_legend().get_title(), fontsize='16') # for legend title