### Climatology restricted to DNN training and testing times at bottom

In [None]:
import datetime
import logging
import os
from pathlib import Path

import cartopy.crs as ccrs
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray
from matplotlib.colors import BoundaryNorm, ListedColormap

%matplotlib inline
from metpy.units import units

# for ratio plots
from scipy.ndimage import gaussian_filter

import G211
import nclcmaps
from ml_functions import get_args, get_glm
from statisticplot import make_map

# Show INFO-and-above log records, each prefixed with a timestamp.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

In [None]:
import tempfile

# Analysis parameters shared by the cells below.
rptdist = 20  # spatial window; appears as "<rptdist>km" in file names and titles
twin = 1  # time window; appears as "<twin>hr" in file names and titles
# Scratch directory. Fall back to the platform default temporary directory
# when TMPDIR is unset; Path(None) would otherwise raise TypeError.
tmpdir = Path(os.getenv("TMPDIR", tempfile.gettempdir()))

In [None]:
# Map projection and the keyword arguments handed to make_map() for each map.
map_crs = G211.g211
makemapkw = {"projection": map_crs, "scale": 1.3, "draw_labels": False}

# Colormap: a slice of the MPL_Greys colors followed by all MPL_Reds colors.
test = nclcmaps.colors["MPL_Greys"][6:-25] + nclcmaps.colors["MPL_Reds"]
cmap = ListedColormap(test, name="GreysReds")

# Integer color boundaries 0 .. n-1; the 40-km window gets more levels.
if rptdist == 40:
    n = 23
else:
    n = 10
norm = BoundaryNorm(range(n), ncolors=cmap.N, clip=True)

### GLM long-term average from the beginning of GLM (20180213T17)

In [None]:
GLMnc = f"GLM/longterm_average.glm_{rptdist}km_{twin}hr.nc"
GLM_mean = xarray.open_dataset(GLMnc)
# The "nco_input_file_list" global attribute holds a whitespace-separated
# list of the files that went into this netCDF file.
ifiles = GLM_mean.attrs["nco_input_file_list"].split()

# Use the lexicographically first and last names for both the title and the
# parsed time limits, so the two always describe the same period even if the
# list is not sorted.
first_file, last_file = min(ifiles), max(ifiles)
time_range = " - ".join([first_file[:18], last_file[:18]])
# Characters 5-17 of each name are parsed as a YYYYmmdd_HHMM time stamp.
glm_tmin = datetime.datetime.strptime(first_file[5:18], "%Y%m%d_%H%M")
glm_tmax = datetime.datetime.strptime(last_file[5:18], "%Y%m%d_%H%M")

fig, ax = make_map(**makemapkw)
# Divide by the length of the time window (in hours, with metpy units).
GLM_mean = GLM_mean / (twin * units.hour)
sc = GLM_mean["flashes"].plot(
    x="lon",
    y="lat",
    cmap=cmap,
    transform=ccrs.PlateCarree(),
    ax=ax,
    norm=norm,
    cbar_kwargs={"shrink": 0.75},
)
ax.set_title(f"{rptdist}km GLM\n{time_range}")
# `base` is reused by later cells to name the ratio figures.
base, ext = os.path.splitext(GLMnc)
ofile = os.path.realpath(base + ".png")
fig.savefig(ofile)
logging.warning(f"made {ofile}")
# Discard the file list. The division above may already have dropped the
# Dataset attributes, so pop() with a default is used instead of `del`,
# which would raise KeyError in that case.
GLM_mean.attrs.pop("nco_input_file_list", None)

In [None]:
wxbugnc = f"wbug_lightning/flash.{rptdist}km_30min.nc"
logging.info(f"open wbug {wxbugnc}")
wb = xarray.open_dataset(wxbugnc)

# Sanity check: the median spacing between input times must be 1800 s.
dt = wb.time_coverage_start.diff(dim="time_coverage_start").median()
assert dt.dt.seconds == 1800

# Should match ml_functions.load_df()
logging.info(f"sum weatherbug {rptdist}km flashes in {twin}hr time window")

# Step 1: put the data on a regular 30-minute axis, keeping the first value
# found in each bin.
wb_30min = wb.resample(time_coverage_start="30min").first()
# Step 2: centered rolling mean over twin * 2 half-hour blocks; at least
# `twin` blocks must be present for a value to be produced.
wb_rolling = wb_30min.rolling(
    dim={"time_coverage_start": twin * 2},
    min_periods=twin,
    center=True,
)
wb_window_mean = wb_rolling.mean().rename({"time_coverage_start": "valid_time"})
# Step 3: mean of the 30-minute blocks times the number of blocks (twin * 2).
wb_cnt = wb_window_mean * twin * 2

# Same time slice as GLM.
wb_cnt_slice = wb_cnt.sel(valid_time=slice(glm_tmin, glm_tmax))
fmt = "%Y%m%d %H:%M"
valid_times = wb_cnt_slice.valid_time
tmin = valid_times.min().dt.strftime(fmt)
tmax = valid_times.max().dt.strftime(fmt)

# Closed time interval reused in the WxBug map title.
title = f"[{tmin.data}, {tmax.data}]"

logging.info(f"average {valid_times.size} times {title}")
wb_mean = wb_cnt_slice.mean(dim="valid_time")
logging.info("done")

In [None]:
# Map of the time-mean WxBug total (cloud-to-ground + in-cloud) flashes.
fig, ax = make_map(**makemapkw)
sc = (wb_mean.cg + wb_mean.ic).plot(
    x="lon",
    y="lat",
    cmap=cmap,
    transform=ccrs.PlateCarree(),
    ax=ax,  # draw on the map axes explicitly, like the GLM map does
    norm=norm,
    cbar_kwargs={"shrink": 0.75},
)
ax.set_title(f"WxBug cg+ic {title}")

# Name the figure after the WxBug input file. Deriving it from GLMnc would
# overwrite the GLM map saved earlier. `base` is deliberately left alone
# because later cells use it to name the ratio figures.
wb_base = os.path.splitext(wxbugnc)[0]
ofile = f"{wb_base}.png"
fig.savefig(ofile)
logging.warning(f"made {ofile}")

In [None]:
# Merge GLM and WxBug Datasets (long term averages).
# lat/lons are a little different (within machine precision), so
# compat="override" is used: conflicting variables are not compared and are
# taken from the first (GLM) Dataset.
# The long-term GLM Dataset is named GLM_mean; `glm` is not defined anywhere.
ds = GLM_mean.squeeze().drop_vars("time").merge(wb_mean, compat="override")

In [None]:
def ratio_plot(r, title, ofile):
    """Map a ratio field (in percent) with smoothed contours and save it.

    The field is first drawn with ``r.plot`` on a logarithmic color scale
    spanning the smallest to the largest contour level. A Gaussian-smoothed
    copy is then overlaid as labelled contour lines at fixed levels.

    Parameters
    ----------
    r : xarray.DataArray
        Field to plot; must be plottable against "lon" and "lat".
        It is not modified.
    title : str
        Axes title.
    ofile : str or path-like
        Destination passed to ``fig.savefig``.
    """
    levels = [10, 50, 75, 90, 110, 133, 200, 1000]
    sigma = 2.5  # standard deviation of the Gaussian smoother, in grid points
    fig, ax = make_map(**makemapkw)
    r.plot(
        x="lon",
        y="lat",
        transform=ccrs.PlateCarree(),
        ax=ax,
        norm=colors.LogNorm(vmin=min(levels), vmax=max(levels)),
        cmap="RdBu_r",
        cbar_kwargs={"shrink": 0.75},
    )
    # Smooth a copy so the caller's array is not overwritten as a side effect.
    smoothed = r.copy(data=gaussian_filter(r.data, sigma))
    CS = smoothed.plot.contour(
        x="lon",
        y="lat",
        transform=ccrs.PlateCarree(),
        levels=levels,
        colors="0.2",
        ax=ax,
    )
    ax.clabel(CS)
    ax.set_title(title)
    fig.savefig(ofile)
    logging.warning(f"made {ofile}")

In [None]:
# GLM flashes relative to WxBug total (cg + ic) flashes, plotted in percent.
wb_total = ds["cg"] + ds["ic"]
r = ds["flashes"] / wb_total
ratio_plot(
    r * 100,
    "ratio: GLM / WxBug (cg+ic) total flashes",
    f"{base}.GLMWxBugratio.png",
)

In [None]:
# GLM flashes relative to WxBug in-cloud (ic) flashes only, plotted in percent.
r = ds["flashes"] / ds["ic"]
ratio_plot(
    r * 100,
    "ratio: GLM / WxBug IC flashes",
    f"{base}.GLMICratio.png",
)

In [None]:
# Histograms of the long-term mean fields: WxBug total vs. GLM.
fig, ax = plt.subplots()
# Identical binning and axis settings keep the two histograms comparable.
hist_kw = dict(ax=ax, yscale="log", bins=range(5, 210, 10), xlim=(5, 200))
# The legend label previously had an unbalanced parenthesis.
(ds.cg + ds.ic).plot.hist(label="WxBug (cg + ic)", **hist_kw)
ds.flashes.plot.hist(label="GLM", **hist_kw)
ax.legend()
ax.grid()

In [None]:
# Fetch GLM data for this (time window, distance) pair over the period
# covered by the long-term average files.
time_space_window = twin, rptdist
glm_twin = get_glm(
    time_space_window,
    start=glm_tmin,
    end=glm_tmax,
)

In [None]:
# Histograms of the individual time-window counts: WxBug total vs. GLM.
# `var` must be defined here; otherwise it is first assigned in a later cell
# and a top-to-bottom run fails with NameError.
var = f"flashes_{rptdist}km_{twin}hr"

fig, ax = plt.subplots()
# Identical binning and axis settings keep the two histograms comparable.
hist_kw = dict(ax=ax, yscale="log", bins=range(5, 210, 10), xlim=(5, 200))
# The plotted quantity is cg + ic, so the label says so.
(wb_cnt_slice.cg + wb_cnt_slice.ic).plot.hist(label="WxBug (cg + ic)", **hist_kw)
glm_twin[var].plot.hist(label="GLM", **hist_kw)
ax.legend()
ax.grid()

In [None]:
# Minimum flash count that qualifies as an event.
o_thresh = 1
# Color boundaries every 0.5 from 0 to 9.5 for the base-rate maps.
norm = BoundaryNorm(np.arange(0, 10.0, 0.5), ncolors=cmap.N, clip=True)
var = f"flashes_{rptdist}km_{twin}hr"

# Base rate: percentage of valid times with at least o_thresh GLM flashes.
glm_event = glm_twin[var] >= o_thresh
base_rate_glm = (glm_event * 100).mean(dim="valid_time")
base_rate_glm.name = "base rate [%]"

fig, ax = make_map(**makemapkw)
base_rate_glm.plot(
    x="lon",
    y="lat",
    ax=ax,
    cmap=cmap,
    norm=norm,
    transform=ccrs.PlateCarree(),
    cbar_kwargs={"shrink": 0.75},
)
# Turn e.g. "flashes_20km_1hr" into "flashes per grid box per 1hr".
var_label = var.replace(f"_{rptdist}km", " per grid box").replace("_", " per ")
ax.set_title(f"{o_thresh}+ GLM {var_label}")
ofile = f"{o_thresh}+{var}.png"
fig.savefig(ofile)
logging.warning(f"made {ofile}")

In [None]:
# Base rate: percentage of valid times with at least o_thresh WxBug
# (cg + ic) flashes.
wb_total_cnt = wb_cnt_slice.cg + wb_cnt_slice.ic
base_rate_wb = ((wb_total_cnt >= o_thresh) * 100).mean(dim="valid_time")
base_rate_wb.name = "base rate [%]"

fig, ax = make_map(**makemapkw)
base_rate_wb.plot(
    x="lon",
    y="lat",
    ax=ax,
    cmap=cmap,
    norm=norm,
    transform=ccrs.PlateCarree(),
    cbar_kwargs={"shrink": 0.75},
)
# Turn e.g. "flashes_20km_1hr" into "flashes per grid box per 1hr".
var_label = var.replace(f"_{rptdist}km", " per grid box").replace("_", " per ")
ax.set_title(f"{o_thresh}+ total WxBug (cg+ic) {var_label}")
ofile = f"{o_thresh}+cg.ic_{rptdist}km_{twin}hr.png"
fig.savefig(ofile)
logging.warning(f"made {ofile}")

In [None]:
# Divide by the bare array (.values): the two fields have slightly different
# lat and lon coordinates, which would otherwise go away in the division.
ratio_tag = f"{o_thresh}+{var}"
r = base_rate_glm / base_rate_wb.values
r.name = "ratio [%]"
ratio_plot(
    r * 100,
    f"{ratio_tag} base rate ratio: GLM / WxBug",
    f"{ratio_tag}.GLM-WxBugratio.png",
)

## Same time range as DNN train and test

In [None]:
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

# args supplies trainstart/trainend/teststart/testend, drawn below as a
# one-row timeline.
args = get_args(o_thresh, twin)
fig, ax = plt.subplots(figsize=(10, 0.4))
ax.plot()

# Major ticks every half year (January and July), minor ticks every month,
# and no ticks on the y-axis.
xaxis = ax.xaxis
xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
xaxis.set_minor_locator(mdates.MonthLocator())
ax.yaxis.set_major_locator(ticker.NullLocator())

# Each period is a thick horizontal bar at y=0.
lw = 14
ax.hlines(0, args.trainstart, args.trainend, label="train", lw=lw)
ax.hlines(0, args.teststart, args.testend, label="test", color="orange", lw=lw)
ax.legend(loc="upper left", bbox_to_anchor=(1.03, 1.0))

# The title lists both periods in half-open interval notation.
train_span = f"[{args.trainstart:%Y%m%d %-H UTC}, {args.trainend:%Y%m%d %-H UTC})"
test_span = f"[{args.teststart:%Y%m%d %-H UTC}, {args.testend:%Y%m%d %-H UTC})"
ax.set_title(f"{train_span}  {test_span}")
ax.grid()

In [None]:
# GLM data from the start of training to 30 minutes before the end of testing.
# NOTE(review): the 30-minute offset presumably keeps the end of the
# half-open test period out of the selection; confirm against get_glm.
dnn_end = args.testend - pd.Timedelta(minutes=30)
glm20 = get_glm((twin, rptdist), start=args.trainstart, end=dnn_end)

In [None]:
# GLM base rate over the DNN training + testing period: percentage of valid
# times with at least o_thresh flashes.
var = f"flashes_{rptdist}km_{twin}hr"
base_rate_glm = ((glm20[var] >= o_thresh) * 100).mean(dim="valid_time")
base_rate_glm.name = "base rate [%]"

fig, ax = make_map(**makemapkw)
base_rate_glm.plot(
    x="lon",
    y="lat",
    ax=ax,
    cmap=cmap,
    norm=norm,
    transform=ccrs.PlateCarree(),
    cbar_kwargs={"shrink": 0.75},
)
# Turn e.g. "flashes_20km_1hr" into "flashes per grid box per 1hr".
var_label = var.replace(f"_{rptdist}km", " per grid box").replace("_", " per ")
# Use the same format for both ends of the period. The end time previously
# used "%_H" (space-padded hour) while the start used "%-H" (no padding).
time_fmt = "%Y%m%d %-H UTC"
period_start = glm20.valid_time.min().dt.strftime(time_fmt).data
period_end = glm20.valid_time.max().dt.strftime(time_fmt).data
ax.set_title(
    f"base rate {o_thresh}+ GLM {var_label}"
    f"\n{period_start} - {period_end}"
)
ofile = f"baserate_{o_thresh}+{var}.png"
fig.savefig(ofile)
logging.warning(f"made {ofile}")

In [None]:
# WxBug base rate over the DNN training + testing period: percentage of valid
# times with at least o_thresh (cg + ic) flashes.
wb_cnt_slice = wb_cnt.sel(
    valid_time=slice(args.trainstart, args.testend - pd.Timedelta(minutes=30))
)

fig, ax = make_map(**makemapkw)
wb_total_cnt = wb_cnt_slice.cg + wb_cnt_slice.ic
base_rate_wb = ((wb_total_cnt >= o_thresh) * 100).mean(dim="valid_time")
base_rate_wb.name = "base rate [%]"

base_rate_wb.plot(
    x="lon",
    y="lat",
    ax=ax,
    cmap=cmap,
    norm=norm,
    transform=ccrs.PlateCarree(),
    cbar_kwargs={"shrink": 0.75},
)
# Turn e.g. "flashes_20km_1hr" into "flashes per grid box per 1hr".
var_label = var.replace(f"_{rptdist}km", " per grid box").replace("_", " per ")
# Use the same format for both ends of the period. The end time previously
# used "%_H" (space-padded hour) while the start used "%-H" (no padding).
time_fmt = "%Y%m%d %-H UTC"
period_start = wb_cnt_slice.valid_time.min().dt.strftime(time_fmt).data
period_end = wb_cnt_slice.valid_time.max().dt.strftime(time_fmt).data
ax.set_title(
    f"base rate {o_thresh}+ WxBug (cg+ic) {var_label}"
    f"\n{period_start} - {period_end}"
)
ofile = f"baserate_{o_thresh}+cg.ic_{rptdist}km_{twin}hr.png"
fig.savefig(ofile)
logging.warning(f"made {ofile}")