# Optimal Estimation Retrievals

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from db_tools import (read_csv_profiles, read_csv_covariance,
        iter_profiles, split_bands)
from optimal_estimation import (VirtualHATPRO, VirtualHATPRO_Kband,
        VirtualHATPRO_Vband, Gaussian, rgrid, mgrid, z_hatpro, z_top,
        iterate_to_convergence)

from plots import retrieval_template, statistical_eval

%matplotlib inline

plt.rcParams["font.family"] = "DejaVu Sans"

## Statistical evaluation

In [None]:
def split_x(x, n_levels=50):
    """Split a combined state table into temperature and humidity tables.

    The first ``n_levels`` columns of ``x`` are returned unchanged as the
    temperature table; the remaining columns are passed through ``np.exp``
    and returned as the humidity table.  Column labels lose their first two
    characters (temperature part) or first four characters (humidity part)
    so that both tables end up with the same bare labels.
    NOTE(review): the stripped prefixes are presumably "T_" and "lnq_" --
    confirm against the retrieval CSV headers.

    Parameters
    ----------
    x : pandas.DataFrame
        One row per profile, temperature columns first.
    n_levels : int, optional
        Number of leading temperature columns.  Defaults to 50, the value
        that was previously hard-coded.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(T, q)`` with relabelled columns; ``x`` itself is not modified.
    """
    # Copy before relabelling so the rename never acts on a slice of ``x``.
    T = x.iloc[:, :n_levels].copy()
    T.columns = [col[2:] for col in T.columns]
    # np.exp returns a new frame, so no explicit copy is needed here.
    q = np.exp(x.iloc[:, n_levels:])
    q.columns = [col[4:] for col in q.columns]
    return T, q

In [None]:
# Load every profile table used in the statistical evaluation.  Tables read
# from `lnq_*` files are passed through np.exp before use.

# Test-set reference profiles.
T_test = read_csv_profiles("../data/unified/test/T_rasoclim.csv")
q_test = np.exp(read_csv_profiles("../data/unified/test/lnq_rasoclim.csv"))

# Retrieved state vectors for the "full" and "zenith" configurations, split
# into temperature and humidity tables by split_x.
x_full = read_csv_profiles("../data/unified/retrievals/x_cosmo_cosmo_full.csv")
T_full, q_full = split_x(x_full)
x_zenith = read_csv_profiles("../data/unified/retrievals/x_cosmo_cosmo_zenith.csv")
T_zenith, q_zenith = split_x(x_zenith)

# Regression retrieval results.
T_reg = read_csv_profiles("../data/unified/retrievals/T_regression.csv")
q_reg = np.exp(read_csv_profiles("../data/unified/retrievals/lnq_regression.csv"))

# COSMO-7 prior mean profiles.
T_cosmo = read_csv_profiles("../data/unified/priors/T_cosmo7+00+06_mean.csv")
q_cosmo = np.exp(read_csv_profiles("../data/unified/priors/lnq_cosmo7+00+06_mean.csv"))

Convergence statistics

In [None]:
# Per-retrieval convergence tables for the "full" and "zenith" runs; the
# "converged" column is compared against 1 when filtering results below.
conv_full = read_csv_profiles("../data/unified/retrievals/convergence_cosmo_cosmo_full.csv")
conv_zenith = read_csv_profiles("../data/unified/retrievals/convergence_cosmo_cosmo_zenith.csv")

In [None]:
# Column-wise mean of the "full" convergence table (cell output).
conv_full.mean()

In [None]:
# Column-wise mean of the "zenith" convergence table (cell output).
conv_zenith.mean()

RMSE and bias

In [None]:
# Statistical evaluation figure: temperature panels (axT1, axT2) and humidity
# panels (axq1, axq2) comparing the COSMO-7 prior with the optimal estimation
# retrievals against the test profiles.
fig, (axT1, axT2, axq1, axq2) = retrieval_template([8, 7],
        Tlims=[(-0.2, 2.7), (0, 12), (-0.2, 1.3), (0, 2.5)],
        qlims=[(-0.22, 1.05), (0, 12), (-0.22, 1.05), (0, 2.5)],
        )

# Only retrievals flagged as converged enter the statistics.  The masks are
# loop-invariant, so build them once.
converged_zenith = conv_zenith["converged"] == 1
converged_full = conv_full["converged"] == 1

# NOTE(review): four colours are passed for three data sets; the last one
# ("#33a02c") looks like a leftover -- confirm how statistical_eval pairs
# colours with data before trimming it.
for ax in [axT1, axT2]:
    statistical_eval(ax, T_test,
         T_cosmo,
         T_zenith.loc[converged_zenith,:],
         T_full.loc[converged_full,:],
         labels=["COSMO-7", "zenith only", "all information"],
         colors=["#000000", "#666666", "#1f78b4", "#33a02c"])

# Humidity tables are scaled by 1000 before evaluation; only the "full"
# retrieval is drawn, labelled as identical to the zenith-only result.
for ax in [axq1, axq2]:
    statistical_eval(ax, q_test*1000,
         q_cosmo*1000,
         q_full.loc[converged_full,:]*1000,
         labels=["COSMO-7", "all information\n= zenith only"],
         colors=["#000000", "#1f78b4"])
    ax.set_ylabel("")

axT1.legend(loc="upper right", fontsize=11)
axq1.legend(loc="upper right", fontsize=11)

axT1.set_title("optimal estimation retrievals", loc="left", size=11)
axq1.set_title("bias (dashed) and rmse (solid)", loc="right", size=11)

# Lay out once, after legends and titles exist, then write the figure.
fig.tight_layout()
fig.savefig("../tex/figures/retrieval_optest.pdf")

## Case Studies

### Data Preparation

In [None]:
# Observation error covariance read from file, plus its K-band and V-band
# parts as returned by split_bands.
obs_cov = read_csv_covariance("../data/unified/priors/TB_mwrtm_fap_igmk_cov.csv")
obs_cov_k, obs_cov_v = split_bands(obs_cov)

# Add 0.5 K uncorrelated instrument noise: a standard deviation of 0.5 K is
# a variance of 0.5**2 = 0.25 K^2, added on the diagonal of each matrix.
obs_cov = obs_cov + 0.25*np.eye(obs_cov.shape[0])
obs_cov_k = obs_cov_k + 0.25*np.eye(obs_cov_k.shape[0])
obs_cov_v = obs_cov_v + 0.25*np.eye(obs_cov_v.shape[0])

# Zero-mean Gaussian error distributions built from the three covariances.
obs_error = Gaussian(np.zeros(obs_cov.shape[0]), obs_cov)
obs_error_k = Gaussian(np.zeros(obs_cov_k.shape[0]), obs_cov_k)
obs_error_v = Gaussian(np.zeros(obs_cov_v.shape[0]), obs_cov_v)

In [None]:
# Table read from psfc.csv; its "p" column is looked up per case below
# (presumably surface pressure -- confirm).
p = read_csv_profiles("../data/unified/test/psfc.csv")

# COSMO-7 prior: one covariance matrix and a table of mean state vectors.
prior_cov = read_csv_covariance("../data/unified/priors/x_cosmo7+00+06_cov.csv")
prior_means = read_csv_profiles("../data/unified/priors/x_cosmo7+00+06_mean.csv")

# Simulated observations with added zero-mean Gaussian noise (std 0.5),
# then split into K-band and V-band parts.
# NOTE(review): np.random is not seeded in this cell, so the noisy
# observations differ between runs unless a seed is set elsewhere.
ys = read_csv_profiles("../data/unified/test/TB_mwrtm.csv")
ys = ys + np.random.normal(0., scale=0.5, size=ys.shape)
ys_k, ys_v = split_bands(ys)

In [None]:
def profiles_to_dict(pattern):
    """Collect the profiles matching ``pattern`` into a dictionary.

    Every ``(key, value)`` pair yielded by ``iter_profiles(pattern)`` is
    stored under the key formatted as a "YYYY-MM-DD HH:MM:SS" string.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return {
        when.strftime(timestamp_format): profile
        for when, profile in iter_profiles(pattern)
    }


# Reference profiles and regression retrievals, keyed by timestamp string.
profiles = profiles_to_dict("../data/unified/test/<VAR>_rasoclim.csv")
regprofiles = profiles_to_dict("../data/unified/retrievals/<VAR>_regression.csv")

In [None]:
def to_state(profile):
    """Stack the "T" and "lnq" columns of ``profile`` into one column vector.

    Returns an array of shape ``(n, 1)`` holding all "T" values followed by
    all "lnq" values.
    """
    temperature = profile["T"].values
    ln_humidity = profile["lnq"].values
    stacked = np.hstack([temperature, ln_humidity])
    return stacked.reshape(-1, 1)

def to_profile(state):
    """Turn a state column vector back into a two-column profile table.

    The first 50 rows of ``state`` become the "T" column and the remaining
    rows the "lnq" column.  Rows are labelled "z=<height>m", with one label
    per entry of the module-level ``rgrid`` (truncated to an integer).
    """
    table = np.hstack([state[:50], state[50:]])
    level_labels = ["z={}m".format(int(height)) for height in rgrid]
    return pd.DataFrame(table, columns=["T", "lnq"], index=level_labels)

def get_prior(valid):
    """Build the prior distribution for the profile labelled ``valid``.

    The mean is the ``valid`` row of ``prior_means`` reshaped to a column
    vector; the covariance is the shared ``prior_cov``.
    """
    mean = prior_means.loc[valid, :].values.reshape(-1, 1)
    return Gaussian(mean, prior_cov)

### Plots

In [None]:
# Retrieval grid heights relative to z_hatpro, divided by 1000; used as the
# vertical coordinate of every profile plot and labelled there as
# "height above ground [km]" (assumes rgrid and z_hatpro are in metres).
plotgrid = (rgrid-z_hatpro)/1000

In [None]:
def plot_raso(axT, axq, valid, color="#000000", label="raso", zorder=None):
    """Draw the reference profile stored under ``valid`` on both axes.

    ``axT`` receives the "T" column, ``axq`` receives exp("lnq") * 1000,
    both plotted against ``plotgrid``.  ``color``, ``label`` and ``zorder``
    are forwarded to both ``plot`` calls.
    """
    sounding = profiles[valid]
    line_style = dict(linewidth=2, color=color, label=label, zorder=zorder)
    axT.plot(sounding["T"].values, plotgrid, **line_style)
    humidity = np.exp(sounding["lnq"].values) * 1000
    axq.plot(humidity, plotgrid, **line_style)

def plot_regression(axT, axq, valid, color="#33a02c", label="regression", zorder=None):
    """Draw the regression retrieval stored under ``valid`` on both axes.

    ``axT`` receives the "T" column, ``axq`` receives exp("lnq") * 1000,
    both plotted against ``plotgrid``.  ``color``, ``label`` and ``zorder``
    are forwarded to both ``plot`` calls.
    """
    retrieved = regprofiles[valid]
    line_style = dict(linewidth=2, color=color, label=label, zorder=zorder)
    axT.plot(retrieved["T"].values, plotgrid, **line_style)
    humidity = np.exp(retrieved["lnq"].values) * 1000
    axq.plot(humidity, plotgrid, **line_style)

def plot_prior(axT, axq, valid, color="#666666", label="COSMO-7", zorder=None):
    """Draw the prior mean for ``valid`` on both axes.

    The mean of ``get_prior(valid)`` is converted with ``to_profile``;
    ``axT`` receives its "T" column and ``axq`` receives exp("lnq") * 1000,
    both plotted against ``plotgrid``.  ``color``, ``label`` and ``zorder``
    are forwarded to both ``plot`` calls.
    """
    prior_mean = get_prior(valid).mean
    background = to_profile(prior_mean)
    line_style = dict(linewidth=2, color=color, label=label, zorder=zorder)
    axT.plot(background["T"].values, plotgrid, **line_style)
    humidity = np.exp(background["lnq"].values) * 1000
    axq.plot(humidity, plotgrid, **line_style)

def plot_retrieval(axT, axq, ret, iteration, color="#1f78b4", label="optimal estimation", uncertainty=False, zorder=None):
    """Draw one iterate of an optimal estimation retrieval on both axes.

    Parameters
    ----------
    axT, axq : matplotlib axes
        Targets for the "T" column and for exp("lnq") * 1000, respectively.
    ret : retrieval object
        Must provide ``μs`` and ``covs``, both indexable by ``iteration``.
    iteration : int
        Index of the iterate to draw.
    color, label, zorder
        Forwarded to the line plots; ``color`` is also used for the band.
    uncertainty : bool, optional
        If true, additionally shade the band mean +/- one standard
        deviation, taken from the diagonal of ``ret.covs[iteration]``.

    Notes
    -----
    The band is drawn 50 zorder units below the line.  With ``zorder=None``
    the band keeps matplotlib's default zorder; previously this combination
    raised ``TypeError`` from ``None - 50``.

    The standard deviation is computed exactly as ``sqrt(diag(cov))``
    rather than estimated from 1000 random draws, so the band is
    deterministic and does not consume random numbers.
    """
    profile = to_profile(ret.μs[iteration])
    line_style = dict(linewidth=2, color=color, label=label, zorder=zorder)
    axT.plot(profile["T"].values, plotgrid, **line_style)
    axq.plot(np.exp(profile["lnq"].values)*1000, plotgrid, **line_style)
    if not uncertainty:
        return

    # Marginal standard deviation of every state element.
    cov = np.asarray(ret.covs[iteration])
    std = to_profile(np.sqrt(np.diag(cov)).reshape(-1, 1))
    lower, upper = profile-std, profile+std

    # Only pass a zorder for the band when the caller supplied one.
    band_style = {} if zorder is None else {"zorder": zorder-50}
    axT.fill_betweenx(plotgrid, lower["T"].values, upper["T"].values, color=color, alpha=0.25, **band_style)
    # The band is applied in ln(q) space and then transformed, so it is
    # asymmetric around the humidity line.
    axq.fill_betweenx(plotgrid, np.exp(lower["lnq"].values)*1000, np.exp(upper["lnq"].values)*1000, color=color, alpha=0.3, **band_style)

### Retrieval Setup

In [None]:
# One virtual instrument per channel set (all channels, K band, V band),
# each on the same retrieval/model grids and paired with the matching
# observation-error distribution.  Only `vh` is used in the cells visible
# here, via vh.retrieve.
vh = VirtualHATPRO(z_retrieval=rgrid, z_model=mgrid, error=obs_error)
vh_k = VirtualHATPRO_Kband(z_retrieval=rgrid, z_model=mgrid, error=obs_error_k)
vh_v = VirtualHATPRO_Vband(z_retrieval=rgrid, z_model=mgrid, error=obs_error_v)

### Case Study Dates

In [None]:
# Timestamps of the case studies, in the "YYYY-MM-DD HH:MM:SS" form that
# profiles_to_dict uses for its dictionary keys.
case1 = "2015-10-28 02:15:05" # Ground-based inversion
case2 = "2015-09-11 03:48:00" # Elevated inversion, cloudy

### Case 1

In [None]:
# Select the case to process in this section.
valid = case1

# Inputs for this case: prior distribution, scalar from the "p" column of
# `p`, and the noisy observation as a column vector.
prior = get_prior(valid)
p0 = float(p.loc[valid,"p"])
y = ys.loc[valid,:].values.reshape(-1, 1)

# Set up the retrieval with the prior mean as third argument (presumably the
# first guess -- confirm in optimal_estimation) and iterations=0, then let
# iterate_to_convergence drive it.  `best` is later used as an iterate index.
ret = vh.retrieve(y, p0, prior.mean, prior, iterations=0)
%time converged, best = iterate_to_convergence(ret, debug=True)
converged

In [None]:
# Show the γ values stored on the retrieval object (presumably one per
# iteration -- confirm in optimal_estimation).
ret.γs

In [None]:
# Case 1 figure: reference profile, best optimal estimation iterate (with
# uncertainty band) and regression retrieval, drawn on two panel pairs with
# different axis limits.
fig, (axT1, axT2, axq1, axq2) = retrieval_template(
    figsize=[8, 7],
    Tlims=[(210, 288), (0, 12), (273, 286), (0, 2.5)],
    qlims=[(0, 5.1), (0, 12), (1.8, 5.1), (0, 2.5)],
)

# Each data set goes onto both (temperature, humidity) panel pairs.
panel_pairs = [(axT1, axq1), (axT2, axq2)]
for axT, axq in panel_pairs:
    plot_raso(axT, axq, case1, zorder=-30)
for axT, axq in panel_pairs:
    plot_retrieval(axT, axq, ret, best, uncertainty=True, zorder=-40)
for axT, axq in panel_pairs:
    plot_regression(axT, axq, case1, zorder=-45)

for axT in (axT1, axT2):
    axT.set_ylabel("height above ground [km]")
axq1.legend(loc="upper right", fontsize=11)
axT1.set_title("", loc="left", size=11)

fig.tight_layout()

In [None]:
# Temperature iterates of the case 1 retrieval: iterations 0-6 on the left
# panel, 6-12 on the right, each with the reference profile.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[8, 4.2])

# (axes, iterate index, colour, legend label, zorder)
# NOTE(review): the indices are fixed, so ret.μs needs at least 13 entries.
iteration_curves = [
    (ax1, 0, "#BBBBBB", "COSMO-7", -60),
    (ax1, 3, "#999999", "3 iterations", -50),
    (ax1, 6, "#666666", "6 iterations", -40),
    (ax2, 6, "#666666", "6 iterations", -40),
    (ax2, 9, "#333333", "9 iterations", -30),
    (ax2, 12, "#1f78b4", "12 iterations", -20),
]
for ax, step, shade, text, depth in iteration_curves:
    temperature = to_profile(ret.μs[step])["T"].values
    ax.plot(temperature, plotgrid, linewidth=2, color=shade, label=text, zorder=depth)

# Reference profile, axis limits, label and legend are shared by both panels.
reference_T = profiles[valid]["T"].values
for ax in (ax1, ax2):
    ax.plot(reference_T, plotgrid, linewidth=2, color="#000000", label="raso", zorder=-70)
    ax.set_xlim(273, 286)
    ax.set_ylim(0, 2.5)
    ax.set_ylabel("height above ground [km]")
    ax.legend(loc="upper right", fontsize=11)

ax1.set_title("optimal estimation iteration states", loc="left", size=11)
ax2.set_title("MWRTM, " + case1, loc="right", size=11)

fig.tight_layout()

### Case 2