In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import datetime as dt

from db_tools import read_csv_covariance, read_csv_profiles
from optimal_estimation import zgrid

%matplotlib inline

## COSMO7

- Interpolate to radiosonde launch time
- Remove bias with respect to radiosonde
- Save bias-corrected values and covariance matrix of errors

In [None]:
def interpolate(target, dfs, hourbounds=(0, 6)):
    """Interpolate the frames in `dfs` in time to timestamps of `target`.

    Parameters
    ----------
    target : pandas.DataFrame
        Its index supplies the candidate timestamps and its columns define
        the columns of the output.
    dfs : iterable of pandas.DataFrame
        Frames holding the values to interpolate between. Their indices are
        merged with the selected target timestamps into one time axis.
    hourbounds : sequence of two ints, optional
        Inclusive (first, last) hour of day. Only target timestamps whose
        `.hour` lies within these bounds are selected.

    Returns
    -------
    idx : pandas.DatetimeIndex
        The selected target timestamps, named "valid": those that lie within
        the overall time span covered by `dfs` and within `hourbounds`.
    interpolated : pandas.DataFrame
        Values from `dfs`, interpolated with pandas' "time" method and
        evaluated at `idx`.
    """
    # `dfs` is traversed several times below, so materialize it once. This
    # also allows callers to pass a generator.
    dfs = list(dfs)
    lower = min(min(df.index) for df in dfs)
    upper = max(max(df.index) for df in dfs)
    # DatetimeIndex (instead of a generic Index) keeps a datetime dtype even
    # when no timestamp is selected.
    idx = pd.DatetimeIndex(
        [
            d for d in target.index
            if lower <= d <= upper
            and hourbounds[0] <= d.hour <= hourbounds[1]
        ],
        name="valid",
    )
    # `Index.union_many` is not available in current pandas; build the
    # combined time axis with successive unions instead.
    full_index = idx
    for df in dfs:
        full_index = full_index.union(df.index)
    out = pd.DataFrame(index=full_index, columns=target.columns, dtype=float)
    # `.ix` was removed in pandas 1.0; the keys are labels, so use `.loc`.
    for df in dfs:
        out.loc[df.index] = df
    return idx, out.interpolate(method="time", axis=0).loc[idx]

In [None]:
def cosmo_prior(raso, dfl, dfh):
    """Bias-correct time-interpolated forecasts against reference profiles.

    Parameters
    ----------
    raso : pandas.DataFrame
        Reference profiles; its index provides the target timestamps.
    dfl, dfh : pandas.DataFrame
        The two frames that are interpolated in time between (in this
        notebook: a pair of COSMO-7 forecast files such as +00 and +06).

    Returns
    -------
    corrected : pandas.DataFrame
        Interpolated values with the mean difference to `raso` added.
    cov : pandas.DataFrame
        Covariance matrix of the differences `raso - interpolated`.
    """
    idx, itp = interpolate(raso, [dfl, dfh])
    # `.ix` was removed in pandas 1.0; `idx` holds index labels, so use `.loc`.
    diff = raso.loc[idx] - itp
    return itp + diff.mean(), diff.cov()

### Temperature Prior

In [None]:
# Temperature profiles: radiosonde reference plus four COSMO-7 files whose
# names end in +00/+06/+24/+30 (presumably the forecast lead time in hours --
# TODO confirm).
Traso = read_csv_profiles("../data/unified/T_raso.csv")
Tcosmo00, Tcosmo06, Tcosmo24, Tcosmo30 = (
    read_csv_profiles(csv_path)
    for csv_path in (
        "../data/unified/T_cosmo7+00.csv",
        "../data/unified/T_cosmo7+06.csv",
        "../data/unified/T_cosmo7+24.csv",
        "../data/unified/T_cosmo7+30.csv",
    )
)

# One bias-corrected prior (values and difference covariance) per file pair.
means0006, cov0006 = cosmo_prior(Traso, Tcosmo00, Tcosmo06)
means2430, cov2430 = cosmo_prior(Traso, Tcosmo24, Tcosmo30)

# Write the results; the order matches the computation above (means first).
for _table, _destination in (
    (means0006, "../data/unified/priors/T_cosmo7+00+06_mean.csv"),
    (means2430, "../data/unified/priors/T_cosmo7+24+30_mean.csv"),
    (cov0006, "../data/unified/priors/T_cosmo7+00+06_cov.csv"),
    (cov2430, "../data/unified/priors/T_cosmo7+24+30_cov.csv"),
):
    _table.to_csv(_destination)

### Humidity Prior

In [None]:
# Humidity profiles: for every source the `qliq` file is added to the `qvap`
# file (presumably liquid water and water vapour -- TODO confirm). The
# in-place `+=` is kept on purpose: it keeps the index/columns of the `qvap`
# frame on the left-hand side.
qraso = read_csv_profiles("../data/unified/qvap_raso.csv")
qraso += read_csv_profiles("../data/unified/qliq_raso.csv")

qcosmo00 = read_csv_profiles("../data/unified/qvap_cosmo7+00.csv")
qcosmo00 += read_csv_profiles("../data/unified/qliq_cosmo7+00.csv")

qcosmo06 = read_csv_profiles("../data/unified/qvap_cosmo7+06.csv")
qcosmo06 += read_csv_profiles("../data/unified/qliq_cosmo7+06.csv")

qcosmo24 = read_csv_profiles("../data/unified/qvap_cosmo7+24.csv")
qcosmo24 += read_csv_profiles("../data/unified/qliq_cosmo7+24.csv")

qcosmo30 = read_csv_profiles("../data/unified/qvap_cosmo7+30.csv")
qcosmo30 += read_csv_profiles("../data/unified/qliq_cosmo7+30.csv")

# The prior is computed for the natural logarithm of the summed humidity.
means0006, cov0006 = cosmo_prior(
    np.log(qraso), np.log(qcosmo00), np.log(qcosmo06)
)
means2430, cov2430 = cosmo_prior(
    np.log(qraso), np.log(qcosmo24), np.log(qcosmo30)
)

for _table, _destination in (
    (means0006, "../data/unified/priors/lnq_cosmo7+00+06_mean.csv"),
    (means2430, "../data/unified/priors/lnq_cosmo7+24+30_mean.csv"),
    (cov0006, "../data/unified/priors/lnq_cosmo7+00+06_cov.csv"),
    (cov2430, "../data/unified/priors/lnq_cosmo7+24+30_cov.csv"),
):
    _table.to_csv(_destination)

### State Vector Prior

In [None]:
def _state_vector(temperature, humidity):
    """Join temperature and ln(humidity) columns into one frame.

    Temperature columns get the prefix "T_", log-humidity columns "lnq_".
    """
    parts = [
        temperature.add_prefix("T_"),
        np.log(humidity).add_prefix("lnq_"),
    ]
    return pd.concat(parts, axis=1)


# Combined state vectors, built from the temperature and humidity frames
# loaded in the two prior sections above.
xraso = _state_vector(Traso, qraso)
xcosmo00 = _state_vector(Tcosmo00, qcosmo00)
xcosmo06 = _state_vector(Tcosmo06, qcosmo06)
xcosmo24 = _state_vector(Tcosmo24, qcosmo24)
xcosmo30 = _state_vector(Tcosmo30, qcosmo30)

means0006, cov0006 = cosmo_prior(xraso, xcosmo00, xcosmo06)
means2430, cov2430 = cosmo_prior(xraso, xcosmo24, xcosmo30)

for _table, _destination in (
    (means0006, "../data/unified/priors/x_cosmo7+00+06_mean.csv"),
    (means2430, "../data/unified/priors/x_cosmo7+24+30_mean.csv"),
    (cov0006, "../data/unified/priors/x_cosmo7+00+06_cov.csv"),
    (cov2430, "../data/unified/priors/x_cosmo7+24+30_cov.csv"),
):
    _table.to_csv(_destination)

## Radiosonde Climatology

First, separate test and training data. Climatology prior is then only computed from the training data.

In [None]:
def intestdata(d):
    """Return True if `d` lies strictly inside the test period.

    The test period runs from 2015-02-01 00:00 to 2016-01-31 00:00; both end
    points themselves are excluded.
    """
    period_start = dt.datetime(2015, 2, 1)
    period_end = dt.datetime(2016, 1, 31)
    return period_start < d and d < period_end

### Temperature

In [None]:
# Split the radiosonde temperature profiles into a training part (everything
# outside the test period, used for the climatology) and a test part.
raso = read_csv_profiles("../data/unified/T_raso.csv")
# Boolean row mask replaces the `.ix` indexer, which was removed in pandas 1.0.
is_test = np.array([intestdata(v) for v in raso.index], dtype=bool)
clim = raso.loc[~is_test]
test = raso.loc[is_test]
clim.to_csv("../data/unified/training/T_rasoclim.csv")
test.to_csv("../data/unified/test/T_rasoclim.csv")

In [None]:
# Climatological temperature prior from the training split `clim`: the
# column-wise mean (stored as a single column "T") and the covariance matrix.
clim.mean().to_frame(name="T").to_csv("../data/unified/priors/T_rasoclim_mean.csv")
clim_cov = clim.cov()
clim_cov.to_csv("../data/unified/priors/T_rasoclim_cov.csv")

### Humidity

In [None]:
# Radiosonde humidity: add the `qliq` file to the `qvap` file, take the
# natural logarithm, then split into training (climatology) and test parts.
raso = read_csv_profiles("../data/unified/qvap_raso.csv")
raso += read_csv_profiles("../data/unified/qliq_raso.csv")
raso = np.log(raso)
# Boolean row mask replaces the `.ix` indexer, which was removed in pandas 1.0.
is_test = np.array([intestdata(v) for v in raso.index], dtype=bool)
clim = raso.loc[~is_test]
test = raso.loc[is_test]
clim.to_csv("../data/unified/training/lnq_rasoclim.csv")
test.to_csv("../data/unified/test/lnq_rasoclim.csv")

In [None]:
# Climatological log-humidity prior from the training split `clim`: the
# column-wise mean (stored as a single column "lnq") and the covariance matrix.
clim.mean().to_frame(name="lnq").to_csv("../data/unified/priors/lnq_rasoclim_mean.csv")
clim_cov = clim.cov()
clim_cov.to_csv("../data/unified/priors/lnq_rasoclim_cov.csv")

### Cloudy cases

Mark cloudy profiles.

In [None]:
# A radiosonde profile is flagged as cloudy when the values of the `qliq`
# file summed over all of its columns are greater than zero.
raso = read_csv_profiles("../data/unified/qliq_raso.csv")
# Boolean row mask replaces the `.ix` indexer, which was removed in pandas 1.0.
is_test = np.array([intestdata(v) for v in raso.index], dtype=bool)
clim = raso.loc[~is_test]
test = raso.loc[is_test]
(clim.sum(axis=1) > 0).rename("cloudy").to_frame().to_csv("../data/unified/training/cloudy_raso.csv")
(test.sum(axis=1) > 0).rename("cloudy").to_frame().to_csv("../data/unified/test/cloudy_raso.csv")

In [None]:
# Match the IGMK cloud flags to the timestamps of `clim` (nearest entry within
# 60 minutes); timestamps without a match are dropped. When the notebook is
# run top to bottom, `clim` is the training split from the cell above.
igmk = (
    read_csv_profiles("../data/unified/cloudy_igmk.csv")
    .reindex(clim.index, method="nearest", tolerance=dt.timedelta(minutes=60))
    .dropna()
    # Two timestamps are removed by hand. The original comment only says
    # "foreshadowing..."; the reason is not documented here -- TODO confirm.
    .drop(dt.datetime(1999, 11, 18, 22, 51))
    .drop(dt.datetime(2012, 2, 14, 3, 37))
)
igmk.to_csv("../data/unified/training/cloudy_igmk.csv")

## Radiative Transfer Model

Separate training and test datasets. Then use only clear-sky cases for determination of covariance.

In [None]:
# Split the output of each radiative transfer model into training and test
# parts. Row selection uses boolean masks because the `.ix` indexer was
# removed in pandas 1.0.

# MWRTM: the "p" column is stored separately as psfc; the remaining columns
# go to the TB_mwrtm files.
mwrtm = read_csv_profiles("../data/unified/bt_mwrtm_3000_fap_hr.csv").drop(["T", "qvap"], axis=1)
mwrtm_is_test = np.array([intestdata(v) for v in mwrtm.index], dtype=bool)
mwrtm_tb = mwrtm.drop("p", axis=1)
mwrtm_train = mwrtm_tb.loc[~mwrtm_is_test]
mwrtm_test = mwrtm_tb.loc[mwrtm_is_test]
mwrtm_train.to_csv("../data/unified/training/TB_mwrtm.csv")
mwrtm_test.to_csv("../data/unified/test/TB_mwrtm.csv")

psfc = mwrtm[["p"]]
psfc.loc[~mwrtm_is_test].to_csv("../data/unified/training/psfc.csv")
psfc.loc[mwrtm_is_test].to_csv("../data/unified/test/psfc.csv")

# MonoRTM
monortm = read_csv_profiles("../data/unified/bt_monortm_hr.csv").drop(["p", "T", "qvap"], axis=1)
monortm_is_test = np.array([intestdata(v) for v in monortm.index], dtype=bool)
monortm_train = monortm.loc[~monortm_is_test]
monortm_test = monortm.loc[monortm_is_test]
monortm_train.to_csv("../data/unified/training/TB_monortm.csv")
monortm_test.to_csv("../data/unified/test/TB_monortm.csv")

# IGMK: non-positive values are masked, then rows are matched to the MWRTM
# timestamps (nearest entry within 60 minutes); incomplete rows are dropped.
igmk = read_csv_profiles("../data/unified/bt_igmk.csv").drop(["p", "T", "qvap"], axis=1)
igmk = igmk[igmk>0].reindex(mwrtm.index, method="nearest", tolerance=dt.timedelta(minutes=60)).dropna()
igmk_is_test = np.array([intestdata(v) for v in igmk.index], dtype=bool)
igmk_train = igmk.loc[~igmk_is_test]
igmk_test = igmk.loc[igmk_is_test]
# Fix: write the split frames. Previously the unsplit `igmk` frame was written
# to both files, so the training and test files were identical.
igmk_train.to_csv("../data/unified/training/TB_igmk.csv")
igmk_test.to_csv("../data/unified/test/TB_igmk.csv")

In [None]:
# Load the cloud flags of the training period written by the cells above.
_cloudy_raso_table = read_csv_profiles("../data/unified/training/cloudy_raso.csv")
cloudy_raso = _cloudy_raso_table["cloudy"]

# The IGMK flags are looked up at the timestamps of the IGMK frame.
# NOTE(review): `igmk.index` is the index of the unsplit frame, while this file
# only holds training timestamps; any label missing from the file makes this
# lookup fail in current pandas -- confirm against the data.
_cloudy_igmk_table = read_csv_profiles("../data/unified/training/cloudy_igmk.csv")
cloudy_igmk = _cloudy_igmk_table["cloudy"][igmk.index]

Determine the covariance matrices of the differences between each pair of models (MWRTM, MonoRTM, IGMK), using clear-sky cases only.

In [None]:
# Covariance of the differences between pairs of models, restricted to
# profiles that are not flagged as cloudy. Rows with any missing value in the
# difference are dropped before the covariance is computed.
clear_raso = ~cloudy_raso
shared_columns = monortm_train.columns
mwrtm_clear = mwrtm_train[clear_raso]
monortm_clear = monortm_train[clear_raso]

# MWRTM - MonoRTM, on the MonoRTM columns
mwrtm_minus_monortm = mwrtm_clear[shared_columns] - monortm_clear
cov = mwrtm_minus_monortm.dropna(axis=0).cov()
cov.to_csv("../data/unified/priors/TB_mwrtm_monortm_cov.csv")

# MWRTM - IGMK
# NOTE(review): unlike the other two pairs, no column subset is selected here;
# a column present in only one frame becomes all-NaN and `dropna` would then
# remove every row -- confirm both frames have identical columns.
clear_igmk = ~cloudy_igmk
igmk_clear = igmk_train[clear_igmk]
mwrtm_minus_igmk = mwrtm_clear - igmk_clear
cov = mwrtm_minus_igmk.dropna(axis=0).cov()
cov.to_csv("../data/unified/priors/TB_mwrtm_igmk_cov.csv")

# IGMK - MonoRTM, on the MonoRTM columns
igmk_minus_monortm = igmk_clear[shared_columns] - monortm_clear
cov = igmk_minus_monortm.dropna(axis=0).cov()
cov.to_csv("../data/unified/priors/TB_igmk_monortm_cov.csv")

Missing: FAP, HR/LR terms