In [None]:
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width so wide figures fit.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

# Base figure width; the large multi-panel figures below derive their size from it.
plotwidth = 30

In [None]:
from WwDec.main import *

import matplotlib.pyplot as plt
import seaborn as sns

# Globals

In [None]:
# Variant -> hex colour used by the deconvolution plots below.
#
# Inspired by CoVariants, see:
# https://github.com/hodcroftlab/covariants/blob/master/web/data/clusters.json
#
# Keep in sync with covSPECTRUM, see:
# https://github.com/cevo-public/cov-spectrum-website/blob/develop/src/models/wasteWater/constants.ts
color_map = {
    "B.1.1.7": "#D16666",
    "B.1.351": "#FF6665",
    "P.1": "#FFB3B3",
    "B.1.617.1": "#66C265",
    "B.1.617.2": "#66A366",
    "BA.1": "#A366A3",
    "BA.2": "#cfafcf",
    "BA.4": "#8a66ff",
    "BA.5": "#585eff",
    "BA.2.12.1": "#0400e0",
    "BA.2.75": "#008fe0",
    "BA.2.75.2": "#208fe0",  # improv
    "BQ.1.1": "#8fe000",  # improv
    "undetermined": "#969696",
}

In [None]:
# Overwrite globals set by WwDec.main (temporary).

# --- input / output locations ---
# Reading the .zst file needs Python's Zstandard; uncompressed alternative: "./tallymut_line.tsv"
tally_data = "./work-vp-test/variants/tallymut.tsv.zst"
out_dir = "./out"

# --- variants ---
# Variants handed to the preprocessing and used as design-matrix columns.
variants_list = [
    "B.1.1.7", "B.1.351", "P.1",
    "B.1.617.2", "B.1.617.1",
    "BA.1", "BA.2", "BA.4", "BA.5",
    "BA.2.75",
    #"BA.2.75.2",
    "BQ.1.1",
]
# Short column name in the tally table -> lineage name.
variants_pangolin = {
    "al": "B.1.1.7",
    "be": "B.1.351",
    "ga": "P.1",
    "C36": "C.36.3",
    "ka": "B.1.617.1",
    "de": "B.1.617.2",
    "AY42": "AY.4.2",
    "B16173": "B.1.617.3",
    "om1": "BA.1",
    "om2": "BA.2",
    "om4": "BA.4",
    "om5": "BA.5",
    "om275": "BA.2.75",
    "om2752": "BA.2.75.2",
    "ombq11": "BQ.1.1",
    "om2121": "BA.2.12.1",
}
variants_not_reported = [
    "BA.2.75.2", "BA.2.12.1",
    "phe-BA.1", "phe-BA.2",
    "C.36.3", "B.1.617.3", "AY.4.2",
    "mu", "d614g",
]

# --- filtering ---
start_date = "2020-12-08"
# NOTE(review): the preprocessing cell passes its own literal `to_drop=["subset"]`
# instead of this list — confirm which one is intended.
to_drop = ["subset", "shared"]

# --- locations ---
cities_list = [
    "Lugano (TI)",
    "Zürich (ZH)",
    "Chur (GR)",
    "Altenrhein (SG)",
    "Laupen (BE)",
    "Genève (GE)",
    "Basel (catchment area ARA Basel)",
    #"Lausanne (VD)",
    #"Kanton Zürich",
]



In [None]:
import os

# Local settings for this notebook.
# Optional variant renames applied to the keys of the exported JSON,
# e.g. {'omi-BA.2': 'BA.2'}; empty means names are exported unchanged.
rename_variants = {}
datadir = '.'

# Outputs
plots_dir = 'deconv_plots'
# exist_ok=True makes this idempotent on re-runs (and safe if something else
# creates the directory concurrently). Unlike the previous isdir/mkdir/except
# sequence it still raises if `plots_dir` exists but is NOT a directory,
# instead of silently continuing and failing later when a figure is saved.
os.makedirs(plots_dir, mode=0o775, exist_ok=True)

# JSON files written by the two "covSPECTRUM export" cells.
update_data_lin_file = os.path.join(datadir, 'ww_update_data_smooth_kernel_lin.json')
update_data_rob_file = os.path.join(datadir, 'ww_update_data_smooth_kernel_rob.json')

# Load and preprocess data

In [None]:
# Read the tab-separated tally table and preview its first rows.
df_tally = pd.read_csv(tally_data, delimiter="\t")  # .drop(columns=['proto'])
df_tally.head(5)

In [None]:
# Columns of the tally that are neither a variant short name (a key of
# `variants_pangolin`) nor one of the known tally fields listed here.
known_tally_fields = {
    'base', 'batch', 'cov', 'date', 'frac', 'gene',
    'plantcode', 'plantname', 'pos', 'proto', 'sample', 'var',
}
set(df_tally.columns).difference(variants_pangolin.keys(), known_tally_fields)

In [None]:
# General preprocessing of the raw tally.
# NOTE(review): `to_drop` is given literally here; the notebook-level `to_drop`
# (["subset", "shared"]) is not used — confirm which is intended.
preproc = DataPreprocesser(df_tally).general_preprocess(
    variants_list=variants_list,
    variants_pangolin=variants_pangolin,
    variants_not_reported=variants_not_reported,
    to_drop=["subset"],
    start_date=start_date,
    remove_deletions=True,
)
t_df_tally = preproc.df_tally

# Only non-v41 rows go through filter_mutations(): set the v41 rows aside,
# filter the rest, then append the v41 rows back unchanged.
is_v41 = preproc.df_tally.proto == 'v41'
df_tally_v41 = preproc.df_tally[is_v41]
print(df_tally_v41.shape)

preproc.df_tally = preproc.df_tally[~is_v41]
preproc = preproc.filter_mutations()
print(preproc.df_tally.shape)

preproc.df_tally = pd.concat([preproc.df_tally, df_tally_v41])
print(preproc.df_tally.shape)

In [None]:
# Print the most recent v41-protocol date for each city.
# NOTE(review): this reads `location` from the raw `df_tally` rather than from
# `preproc.df_tally` — confirm the raw table carries that column.
v41_rows = df_tally[df_tally.proto == "v41"]
for name in cities_list:
    print(v41_rows[v41_rows.location == name].date.max(), name)

# Look at design of mutations

In [None]:
# Design matrix: one row per distinct mutation, one column per variant
# (plus "undetermined"), indexed by the mutation name.
design_cols = variants_list + ["undetermined", "mutations"]
des_matrix = (
    preproc.df_tally[design_cols]
    .drop_duplicates("mutations")
    .set_index("mutations")
)

# Split on the leading "-" of the mutation name.
has_dash_prefix = des_matrix.index.str.startswith("-")
des_matrix_mut = des_matrix[~has_dash_prefix]
des_matrix_wt = des_matrix[has_dash_prefix]



In [None]:
# Heatmap of the design matrix, variants as rows and mutations as columns.
plt.figure(figsize=(plotwidth, plotwidth / 4))  # 25,6
sns.heatmap(des_matrix.transpose(), square=True, ax=plt.gca())

In [None]:
# Condition number of the design matrix: all columns, the four omicron
# columns only, and everything except those four and "undetermined".
# np.linalg.cond(des_matrix_mut.drop('undetermined', axis=1))
omicron_cols = ["BA.1", "BA.2", "BA.4", "BA.5"]
non_omicron_cols = [
    col for col in des_matrix.columns
    if col not in omicron_cols + ["undetermined"]
]
print(np.linalg.cond(des_matrix))
for cols in (omicron_cols, non_omicron_cols):
    print(np.linalg.cond(des_matrix[cols]))



In [None]:
# Three views of how similar the variant columns of the design matrix are.
fig, axes = plt.subplots(1, 3, figsize=(20, 5))

# Column-by-column product of the design matrix; with 0/1 entries (assumed —
# TODO confirm) entry (i, j) counts the mutations shared by variants i and j.
common_mut = des_matrix_mut.T.dot(des_matrix_mut)
sns.heatmap(common_mut, square=True, cmap="viridis", annot=common_mut, ax=axes[0])
axes[0].set_title("common mutations")

corr_mut = des_matrix_mut.corr()
sns.heatmap(corr_mut, square=True, cmap="viridis", annot=corr_mut, ax=axes[1], fmt=".1g")
axes[1].set_title("correlation")

from sklearn.metrics.pairwise import pairwise_distances
# The panel is labelled Jaccard, (A∩B)/(A∪B), so compute exactly that.
# The previous metric="hamming" gave the simple-matching coefficient, which
# also counts mutations absent from BOTH variants as agreement and therefore
# overstates the similarity. The "jaccard" metric is defined on boolean input,
# hence the cast (assumes 0/1 entries — TODO confirm).
jac_sim = 1 - pairwise_distances(des_matrix_mut.T.astype(bool).to_numpy(), metric="jaccard")
jac_sim = pd.DataFrame(jac_sim, index=des_matrix_mut.columns, columns=des_matrix_mut.columns)
sns.heatmap(jac_sim, square=True, cmap="viridis", annot=jac_sim, ax=axes[2])
axes[2].set_title("jaccard similarity ((A∩B)/(A∪B))")

# plt.show() rather than fig.show(): the latter only warns on the non-GUI
# inline backend used by notebooks.
plt.show()

In [None]:
# Earliest date among the v41-protocol rows of the preprocessed tally.
preproc.df_tally.loc[preproc.df_tally["proto"] == "v41", "date"].min()

In [None]:
# Per-protocol, per-date conditioning diagnostics of the Zürich design matrix.
#
# For every protocol and every date, the design matrix is rebuilt from the
# mutations with coverage >= 5 on that date and summarised as: number of
# mutation rows, condition number (all columns / omicron columns only),
# maximum off-diagonal Jaccard similarity among the omicron columns and
# maximum off-diagonal correlation among all columns.
#
# Loop-local names (`des_day`, ...) are used on purpose so that the notebook-
# level `des_matrix*`, `jac_sim` and `corr_mut` from the cells above are not
# overwritten by this cell.
omicron_cols = ["BA.1", "BA.2", "BA.4", "BA.5"]
design_cols = variants_list + ["undetermined", "mutations"]
# loop-invariant: select the city once
df_tally_zh_all = preproc.df_tally[preproc.df_tally.location == "Zürich (ZH)"]

all_conds_df = []
for proto in preproc.df_tally.proto.unique():
    # name kept as-is: later cells inspect `t_df_tally_zh`
    t_df_tally_zh = df_tally_zh_all[df_tally_zh_all.proto == proto]

    conds = []
    cond_dates = []
    for date in t_df_tally_zh.date.unique():
        day_rows = t_df_tally_zh[(t_df_tally_zh.date == date) & (t_df_tally_zh["cov"] >= 5)]
        des_day = day_rows[design_cols].drop_duplicates("mutations").set_index("mutations")
        des_day_mut = des_day[~des_day.index.str.startswith("-")]
        if des_day_mut.empty:
            # No mutation row passed the coverage filter on this date: the
            # diagnostics below are undefined on an empty matrix and would raise.
            continue

        # True Jaccard similarity, matching the "max jaccard sim" plot label
        # (metric="hamming" also counted shared absences as agreement).
        # Assumes 0/1 entries — TODO confirm.
        jac_arr = 1 - pairwise_distances(des_day_mut[omicron_cols].T.astype(bool).to_numpy(), metric="jaccard")
        np.fill_diagonal(jac_arr, np.nan)  # ignore self-similarity
        maxjac = np.nanmax(jac_arr)

        # explicit copy: `.values` may be a read-only view of the frame
        corr_arr = des_day_mut.corr().to_numpy(copy=True)
        np.fill_diagonal(corr_arr, np.nan)  # ignore self-correlation
        maxcorr = np.nanmax(corr_arr)

        conds.append({
            "n_mut": des_day_mut.shape[0],
            "cond_number": np.linalg.cond(des_day),
            "cond_number_omicron": np.linalg.cond(des_day[omicron_cols]),
            "max_jac": maxjac,
            "max_corr": maxcorr,
        })
        cond_dates.append(date)

    # index = the dates that produced a row (left unnamed, so a later
    # reset_index() yields the "index" column the plotting cell expects)
    conds_df = pd.DataFrame(conds, index=cond_dates)
    conds_df["proto"] = proto
    all_conds_df.append(conds_df)



In [None]:
# Protocol values present in the Zürich subset left over from the loop above.
t_df_tally_zh.proto.unique()

In [None]:
# Conditioning diagnostics over time, one line per protocol.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

all_conds_df_conc = pd.concat(all_conds_df).reset_index()

# (column to plot, panel title, clip y-range and use a log y-axis?) — left to right
panels = [
    ("cond_number_omicron", "condition number", True),
    ("max_corr", "max correlation", False),
    ("max_jac", "max jaccard sim", False),
]
for ax, (column, title, log_y) in zip(axes, panels):
    sns.lineplot(
        x=all_conds_df_conc["index"],
        y=all_conds_df_conc[column],
        hue=all_conds_df_conc["proto"],
        ax=ax,
    )
    ax.set_title(title)
    if log_y:
        ax.set_ylim(bottom=5, top=20)
    ax.set_xlim(left=np.datetime64("2021-12-01"))
    if log_y:
        ax.set_yscale("log")


In [None]:
# Display the Zürich subset left in `t_df_tally_zh` by the per-protocol loop
# above (it holds the rows of the last protocol that loop iterated over).
t_df_tally_zh

# Do piecewise deconvolution with a different variant set per date window, with bootstrap

In [None]:
import yaml

# Load the WwDec configuration; the deconvolution cells below read its
# "var_dates" mapping.
# NOTE(review): yaml.load with FullLoader can build more than plain data
# types. If this file is ever not fully trusted, prefer yaml.safe_load.
with open('./WwDec/config.yaml') as config_file:
    conf_yaml = yaml.load(config_file, Loader=yaml.FullLoader)

In [None]:
def resample_mutations(df_city1, mutations):
    """
    Bootstrap-resample mutations with replacement, in pairs.

    `mutations` is treated as two halves of ``len(mutations) // 2`` entries:
    entry ``k`` of the first half and entry ``k`` of the second half always get
    the same draw count. NOTE(review): this presumes the caller orders
    `mutations` so that those two entries are a mutation and its complement —
    confirm against the caller.

    Returns ``(df_sampled, rand_idcs)``: a copy of `df_city1` with an added
    `resample_value` column (how many times the row's mutation was drawn) and
    the array of drawn pair indices. `df_city1` itself is not modified.
    Draws come from NumPy's global random state.
    """
    n_pairs = len(mutations) // 2

    # n_pairs draws from 0 .. n_pairs-1 (randint's `high` is exclusive)
    rand_idcs = np.random.randint(0, high=n_pairs, size=n_pairs)
    # how often each pair index was drawn (0, 1, 2, ...)
    pair_counts = np.bincount(rand_idcs, minlength=n_pairs)

    # mutation -> draw count; both halves of `mutations` share the same counts
    count_by_mutation = dict(zip(mutations, np.tile(pair_counts, 2)))

    df_sampled = df_city1.copy()
    df_sampled["resample_value"] = df_sampled["mutations"].map(count_by_mutation)

    return df_sampled, rand_idcs

## Do it with linear reg / MSE error

In [None]:
%%time

np.random.seed(42)
n_boot = 100
linear_deconv2 = []

# Date windows from the config: every key of "var_dates" opens a window that
# runs up to (excluding) the next key; the last window has no upper bound.
window_starts = list(conf_yaml["var_dates"].keys())
window_ends = window_starts[1:] + [None]

for city in tqdm(cities_list):
    print(city)
    temp_df = preproc.df_tally[preproc.df_tally["location"] == city]
    for boot in tqdm(range(n_boot)):
        # one bootstrap replicate: per-row resampling weights for this city
        temp_dfb = resample_mutations(temp_df, temp_df.mutations.unique())[0]
        for mindate, maxdate in zip(window_starts, window_ends):
            in_window = temp_dfb.date >= mindate
            if maxdate is not None:
                in_window &= temp_dfb.date < maxdate
            temp_df2 = temp_dfb[in_window]
            if temp_df2.empty:
                continue

            # variants configured for this window, plus the catch-all column
            window_columns = conf_yaml["var_dates"][mindate] + ["undetermined"]
            t_kdec = KernelDeconv(
                temp_df2[window_columns],
                temp_df2["frac"],
                temp_df2["date"],
                weights=temp_df2["resample_value"],
                kernel=GaussianKernel(10),
                reg=NnlsReg(),
                confint=NullConfint(),
            ).deconv_all()
            res = t_kdec.renormalize().fitted
            res["city"] = city
            linear_deconv2.append(res)

# columns missing from a window's result become NaN on concat -> fill with 0
linear_deconv2_df = pd.concat(linear_deconv2).fillna(0)

# backup data
linear_deconv2_df.to_csv("linear_deconv2.csv")

### aggregate results

In [None]:
# Aggregate the bootstrap replicates: mean and 2.5 % / 97.5 % quantiles of the
# fitted fraction per (city, index value, variant).
linear_deconv2_df = pd.concat(linear_deconv2).fillna(0)

# long format: one row per (index value, city, variant)
linear_deconv2_df_flat = linear_deconv2_df.melt(
    id_vars="city",
    value_vars=[i for i in variants_list if i in linear_deconv2_df.columns] + ["undetermined"],
    var_name="variant",
    value_name="frac",
    ignore_index=False,
)

# "mean" is passed as a string: handing NumPy callables such as np.mean to
# GroupBy.agg is deprecated in recent pandas; the resulting column label is
# still "mean". The two quantile lambdas stay anonymous on purpose — the plot
# and export cells select them by their generated labels '<lambda_0>' (lower)
# and '<lambda_1>' (upper).
linear_deconv2_df_agg = (
    linear_deconv2_df_flat.reset_index()
    .groupby(["city", "index", "variant"])
    .agg([
        "mean",
        lambda x: np.quantile(x, q=0.025),
        lambda x: np.quantile(x, q=0.975),
    ])
    .reset_index()
)
linear_deconv2_df_agg.head()

### plot

In [None]:
# One panel per city: bootstrap mean per variant with a shaded band between
# the lower and upper quantile columns.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(plotwidth, plotwidth/2), sharex=True)
axes = axes.flatten()

plotted_variants = linear_deconv2_df_agg["variant"].unique()
for i, city in enumerate(linear_deconv2_df.city.unique()):
    panel = axes[i]
    panel.set_title(city)
    in_city = linear_deconv2_df_agg["city"] == city

    for var in plotted_variants:
        tt_df = linear_deconv2_df_agg[(linear_deconv2_df_agg["variant"] == var) & in_city].reset_index()
        frac = tt_df["frac"]
        line_ax = sns.lineplot(
            x=tt_df["index"],
            y=frac["mean"],
            hue=tt_df["variant"],
            ax=panel,
            palette=color_map,
        )
        # per-panel legends are dropped; one shared legend is added below
        line_ax.get_legend().remove()
        panel.fill_between(
            x=tt_df["index"],
            y1=frac["<lambda_0>"],
            y2=frac["<lambda_1>"],
            alpha=0.2,
            #color="grey"
            color=color_map[var],
        )

# shared legend built from the last panel that was drawn
handles, labels = axes[i].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=len(labels), bbox_to_anchor=(0.5,0.05))
fig.suptitle('Gaussian Kernel Deconvolution ($k=10$)')
# plt.savefig(os.path.join(plots_dir, f"combined-linear.pdf"))

### covSPECTRUM export

In [None]:
import json

# Export the aggregated linear results as nested JSON:
#   {variant: {city: {"timeseriesSummary": [ {date, proportion, proportionLower, proportionUpper}, ... ]}}}
update_data = {}

cities_uniq = linear_deconv2_df.city.unique()
var_uniq = linear_deconv2_df_agg["variant"].unique()

# aggregated (two-level) column -> field name in the exported records
export_columns = {
    ('index', ''): "date",
    ('frac', 'mean'): "proportion",
    ('frac', '<lambda_0>'): "proportionLower",
    ('frac', '<lambda_1>'): "proportionUpper",
}
source_columns = list(export_columns.keys())
export_names = list(export_columns.values())

for var in tqdm(var_uniq, desc='Variants', position=0):
    var_key = rename_variants.get(var, var)
    update_data[var_key] = {}
    is_var = linear_deconv2_df_agg["variant"] == var
    for city in tqdm(cities_uniq, desc=var, position=1, leave=False):
        # plain lists (not dict views) are handed to pandas for selection/renaming
        tt_df = linear_deconv2_df_agg.loc[is_var & (linear_deconv2_df_agg["city"] == city), source_columns].copy()
        tt_df.columns = export_names
        tt_df["date"] = tt_df["date"].astype("str")

        # one dict per row, built in a single call instead of a per-row iloc loop
        update_data[var_key][city] = {
            "timeseriesSummary": tt_df.to_dict(orient="records")
        }

with open(update_data_lin_file, 'w') as file:
    json.dump(update_data, file)

## With Robust reg

### Do just one to try

In [None]:
%%time

np.random.seed(42)
robust_deconv_short = []

# Date windows from the config: every key of "var_dates" opens a window that
# runs up to (excluding) the next key; the last window has no upper bound.
window_starts = list(conf_yaml["var_dates"].keys())
window_ends = window_starts[1:] + [None]

# trial run on a single city (second entry of cities_list), without bootstrap
for city in [cities_list[1]]:
    print(city)
    temp_df = preproc.df_tally[preproc.df_tally["location"] == city]
    temp_dfb = temp_df
    for mindate, maxdate in zip(window_starts, window_ends):
        print(mindate)
        in_window = temp_dfb.date >= mindate
        if maxdate is not None:
            in_window &= temp_dfb.date < maxdate
        temp_df2 = temp_dfb[in_window]
        if temp_df2.empty:
            continue

        # variants configured for this window, plus the catch-all column
        window_columns = conf_yaml["var_dates"][mindate] + ["undetermined"]
        t_kdec = KernelDeconv(
            temp_df2[window_columns],
            temp_df2["frac"],
            temp_df2["date"],
            kernel=GaussianKernel(10),
            reg=RobustReg(f_scale=0.01),
            confint=NullConfint(),
        ).deconv_all()
        res = t_kdec.renormalize().fitted
        res["city"] = city
        robust_deconv_short.append(res)

robust_deconv_short_df = pd.concat(robust_deconv_short).fillna(0)



In [None]:
# Plot every fitted column except "city".
# "undetermined" is already among those columns (it is part of the design
# passed to KernelDeconv), so it must not be appended again: doing so selected
# the column twice and drew a duplicate line and legend entry.
robust_deconv_short_df[[i for i in robust_deconv_short_df.columns if i != "city"]].plot()

In [None]:
# Plot every fitted column except "city".
# "undetermined" is already among those columns (it is part of the design
# passed to KernelDeconv), so it must not be appended again: doing so selected
# the column twice and drew a duplicate line and legend entry.
robust_deconv_short_df[[i for i in robust_deconv_short_df.columns if i != "city"]].plot()

In [None]:
%%time

np.random.seed(42)
n_boot = 100
robust_deconv2 = []

# Date windows from the config: every key of "var_dates" opens a window that
# runs up to (excluding) the next key; the last window has no upper bound.
window_starts = list(conf_yaml["var_dates"].keys())
window_ends = window_starts[1:] + [None]

for city in tqdm(cities_list):
    print(city)
    temp_df = preproc.df_tally[preproc.df_tally["location"] == city]
    for boot in tqdm(range(n_boot)):
        # one bootstrap replicate: per-row resampling weights for this city
        temp_dfb = resample_mutations(temp_df, temp_df.mutations.unique())[0]
        for mindate, maxdate in zip(window_starts, window_ends):
            in_window = temp_dfb.date >= mindate
            if maxdate is not None:
                in_window &= temp_dfb.date < maxdate
            temp_df2 = temp_dfb[in_window]
            if temp_df2.empty:
                continue

            # variants configured for this window, plus the catch-all column
            window_columns = conf_yaml["var_dates"][mindate] + ["undetermined"]
            t_kdec = KernelDeconv(
                temp_df2[window_columns],
                temp_df2["frac"],
                temp_df2["date"],
                weights=temp_df2["resample_value"],
                kernel=GaussianKernel(10),
                reg=RobustReg(f_scale=0.01),
                confint=NullConfint(),
            ).deconv_all(min_tol=1e-3)  # min tol changed from default
            res = t_kdec.renormalize().fitted
            res["city"] = city
            robust_deconv2.append(res)

# columns missing from a window's result become NaN on concat -> fill with 0
robust_deconv2_df = pd.concat(robust_deconv2).fillna(0)


# backup data
robust_deconv2_df.to_csv("robust_deconv2.csv")

### aggregate

In [None]:
# Aggregate the bootstrap replicates: mean and 2.5 % / 97.5 % quantiles of the
# fitted fraction per (city, index value, variant).
robust_deconv2_df = pd.concat(robust_deconv2).fillna(0)

# long format: one row per (index value, city, variant)
robust_deconv2_df_flat = robust_deconv2_df.melt(
    id_vars="city",
    value_vars=[i for i in variants_list if i in robust_deconv2_df.columns] + ["undetermined"],
    var_name="variant",
    value_name="frac",
    ignore_index=False,
)

# "mean" is passed as a string: handing NumPy callables such as np.mean to
# GroupBy.agg is deprecated in recent pandas; the resulting column label is
# still "mean". The two quantile lambdas stay anonymous on purpose — the plot
# and export cells select them by their generated labels '<lambda_0>' (lower)
# and '<lambda_1>' (upper).
robust_deconv2_df_agg = (
    robust_deconv2_df_flat.reset_index()
    .groupby(["city", "index", "variant"])
    .agg([
        "mean",
        lambda x: np.quantile(x, q=0.025),
        lambda x: np.quantile(x, q=0.975),
    ])
    .reset_index()
)
robust_deconv2_df_agg.head()

### plot

In [None]:
# One panel per city: bootstrap mean per variant with a shaded band between
# the lower and upper quantile columns; the figure is saved as a PDF.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(plotwidth, plotwidth/2), sharex=True)
axes = axes.flatten()

plotted_variants = robust_deconv2_df_agg["variant"].unique()
for i, city in enumerate(robust_deconv2_df_agg.city.unique()):
    panel = axes[i]
    panel.set_title(city)
    in_city = robust_deconv2_df_agg["city"] == city

    for var in plotted_variants:
        tt_df = robust_deconv2_df_agg[(robust_deconv2_df_agg["variant"] == var) & in_city]
        frac = tt_df["frac"]
        line_ax = sns.lineplot(
            x=tt_df["index"],
            y=frac["mean"],
            hue=tt_df["variant"],
            ax=panel,
            palette=color_map,
        )
        # per-panel legends are dropped; one shared legend is added below
        line_ax.get_legend().remove()
        panel.fill_between(
            x=tt_df["index"],
            y1=frac["<lambda_0>"],
            y2=frac["<lambda_1>"],
            alpha=0.2,
            #color="grey",
            color=color_map[var],
        )

# shared legend built from the last panel that was drawn
handles, labels = axes[i].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=len(labels), bbox_to_anchor=(0.5,0.05))
fig.suptitle('Robust Piecewise Gaussian Kernel Deconvolution ($k=10$, $f=0.01$)')
plt.savefig(os.path.join(plots_dir, "combined-robust.pdf"))

### covSPECTRUM export

In [None]:
import json

# Export the aggregated robust results as nested JSON:
#   {variant: {city: {"timeseriesSummary": [ {date, proportion, proportionLower, proportionUpper}, ... ]}}}
update_data = {}

cities_uniq = robust_deconv2_df.city.unique()
var_uniq = robust_deconv2_df_agg["variant"].unique()

# aggregated (two-level) column -> field name in the exported records
export_columns = {
    ('index', ''): "date",
    ('frac', 'mean'): "proportion",
    ('frac', '<lambda_0>'): "proportionLower",
    ('frac', '<lambda_1>'): "proportionUpper",
}
source_columns = list(export_columns.keys())
export_names = list(export_columns.values())

for var in tqdm(var_uniq, desc='Variants', position=0):
    var_key = rename_variants.get(var, var)
    update_data[var_key] = {}
    is_var = robust_deconv2_df_agg["variant"] == var
    for city in tqdm(cities_uniq, desc=var, position=1, leave=False):
        # plain lists (not dict views) are handed to pandas for selection/renaming
        tt_df = robust_deconv2_df_agg.loc[is_var & (robust_deconv2_df_agg["city"] == city), source_columns].copy()
        tt_df.columns = export_names
        tt_df["date"] = tt_df["date"].astype("str")

        # one dict per row, built in a single call instead of a per-row iloc loop
        update_data[var_key][city] = {
            "timeseriesSummary": tt_df.to_dict(orient="records")
        }

with open(update_data_rob_file, 'w') as file:
    json.dump(update_data, file)

## Robust reg but without smoothing



In [None]:
%%time

np.random.seed(42)
# Results are collected under their own name. Reusing `robust_deconv2` here
# would overwrite the bootstrap results, so that re-running the robust
# "aggregate" cell afterwards would silently aggregate these unsmoothed,
# non-bootstrapped fits instead.
robust_deconv2_noisy = []

# Date windows from the config: every key of "var_dates" opens a window that
# runs up to (excluding) the next key; the last window has no upper bound.
window_starts = list(conf_yaml["var_dates"].keys())
window_ends = window_starts[1:] + [None]

for city in tqdm(cities_list):
    print(city)
    temp_df = preproc.df_tally[preproc.df_tally["location"] == city]
    temp_dfb = temp_df  # no bootstrap resampling in this run
    for mindate, maxdate in zip(window_starts, window_ends):
        in_window = temp_dfb.date >= mindate
        if maxdate is not None:
            in_window &= temp_dfb.date < maxdate
        temp_df2 = temp_dfb[in_window]
        if temp_df2.empty:
            continue

        # variants configured for this window, plus the catch-all column
        window_columns = conf_yaml["var_dates"][mindate] + ["undetermined"]
        t_kdec = KernelDeconv(
            temp_df2[window_columns],
            temp_df2["frac"],
            temp_df2["date"],
            # very small kernel parameter: this is the "without smoothing" run
            kernel=GaussianKernel(0.0001),
            reg=RobustReg(f_scale=0.01),
            confint=NullConfint(),
        ).deconv_all(min_tol=1e-3)
        res = t_kdec.renormalize().fitted
        res["city"] = city
        robust_deconv2_noisy.append(res)

# columns missing from a window's result become NaN on concat -> fill with 0
robust_deconv2_noisy_df = pd.concat(robust_deconv2_noisy).fillna(0)


# backup data
robust_deconv2_noisy_df.to_csv("robust_deconv2_noisy.csv")

In [None]:
# Largest index value present in the unsmoothed robust results.
robust_deconv2_noisy_df.index.max()

## LS reg without smoothing

In [None]:
%%time

np.random.seed(42)
linear_deconv3 = []

# Date windows from the config: every key of "var_dates" opens a window that
# runs up to (excluding) the next key; the last window has no upper bound.
window_starts = list(conf_yaml["var_dates"].keys())
window_ends = window_starts[1:] + [None]

for city in tqdm(cities_list):
    print(city)
    temp_df = preproc.df_tally[preproc.df_tally["location"] == city]
    temp_dfb = temp_df  # no bootstrap resampling in this run
    for mindate, maxdate in zip(window_starts, window_ends):
        in_window = temp_dfb.date >= mindate
        if maxdate is not None:
            in_window &= temp_dfb.date < maxdate
        temp_df2 = temp_dfb[in_window]
        if temp_df2.empty:
            continue

        # variants configured for this window, plus the catch-all column
        window_columns = conf_yaml["var_dates"][mindate] + ["undetermined"]
        t_kdec = KernelDeconv(
            temp_df2[window_columns],
            temp_df2["frac"],
            temp_df2["date"],
            # very small kernel parameter: this is the "without smoothing" run
            kernel=GaussianKernel(0.0001),
            reg=NnlsReg(),
            confint=NullConfint(),
        ).deconv_all(min_tol=1e-3)
        res = t_kdec.renormalize().fitted
        res["city"] = city
        linear_deconv3.append(res)

# columns missing from a window's result become NaN on concat -> fill with 0
linear_deconv3_noisy_df = pd.concat(linear_deconv3).fillna(0)


# backup data
linear_deconv3_noisy_df.to_csv("linear_deconv3_noisy.csv")

In [None]:
# Unsmoothed robust results, one panel per city (pandas' default line plot).
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(plotwidth, plotwidth/2), sharex=True)
axes = axes.flatten()

for i, city in enumerate(robust_deconv2_noisy_df.city.unique()):
    city_rows = robust_deconv2_noisy_df.loc[robust_deconv2_noisy_df.city == city]
    city_rows.plot(ax=axes[i])
    axes[i].set_title(city)

In [None]:
# Number of distinct lineage names among the values of `variants_pangolin`.
len(set(variants_pangolin.values()))