In [None]:
from IPython.display import display, HTML

# Inject a CSS rule that sets the `.container` element to 100% width, so the
# notebook can use the whole browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Width handed to matplotlib's `figsize` by the plotting cell (height is half).
plotwidth = 40

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import yaml
import json
from tqdm.notebook import tqdm, trange
import time
import datetime
import os
import sys
import netrc
import psycopg2

# from xml.dom import minidom

# Globals

In [None]:
# Variant name -> hex colour. Used as the seaborn palette and for the shaded
# bands in the plotting cell; the inspection cell asserts that every variant
# to be uploaded has an entry here.
#
# Source of inspiration from covariants, see:
# https://github.com/hodcroftlab/covariants/blob/master/web/data/clusters.json
#
# Keep in sync with covspectrum, see:
# https://github.com/GenSpectrum/cov-spectrum-website/blob/develop/src/models/wasteWater/constants.ts
color_map = {
    "B.1.1.7": "#D16666",
    "B.1.351": "#FF6665",
    "P.1": "#FFB3B3",
    "B.1.617.1": "#66C265",
    "B.1.617.2": "#66A366",
    "BA.1": "#A366A3",
    "BA.2": "#cfafcf",
    "BA.4": "#8a66ff",
    "BA.5": "#585eff",
    "BA.2.12.1": "#0400e0",
    "BA.2.75": "#008fe0",
    "BA.2.75.2": "#208fe0",  # improv
    "BQ.1.1": "#8fe000",  # improv
    "XBB.1.9": "#dd6bff",  # improv
    "XBB.1.5": "#ff5656",
    "XBB.1.16": "#e99b30",
    "XBB.2.3": "#b4b82a",  # improv
    "EG.5": "#359f99",  # improv
    "BA.2.86": "#FF20E0",  # pink elephant variant
    "undetermined": "#969696",
}

In [None]:
### Input
# directory where V-pipe was run; the deconvoluted JSON is read from here
datadir = "./work-vp-test/variants/"


#### Output
plots_dir = "./plots"  # for graphics files (the combined PDF is saved here)
outdir = "."  # for data output (the combined JSON backup is written here)

In [None]:
# Load data to be uploaded: path of the smoothed/deconvoluted input JSON
jsonfile_smooth = os.path.join(datadir, "deconvoluted_upload.json")
# presumably earlier input file names, kept for reference:
# 'all_bootstrap_upload.json') # 'basel_upload.json') #"lausanneupdate.json") # all_upload.json")

# backup the combined data (location -> variant layout, as sent to the database)
update_data_combined_file = os.path.join(outdir, "ww_update_data_combined.json")
# variant -> location layout, written and uploaded further down
# NOTE(review): hard-coded absolute /tmp path, unlike the other outputs that use `outdir`
reformatted = "/tmp/ww_update_data_smooth_kernel_rob.json"

# Data

In [None]:
# Smoothed curves: report how old the input file is, then parse it.
smooth_mtime = time.ctime(os.path.getmtime(jsonfile_smooth))
print(f"reusing {jsonfile_smooth} last modified: {smooth_mtime}")

with open(jsonfile_smooth) as smooth_handle:
    update_data_smooth = json.load(smooth_handle)

In [None]:
# Top-level keys of the smoothed input are the locations
# (they pass through a set, so the resulting order is arbitrary).
locations_file = list(set(update_data_smooth.keys()))
", ".join(locations_file)

In [None]:
# Date max (useful for the e-mail): newest date over all variants, per location
for loc_name in locations_file:
    newest = max(
        point["date"]
        for series in update_data_smooth[loc_name].values()
        for point in series["timeseriesSummary"]
    )
    print(loc_name, newest, sep="\t")

In [None]:
# date sentence in e-mail
# newest date found in any variant's time series, per location
lastdates = {}
for loc_name in locations_file:
    lastdates[loc_name] = max(
        point["date"]
        for series in update_data_smooth[loc_name].values()
        for point in series["timeseriesSummary"]
    )

# group the locations by that date, oldest date first
bydates = {}
for last_day in sorted(set(lastdates.values())):
    bydates[last_day] = [name for name, day in lastdates.items() if day == last_day]

# one sentence per date, e.g. "<Month> <day> for <loc>, <loc>."
for last_day, names in bydates.items():
    pretty_day = (
        datetime.datetime.strptime(last_day, "%Y-%m-%d").date().strftime("%B %d")
    )
    print(f"{pretty_day} for {', '.join(sorted(names))}.")

In [None]:
# Cut-off dates applied to the uploaded curves. A key may be a location
# (cities which don't want a report before a given date) or a variant
# (variants that are only reported after a given date).
only_start_from = {
    "Kanton Zürich": "2021-08-15",  # start_date
    "Lausanne (VD)": "2023-01-01",
    # 'B.1.1.529':'2021-10-05',
}
print(only_start_from)


def _not_clipped(key, date):
    """Return True unless `key` has a cut-off in only_start_from that is later than `date`."""
    return key not in only_start_from or date >= only_start_from[key]


# HACK presume that the smoothing data is the exact set that we want to upload
locations_upload = locations_file
variants_upload = sorted({v for c in update_data_smooth.values() for v in c})

update_data = {}
for loc in tqdm(locations_upload, desc="Locations", position=0):
    per_variant = {}
    update_data[loc] = per_variant
    for var in tqdm(variants_upload, desc=loc, position=1, leave=False):
        # keep only the points that pass both the location and the variant cut-off
        kept_points = [
            x
            for x in update_data_smooth[loc][var]["timeseriesSummary"]
            if _not_clipped(loc, x["date"]) and _not_clipped(var, x["date"])
        ]
        # NOTE we always junk the heatmap, it's not up to date anyway
        per_variant[var] = {
            # "updateDate": todaydate,
            "timeseriesSummary": kept_points,
            "mutationOccurrences": np.nan,
        }

# Inspect

In [None]:
# Locations that ended up in the upload set, in alphabetical order
locations = sorted(update_data)
", ".join(locations)

In [None]:
# Variants in the upload set: union of the per-location variant keys, sorted

variants = sorted(set().union(*update_data.values()))
", ".join(variants)

In [None]:
# Does every variant have a color? The plot cell indexes color_map by variant.
missing_color = [*(set(variants) - set(color_map.keys()))]
print(", ".join(missing_color))
assert not missing_color, "ERROR: some variant without color !"

In [None]:
# Load in a dataframe: one record per (location, variant, date).
# NOTE(review): the records come from update_data_smooth (unclipped input),
# not from update_data — confirm that inspecting pre-cut-off dates is intended.
records = []
for loc in tqdm(locations_upload, desc="Locations", position=0):
    for var in tqdm(variants_upload, desc=loc, position=1, leave=False):
        for d in update_data_smooth[loc][var]["timeseriesSummary"]:
            records.append(
                {
                    "location": loc,
                    "date": pd.to_datetime(d["date"]),
                    "variant": var,
                    "proportion": d["proportion"],
                    "upper": d["proportionUpper"],
                    "lower": d["proportionLower"],
                }
            )

df = pd.DataFrame.from_records(data=records)
df

In [None]:
# Show the column types of the frame built above (sanity check of the parsing).
df.dtypes

# Plot

In [None]:
# One panel per location, one line plus shaded lower/upper band per variant.
# The historical 5x3 layout is kept, but rows are added when there are more
# than 15 locations so that axes[i] can no longer run out of range.
n_cols = 3
n_rows = max(5, -(-len(locations) // n_cols))  # ceiling division
fig, axes = plt.subplots(
    nrows=n_rows,
    ncols=n_cols,
    figsize=(plotwidth, plotwidth / 2 * n_rows / 5),  # same panel height as 5 rows
    sharex=True,
)
axes = axes.flatten()

for i, loc in enumerate(tqdm(locations, desc="Locations", position=0, leave=False)):
    axes[i].set_title(loc)
    # filter by location once instead of once per (location, variant) pair
    loc_df = df[df["location"] == loc]

    for var in tqdm(variants, desc=loc, position=1, leave=False):
        tt_df = loc_df[loc_df["variant"] == var].sort_values(by=["date"]).fillna(0)
        if tt_df.size == 0:
            # nothing to draw for this variant at this location
            continue
        g = sns.lineplot(
            x=tt_df["date"],
            y=tt_df["proportion"],
            hue=tt_df["variant"],
            ax=axes[i],
            palette=color_map,
        )
        # drop the per-panel legend; a single shared legend is added below
        panel_legend = g.get_legend()
        if panel_legend is not None:
            panel_legend.remove()
        axes[i].fill_between(
            x=tt_df["date"],
            y1=np.clip(tt_df["upper"], 0.0, 1.0),
            y2=np.clip(tt_df["lower"], 0.0, 1.0),
            alpha=0.2,
            # color="grey",
            color=color_map[var],
        )

# Shared legend: collect the handles from every panel (first occurrence of
# each label wins). Reading only the last panel would miss the variants that
# were skipped there, and would fail outright when there is no location.
legend_by_label = {}
for ax in axes:
    for handle, label in zip(*ax.get_legend_handles_labels()):
        legend_by_label.setdefault(label, handle)
labels = sorted(legend_by_label)
handles = [legend_by_label[label] for label in labels]
fig.legend(
    handles,
    labels,
    loc="lower center",
    ncol=max(1, len(labels)),
    bbox_to_anchor=(0.5, 0.05),
)
fig.suptitle("Deconvolution by V-pipe")
# make sure the output directory exists before writing the PDF
os.makedirs(plots_dir, exist_ok=True)
plt.savefig(os.path.join(plots_dir, "combined-vpipe.pdf"))

# Upload

In [None]:
# Keep a local copy of exactly what is going to be uploaded.
with open(update_data_combined_file, "w") as backup:
    serialized = json.dumps(update_data)
    # python/numpy emit a bare NaN token, which is not standard JSON: use null
    backup.write(serialized.replace("NaN", "null"))

## Upload to Cov-Spectrum

In [None]:
# Host of the PostgreSQL server to upload to; the same string is used as the
# machine name when looking up the credentials in the netrc file.
# The trailing comment is presumably a former host, kept for reference.
dbhost = (
    "db-lapis.cuupxsogkmvx.eu-central-1.rds.amazonaws.com"  #'id-hdb-psgr-cp61.ethz.ch'
)

In [None]:
# load from netrc
netrc_entry = netrc.netrc().authenticators(dbhost)
if netrc_entry is None:
    # authenticators() returns None when the netrc file has neither an entry
    # for this host nor a default one; fail with a message that says so.
    raise LookupError(f"no credentials for {dbhost} found in the netrc file")
# the entry is (login, account, password): keep first and last
dbuser, dbpass = netrc_entry[0::2]

# alternative: input box
# dbuser = input(f"Enter username for database {dbhost}:\n")
# dbpass = input(f"Enter password for user {dbuser}:\n")

# alternative: enviro (no trailing commas, they would create 1-tuples)
# dbuser = os.environ['DB_USERNAME']
# dbpass = os.environ['DB_PASSWORD']

# echo the user name only, never the password
dbuser

In [None]:
# Open the PostgreSQL connection used by the upload cells below, with the
# host and credentials obtained in the previous cells.
dbconn = psycopg2.connect(
    host=dbhost,
    database="covspectrum",  #'sars_cov_2',
    user=dbuser,
    password=dbpass,
    port="5432",
)
# echo the connection object so its state shows up in the cell output
dbconn

In [None]:
# Cursor used by the upload loop. With psycopg2's default (non-autocommit)
# mode its statements only become permanent once dbconn.commit() is called.
cur = dbconn.cursor()
cur

In [None]:
# Write one row per (location, variant): overwrite `data` when a row already
# exists, insert a new row otherwise.
#
# Plain parameterized statements are used instead of an anonymous
# `DO $$ ... $$` block: psycopg2 pastes the values into the statement text, so
# inside a dollar-quoted body any value containing "$$" would end the body
# early. Nothing is permanent until the commit cell is run.
update_sql = """
    UPDATE public.wastewater_result AS ww
       SET data = %(data)s
     WHERE ww.variant_name = %(var)s AND ww.location = %(city)s
"""
insert_sql = """
    INSERT INTO public.wastewater_result (variant_name, location, data)
    VALUES (%(var)s, %(city)s, %(data)s)
"""

for loc in tqdm(locations, desc="Locations", position=0):
    for pango in tqdm(variants, desc=loc, position=1, leave=True):
        row = {
            # NaN is not valid JSON: store null instead
            "data": json.dumps(update_data[loc][pango]).replace("NaN", "null"),
            "var": pango,
            "city": loc,
        }
        cur.execute(update_sql, row)
        if cur.rowcount == 0:
            # no existing row matched this variant/location pair
            cur.execute(insert_sql, row)

### Multiple-choice time: run exactly ONE of the next two cells (abort or save)

In [None]:
## Abort DB update !
# Discards everything executed through `cur` since the last commit.
# Presumably meant to be run INSTEAD of the commit cell below, not before it.
dbconn.rollback()

In [None]:
## Save to DB !
# Makes the rows written by the upload loop permanent.
# If the rollback cell above was run first, there is nothing left to commit.
dbconn.commit()

In [None]:
# Release the cursor first, then the database connection it belongs to.
cur.close()
dbconn.close()

## Upload to FOPH/BAG's Polybox

In [None]:
# Destination folder URL for the two JSON files; passed as "$1" to the
# %%bash upload cell at the end of the notebook.
polybox_url = "https://bs-pangolin@polybox.ethz.ch/remote.php/dav/files/bs-pangolin/Shared/BAG-COWWID19/"

In [None]:
# Same curves as update_data, but nested variant -> location instead of
# location -> variant, and without the "mutationOccurrences" field.
# The key and the lookup must use the comprehension's own loop variable
# `pango`; a leftover global `var` from an earlier loop would collapse the
# result to a single variant.
reformat = {
    pango: {
        loc: {"timeseriesSummary": update_data[loc][pango]["timeseriesSummary"]}
        for loc in tqdm(locations, desc=pango, position=1, leave=False)
    }
    for pango in tqdm(variants, desc="Variants", position=0)
}

### only in Notebook version 7.0
# from IPython.display import JSON
# display(JSON(reformat))

reformat  # ww_update_data_smooth_kernel_rob.json

In [None]:
# Write the variant -> location layout to the file that gets uploaded below.
with open(reformatted, "w") as out_handle:
    serialized = json.dumps(reformat)
    # python/numpy emit a bare NaN token, which is not standard JSON: use null
    out_handle.write(serialized.replace("NaN", "null"))

In [None]:
%%bash -s "$polybox_url" "$update_data_combined_file" "$reformatted"
# $1 = destination folder URL, $2 = combined JSON backup, $3 = reformatted JSON
# Abort on the first failing command so a failed upload fails the cell
# instead of being masked by the exit status of the last command.
set -eu
ls -l "$2" "$3"
# --fail: exit non-zero on an HTTP error response instead of reporting success
curl --fail --netrc --upload-file "$2" "$1"
curl --fail --netrc --upload-file "$3" "$1"