In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [None]:
data_release = '2021-04-27'

# Load the coefficient table. Columns used by this notebook: `variable`, `beta`,
# `siteid`, `ci_l`, `ci_u`.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.beta.zero.std.toShare.csv"))

print(df.head())

# Short column names used by every chart below: `c` = variable label, `v` = coefficient.
df = df.rename(columns={"variable": "c", "beta": "v"})

# Month -> display label lookup; not referenced by the visible cells, kept for other users.
consistent_date = {
    '2020-03': 'Mar - Apr',
    '2020-05': 'May - Jun',
    '2020-07': 'Jul - Aug',
    '2020-09': 'Sep - Oct',
    '2020-11': 'Since Nov'
}

# Palette for the `g` (category) color scale.
colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

# Site order used when faceting, plus a parallel list of per-site colors.
# (An earlier, immediately-overwritten pair of lists with mismatched lengths was removed.)
sites = ['META', 'APHP', 'FRBDX', 'ICSM', 'UKFR', 'NWU', 'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']
site_colors = ['black', '#0072B2', '#0072B2', '#0072B2', '#0072B2', '#CB7AA7', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00','#D45E00','#D45E00']

# Site ids are compared in upper case everywhere below (e.g. 'META', 'META-USA').
df.siteid = df.siteid.str.upper()

print(df.siteid.unique().tolist())

# Raw variable name -> category shown in the color legend.
group_map = {
    'age18to25': 'Age',
    'age26to49': 'Age',
    'age70to79': 'Age',
    'age80plus': 'Age',
    'sexfemale': 'Sex',
    'raceBlack': 'Race',
    'raceAsian': 'Race',
    'raceHispanic.and.Other': 'Race',
    'CRP': 'Lab',
    'albumin': 'Lab',
    'TB': 'Lab',
    "LYM": 'Lab',
    "neutrophil_count" : 'Lab',
    "WBC" : 'Lab',
    "creatinine": 'Lab',
    "AST": 'Lab',
    "AA": 'Lab',
    "DD": 'Lab',
    'mis_CRP': 'Lab Mis.',
    'mis_albumin': 'Lab Mis.',
    'mis_TB': 'Lab Mis.',
    "mis_LYM": 'Lab Mis.',
    "mis_neutrophil_count" : 'Lab Mis.',
    "mis_WBC" : 'Lab Mis.',
    "mis_creatinine": 'Lab Mis.',
    "mis_AST": 'Lab Mis.',
    "mis_AA": 'Lab Mis.',
    'mis_DD': 'Lab Mis.',
    'charlson_score': 'Charlson Score',
    'mis_charlson_score': 'Charlson Score',
}

# Raw variable name -> human-readable axis label.
consistent_c = {
    'age18to25': '18 - 25',
    'age26to49': '26 - 49',
    'age70to79': '70 - 79',
    'age80plus': '80+',
    'sexfemale': 'Female',
    'raceBlack': 'Black',
    'raceAsian': 'Asian',
    'raceHispanic.and.Other': 'Hispanic and Other',
    'CRP': 'CRP',
    'albumin': 'Albumin',
    'TB': 'Total bilirubin',
    "LYM": 'Lymphocyte count',
    "neutrophil_count" : 'Neutrophil count',
    "WBC" : 'White blood cell',
    "creatinine": 'Creatinine',
    "AST": 'AST',
    "AA": 'AST/ALT',
    "DD": 'D-Dimer',
    'mis_CRP': 'CRP not tested',
    'mis_albumin': 'Albumin not tested',
    'mis_TB': 'Total bilirubin not tested',
    "mis_LYM": 'Lymphocyte count not tested',
    "mis_neutrophil_count" : 'Neutrophil count not tested',
    "mis_WBC" : 'White blood cell not tested',
    "mis_creatinine": 'Creatinine not tested',
    "mis_AST": 'AST not tested',
    "mis_AA": 'AST/ALT not available',  # was 'ALT/AST', inconsistent with the 'AA' label
    'mis_DD': 'D-Dimer not tested',  # was 'D-dimer nottested' (typo)
    'charlson_score': 'Charlson Score',
    'mis_charlson_score': 'Charlson Score not available',
}

# Fail loudly (as the per-row dict lookup did) if the table contains a variable
# that either lookup does not cover, instead of silently producing NaN labels.
known_variables = group_map.keys() & consistent_c.keys()
unknown_variables = [name for name in df.c.unique() if name not in known_variables]
if unknown_variables:
    raise KeyError(f"Variables missing from group_map/consistent_c: {unknown_variables}")

# `g` must be derived from the raw names, so compute it before relabelling `c`.
df['g'] = df.c.map(group_map)
df.c = df.c.map(consistent_c)

# First-appearance order of groups/labels; reused as color domain and y-axis sort order.
unique_g = df.g.unique().tolist()
print(unique_g)

unique_c = df.c.unique().tolist()
print(unique_c)

df

# All Sites

In [None]:
# Overlay point-mark definition (unfilled, white fill, stroke width 2).
# NOTE(review): not referenced by any visible cell (the `point=True` mark options
# below are commented out) — confirm whether it is still needed.
point=alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

def plot_lab(df=None, metric='cov'):
    """Draw one bar panel of coefficients per site, faceted in columns.

    Reads the notebook-level names `unique_c` (y-axis order), `unique_g` and
    `colors` (category color scale), `sites` (facet order) and `data_release`
    (subtitle).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with columns `c` (label), `v` (coefficient), `g` (category)
        and `siteid`.
    metric : str
        Unused; kept so existing calls stay valid.

    Returns
    -------
    The faceted Altair chart with title and subtitle set.
    """
    d = df.copy()

    plot = alt.Chart(
        d
    ).mark_bar(
        size=10,
    ).encode(
        y=alt.Y("c:N", title=None, axis=alt.Axis(labelAngle=0, tickCount=10), scale=alt.Scale(padding=1), sort=unique_c),
        # Fixed, clamped domain so every site panel is directly comparable.
        x=alt.X("v:Q", title=None, scale=alt.Scale(zero=True, domain=[-3,3], padding=2, nice=False, clamp=True)),
        color=alt.Color("g:N", scale=alt.Scale(domain=unique_g, range=colors), title='Category'),
    ).properties(
        width=150,
        height=250
    )

    plot = plot.facet(
        column=alt.Column("siteid:N", header=alt.Header(title=None), sort=sites)
    ).resolve_scale(color='shared')

    plot = plot.properties(
        title={
            "text": [
                "Coefficient"
            ],
            "dx": 120,
            "subtitle": [
                # Fixed typo: was "standarized".
                'Lab values are standardized by SD',
                get_visualization_subtitle(data_release=data_release, with_num_sites=False)
            ],
            "subtitleColor": "gray",
        }
    )

    return plot

# Build the all-sites figure and style it with the shared theme in one step.
plot = apply_theme(
    plot_lab(df=df),
    **{
        "axis_y_title_font_size": 16,
        "title_anchor": 'start',
        "legend_orient": 'bottom',
        "legend_title_orient": 'left',
        "axis_label_font_size": 14,
        "header_label_font_size": 16,
        "point_size": 100,
    }
)

# Last expression of the cell: renders the chart.
plot

## Final Meta

In [None]:
def plot_lab(df=None, metric='cov'):
    """Plot the 'META' rows as points with error bars and a zero reference line.

    NOTE: this reuses the name `plot_lab` from the "All Sites" cell, so once
    this cell has run the earlier definition is replaced.

    Reads the notebook-level names `unique_c` (y-axis order), `unique_g` and
    `colors` (category color scale). No title is set on the chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with columns `c`, `v`, `g`, `siteid`, `ci_l` and `ci_u`.
    metric : str
        Unused; kept so existing calls stay valid.

    Returns
    -------
    A layered Altair chart: points, then error bars, then the zero rule.
    """
    d = df.copy()
    d = d[d.siteid == 'META']

    plot = alt.Chart(
        d
    ).mark_point(
        size=120,
        filled=True,
    ).encode(
        y=alt.Y("c:N", title=None, axis=alt.Axis(labelAngle=0, tickCount=10, grid=True), scale=alt.Scale(padding=1), sort=unique_c),
        x=alt.X("v:Q", title=None, scale=alt.Scale(zero=True, domain=[-1.8,1.8], padding=0, nice=False, clamp=True)),
        color=alt.Color("g:N", scale=alt.Scale(domain=unique_g, range=colors), title='Category'),
    ).properties(
        width=550,
        height=350
    )

    # Reference rule at x = 0.
    line = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule().encode(x='x', strokeWidth=alt.value(3))

    # Error bars are derived from the point chart so they share its data and
    # size; they span ci_l..ci_u and are drawn in thin black strokes.
    tick = plot.mark_errorbar(
        opacity=0.7
    ).encode(
        y=alt.Y("c:N", sort=unique_c),
        x=alt.X("ci_l:Q"),
        x2=alt.X2("ci_u:Q"),
        stroke=alt.value('black'),
        strokeWidth=alt.value(1)
    )

    return plot + tick + line

plot = plot_lab(df=df)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100
)

# Write the PNG first so that the chart can be the cell's last expression;
# a bare `plot` placed before `save(...)` is never rendered by the notebook.
# NOTE(review): altair_saver documents the keyword as `scale_factor` — confirm
# that `scalefactor` is actually honored and not silently ignored.
save(plot,join("..", "result", "final-beta-std-zero-meta.png"), scalefactor=8.0)

plot



## Final country

In [None]:
def plot_beta(df=None, metric='cov', country=None):
    """Plot one site's coefficients as points with error bars and a zero rule.

    Rows are selected where `siteid == country`. The chart title is `country`
    with any "META-" substring removed. Reads the notebook-level names
    `unique_c`, `unique_g` and `colors`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with columns `c`, `v`, `g`, `siteid`, `ci_l` and `ci_u`.
    metric : str
        Not used by the function body.
    country : str
        Site id to plot, e.g. "META-USA".

    Returns
    -------
    A layered Altair chart: points, then the zero rule, then error bars.
    """
    site_rows = df.copy()
    site_rows = site_rows[site_rows.siteid == country]

    # Encoding channels for the point layer, named up front for readability.
    y_labels = alt.Y(
        "c:N",
        title=None,
        axis=alt.Axis(labelAngle=0, tickCount=10, grid=True),
        scale=alt.Scale(padding=1),
        sort=unique_c,
    )
    x_beta = alt.X(
        "v:Q",
        title=None,
        scale=alt.Scale(zero=True, domain=[-1.8, 1.8], padding=0, nice=False, clamp=True),
    )
    category_color = alt.Color(
        "g:N",
        scale=alt.Scale(domain=unique_g, range=colors),
        title='Category',
    )

    points = alt.Chart(site_rows).mark_point(size=120, filled=True)
    points = points.encode(y=y_labels, x=x_beta, color=category_color)
    points = points.properties(width=750, height=550)

    # Reference rule at x = 0.
    zero_rule = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule().encode(
        x='x',
        strokeWidth=alt.value(3),
    )

    # Error bars reuse the point chart's data and size, spanning ci_l..ci_u.
    error_bars = points.mark_errorbar(opacity=0.7).encode(
        y=alt.Y("c:N", sort=unique_c),
        x=alt.X("ci_l:Q"),
        x2=alt.X2("ci_u:Q"),
        stroke=alt.value('black'),
        strokeWidth=alt.value(1),
    )

    layered = points + zero_rule + error_bars

    heading = {
        "text": [country.replace("META-", "")],
        "dx": 120,
    }
    return layered.properties(title=heading)

countrylist1 = ["META-USA", "META-FRANCE"]
countrylist2 = ["META-ITALY", "META-GERMANY"]

# One panel per site id, stacked vertically; each panel keeps its own color scale.
plot1 = alt.vconcat(
    *[plot_beta(df=df, country=country) for country in countrylist1],
    spacing=30
).resolve_scale(color='independent')

# Built but currently not part of the exported figure (only `plot1` is used below).
plot2 = alt.vconcat(
    *[plot_beta(df=df, country=country) for country in countrylist2],
    spacing=30
).resolve_scale(color='independent')

plot = plot1

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100
)

# Write the PNG first so that the chart can be the cell's last expression;
# a bare `plot` placed before `save(...)` is never rendered by the notebook.
# NOTE(review): altair_saver documents the keyword as `scale_factor` — confirm
# that `scalefactor` is actually honored and not silently ignored.
save(plot,join("..", "result", "final-beta-std-zero-country.png"), scalefactor=8.0)

plot


