# Analyse de la prévalence de la maladie

## Nettoyage des données

Préambule

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style("whitegrid")

Chargement des données

In [None]:
from getting_started import df_patient, df_pcr, pd

# Let pandas pick the best nullable dtype for every column of both frames.
df_patient, df_pcr = (frame.convert_dtypes() for frame in (df_patient, df_pcr))

Nettoyage et déduplication

In [None]:
from entity_resolution import detect_duplicates

# Run the project's duplicate-detection step on the patient table.
# NOTE(review): what detect_duplicates adds or removes is not visible here — confirm.
df_patient = df_patient.pipe(detect_duplicates)

In [None]:
# Preview the first ten patient records.
df_patient.iloc[:10]

In [None]:
# Every record starts as its own entity: dedup_id defaults to the record's own
# index label.
df_patient["dedup_id"] = df_patient.index.to_series().astype("Int64")

# Records sharing a phone number with at least one other record.
# Only a missing phone number disqualifies a record here: a bare dropna() would
# also discard records with any other missing field and leave those duplicates
# unlinked.
shared_phone = (
    df_patient[df_patient.duplicated("phone_number", keep=False)]
    .dropna(subset=["phone_number"])
    .reset_index()[["phone_number", "patient_id"]]
)

# Pair each record with every record on the same phone number, then keep the
# smallest patient_id of the group as the shared dedup_id.
dedup_by_phone = (
    shared_phone
    .merge(shared_phone, on="phone_number")
    .drop(columns="phone_number")
    .groupby("patient_id_x").min()
    .rename_axis("patient_id")
    .rename(columns={"patient_id_y": "dedup_id"})
    .astype("Int64")
)

# Overwrite the default dedup_id for the records linked through a phone number
# (update aligns on the index and modifies df_patient in place).
df_patient.update(dedup_by_phone)
df_patient = df_patient.convert_dtypes()

df_patient

In [None]:
# Print pandas' summary of the patient table (columns, dtypes, non-null counts).
df_patient.info()

Taux de déduplication

In [None]:
# Share of patient records whose dedup_id is carried by at least one other record.
in_duplicate_group = df_patient["dedup_id"].duplicated(keep=False)
int(in_duplicate_group.sum()) / len(df_patient)

Conversion des résultats de test PCR en variable catégorielle

In [None]:
# Keep only the first character of each result and store it as an ordered
# category (N < P); any other first character becomes missing.
first_letter = df_pcr["pcr"].str[0]
df_pcr["pcr"] = pd.Categorical(first_letter, categories=["N", "P"], ordered=True)

df_pcr["pcr"].value_counts()

Jointure avec le référentiel de patient

In [None]:
# Attach each test's dedup_id by looking up its patient_id in the patient index.
# validate="m:1" makes pandas raise if that index is not unique.
df_pcr = df_pcr.merge(
    df_patient[["dedup_id"]],
    how="inner",
    left_on="patient_id",
    right_index=True,
    validate="m:1",
)

df_pcr["pcr"].value_counts()

Patients affectés par COVID-19

In [None]:
# One row per deduplicated patient: the maximum PCR result across all of the
# patient's tests (P wins over N when pcr is the ordered N < P category),
# joined to a single patient record.
# df_patient holds several rows per dedup_id for duplicated patients; merging it
# unreduced would repeat those patients and inflate every downstream count, so
# only the first record of each dedup_id is kept (which record is kept is
# arbitrary).
df_prevalance = (
    df_pcr[["dedup_id", "pcr"]]
    .groupby("dedup_id").max()
    .rename(columns={"pcr": "affected"})
    .merge(
        df_patient.drop_duplicates(subset="dedup_id"),
        on="dedup_id",
        validate="1:1",
    )
    .set_index("dedup_id")
)

df_prevalance

Prévalence à l'échelle de l'âge

In [None]:
# Bin ages into ten-year brackets. Bins are right-closed, so without
# include_lowest=True an age of exactly 0 would fall outside the first bracket
# and become missing. Ages above 100 are still left unbinned.
df_prevalance["age_category"] = pd.cut(
    df_prevalance["age"],
    bins=range(0, 101, 10),
    include_lowest=True,
)

df_prevalance[["age", "age_category"]]

In [None]:
# Per age bracket: number of positive patients ("affected") and number of
# patients with a known result ("tested").
# The indicators are built explicitly because "affected" is a categorical
# column: replace() would leave it categorical, and pandas cannot sum a
# categorical.
df_prevalence_age = (
    df_prevalance
    .assign(
        affected=df_prevalance["affected"].eq("P").astype("int64"),
        tested=df_prevalance["affected"].notna().astype("int64"),
    )
    # observed=False keeps empty age brackets in the table (and on the chart).
    .groupby("age_category", observed=False)[["affected", "tested"]]
    .sum()
)

fig, ax = plt.subplots(figsize=(20, 8))
df_prevalence_age.plot.bar(stacked=True, ax=ax)

Prévalence à l'échelle de l'état

In [None]:
# Per state: number of positive patients ("affected") and number of patients
# with a known result ("tested").
# The indicators are built explicitly because "affected" is a categorical
# column: replace() would leave it categorical, and pandas cannot sum a
# categorical.
df_prevalence_state = (
    df_prevalance
    .assign(
        affected=df_prevalance["affected"].eq("P").astype("int64"),
        tested=df_prevalance["affected"].notna().astype("int64"),
    )
    .groupby("state")[["affected", "tested"]]
    .sum()
)

fig, ax = plt.subplots(figsize=(20, 8))
df_prevalence_state.plot.bar(stacked=True, ax=ax)

Prévalence à l'échelle du code postal

In [None]:
# Per postcode: number of positive patients ("affected") and number of patients
# with a known result ("tested").
# The indicators are built explicitly because "affected" is a categorical
# column: replace() would leave it categorical, and pandas cannot sum a
# categorical.
df_prevalence_postcode = (
    df_prevalance
    .assign(
        affected=df_prevalance["affected"].eq("P").astype("int64"),
        tested=df_prevalance["affected"].notna().astype("int64"),
    )
    .groupby("postcode")[["affected", "tested"]]
    .sum()
)

# Postcodes with the most positive patients first.
df_prevalence_postcode.sort_values("affected", ascending=False)

Cartographie

In [None]:
# The ipyleaflet heatmap layer class is spelled `Heatmap`, matching its use below.
from ipyleaflet import Choropleth, Heatmap, LegendControl, Map, basemaps
from branca.colormap import linear
import json

# Base map using the centre and zoom level given below.
map_ = Map(basemap=basemaps.OpenStreetMap.BlackAndWhite, center=(-25.8, 136.8698), zoom=4)

# Read the state outlines; the context manager closes the file afterwards.
with open("aus_state.geojson") as geojson_file:
    geo_data = json.load(geojson_file)

# Re-key the per-state positive counts from state abbreviation to integer id.
# NOTE(review): ids 0-7 presumably match the feature ids in aus_state.geojson,
# and id 8 a feature with no patient data (set to 0) — confirm against the file.
choro_data = df_prevalence_state["affected"].rename(index={"nsw":0, "vic":1, "qld":2, "sa":3, "wa":4, "tas":5, "nt":6, "act":7}).to_dict()
choro_data.update({8: 0})

choropleth = Choropleth(
    geo_data=geo_data,
    choro_data=choro_data,
    colormap=linear.YlOrRd_04,
    style={'fillOpacity': 0.8, 'dashArray': '5, 5'},
)

map_.add_layer(choropleth)

# TODO: the heatmap is unfinished. Heatmap expects `locations` as a list of
# [lat, lng, intensity] points, not a DataFrame; the coordinate columns of
# aus_postcode.csv are not visible here, so the conversion is still to be
# written.
# NOTE(review): the CSV postcodes are read as strings — confirm they match the
# dtype of the postcode index of df_prevalence_postcode before merging.
locations = pd.read_csv("aus_postcode.csv", dtype=str).set_index("postcode").convert_dtypes()
# Join the per-postcode counts (indexed by postcode) with the postcode reference.
locations = df_prevalence_postcode.merge(locations, left_index=True, right_index=True)

heatmap = Heatmap(
    locations=locations,
    radius=10,
    min_opacity=1,
    gradient={
        0.4: 'blue',
        0.6: 'cyan',
        0.7: 'yellow',
        0.8: 'orange',
        1.0: 'red'
    }
)

map_.add_layer(heatmap)

map_