# Analyse de la prévalence de la maladie

## Nettoyage des données

Préambule

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Default geometry for every figure in this notebook: 12x8 at 100 dpi.
plt.rcParams.update({
    "figure.figsize": (12, 8),
    "figure.dpi": 100,
})

Chargement des données

In [None]:
from getting_started import df_patient, df_pcr, pd

# Let pandas infer the best NA-aware dtype for each column of both tables.
df_patient, df_pcr = (
    frame.convert_dtypes() for frame in (df_patient, df_pcr)
)

Nettoyage et rapprochement

In [None]:
from entity_resolution import detect_duplicates

# Run the project's entity-resolution step; the rest of the notebook relies on
# the `dedup_id` column being present on the returned frame (the matching
# rules themselves live in entity_resolution and are not visible here).
df_patient = detect_duplicates(df_patient)

# Display every record whose dedup_id occurs more than once, sorted so that
# records sharing a dedup_id appear next to each other.
df_patient.loc[
    lambda records: records["dedup_id"].duplicated(keep=False)
].sort_values(by="dedup_id")

Taux de rapprochement

In [None]:
# Percentage of patient records whose dedup_id is shared with another record.
100 * int(df_patient["dedup_id"].duplicated(keep=False).sum()) / len(df_patient)

Conversion des résultats de test PCR en variable catégorielle

In [None]:
# Reduce each PCR result to its first character and store it as an ordered
# categorical with N < P; any value starting with another character becomes
# missing because it is not one of the declared categories.
df_pcr["pcr"] = pd.Categorical(
    df_pcr["pcr"].str.get(0),
    categories=["N", "P"],
    ordered=True,
)

# Sanity check: number of results in each category.
df_pcr["pcr"].value_counts()

Fusion de l'échantillon de tests PCR avec le référentiel patient

In [None]:
# Stage 1 - one row per deduplicated patient.
# Each PCR row is mapped to the dedup_id of its patient (validate="m:1" makes
# pandas check that the patient index is unique), then rows are collapsed per
# dedup_id with max(). For the ordered `pcr` categorical (N < P) this keeps
# "P" as soon as one of the merged records tested positive.
pcr_per_patient = (
    df_pcr
    .merge(
        df_patient["dedup_id"],
        left_on="patient_id",
        right_index=True,
        validate="m:1",
    )
    .drop(columns=["patient_id"])
    .groupby("dedup_id")
    .max()
    .rename_axis(index="patient_id")
    .rename(columns={"pcr": "affected"})
)

# Stage 2 - attach the patient attributes of the record whose index equals the
# dedup_id, then drop the now redundant dedup_id column.
df_prevalence = (
    pcr_per_patient
    .merge(df_patient, left_index=True, right_index=True, validate="1:1")
    .drop(columns=["dedup_id"])
)


## Prévalence par catégorie d'âge

In [None]:
# Bucket ages into the groups shown on the charts: 0-9, 10-19, 20-29, 30-39
# and 40+.
#
# `right=False` makes every bin left-closed ([0, 10), [10, 20), ...). With the
# default right-closed bins, age 10 was counted in the group labelled "0-9",
# age 20 in "10-19", and so on.
#
# The last edge is 101 rather than 100 so that age 100, which the previous
# closed upper edge included, still falls in the last bin. Ages outside
# [0, 101) become missing, as ages outside [0, 100] did before.
df_prevalence["age_category"] = pd.cut(
    df_prevalence.age,
    bins=[0, 10, 20, 30, 40, 101],
    right=False,
    ordered=True,
)

# Per age group: number of affected patients (sum of the 0/1 flag) and number
# of patients with a non-missing result (count).
df_prevalence_age = (
    df_prevalence[["age_category", "affected"]]
    .replace({"affected": {"N": 0, "P": 1}})
    # observed=False keeps every age bin in the result, even an empty one.
    # The plots label the groups positionally, so a dropped bin would shift
    # or break the labels. Stated explicitly because the implicit default for
    # categorical keys is not the same across pandas versions.
    .groupby("age_category", observed=False)
    .agg(["sum", "count"])
    # Columns are (affected, sum) / (affected, count): keep the function level.
    .droplevel(level=0, axis="columns")
    .rename(columns={"sum": "affected", "count": "tested"})
)

In [None]:
# Human-readable names of the age groups, in bin order.
labels = ["0-9", "10-19", "20-29", "30-39", "40+"]
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))

# Left panel: share of affected patients per age group.
df_prevalence_age["affected"].plot.pie(ax=ax[0], labels=labels)
ax[0].set_title("Distribution of affected patients")
ax[0].get_yaxis().set_visible(False)

# Right panel: affected and tested counts per age group, stacked.
df_prevalence_age.plot.bar(ax=ax[1], stacked=True, rot=0)
# Binding to `_` keeps the list of tick labels out of the cell output.
_ = ax[1].set_xticklabels(labels)

## Prévalence par état

In [None]:
# Per state: number of affected patients (sum of the 0/1 flag) and number of
# patients with a non-missing result (count).
df_prevalence_state = (
    df_prevalence.loc[:, ["state", "affected"]]
    .replace({"affected": {"N": 0, "P": 1}})
    .groupby("state")
    .agg(["sum", "count"])
    # Columns are (affected, sum) / (affected, count): keep the function level.
    .droplevel(0, axis=1)
    .rename(columns={"count": "tested", "sum": "affected"})
)

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(20, 8))

# Left panel: share of affected patients per state.
df_prevalence_state.affected.plot.pie(ax=ax[0])
ax[0].set_title("Distribution of affected patients")
ax[0].yaxis.set_visible(False)

# Right panel: affected and tested counts per state, stacked.
# The returned Axes is bound to `_` so that the cell output contains only the
# figure and not the textual repr of the Axes object.
_ = df_prevalence_state.plot.bar(stacked=True, rot=False, ax=ax[1])

## Cartographie de la prévalence

Préparation du Choropleth

In [None]:
from ipyleaflet import Choropleth
from branca.colormap import linear
import json

# Load the GeoJSON geometry used by the choropleth layer.
# The context manager closes the file handle as soon as parsing is done, and
# the encoding is pinned to UTF-8 (the GeoJSON encoding) instead of relying on
# the platform default.
with open("aus_state.geojson", encoding="utf-8") as geojson_file:
    geo_data = json.load(geojson_file)

# Numeric key for each state code, numbered 0..7 in this order.
# NOTE(review): presumably these match the feature ids of the GeoJSON file -
# confirm against aus_state.geojson.
state_ids = {
    code: feature_id
    for feature_id, code in enumerate(
        ["nsw", "vic", "qld", "sa", "wa", "tas", "nt", "act"]
    )
}

# Number of affected patients per key; key 8 has no patient data and is set
# to 0 explicitly.
choro_data = {
    **df_prevalence_state["affected"].rename(index=state_ids).to_dict(),
    8: 0,   # Other territories
}

# Choropleth layer colouring each feature by its affected-patient count.
choropleth = Choropleth(
    name="choropleth",
    geo_data=geo_data,
    choro_data=choro_data,
    colormap=linear.YlOrRd_04,
    style={"fillOpacity": 0.6, "dashArray": "5, 5"},
)

Préparation des foyers d'infection

In [None]:
from ipyleaflet import AwesomeIcon, Marker, MarkerCluster

# Postcode -> coordinates lookup with one row per postcode (the first CSV
# entry wins when a postcode is listed several times). It has its own name so
# that `locations` is not used for both a DataFrame and a list.
postcode_coordinates = (
    pd.read_csv(
        "australian_postcodes.csv",
        index_col="id",
        usecols=("id", "postcode", "long", "lat"),
        dtype={
            "id": "int64",
            "postcode": "str",
            "long": "float",
            "lat": "float",
        }
    )
    .rename(columns={"long": "longitude", "lat": "latitude"})
    .drop_duplicates("postcode", keep="first")
    .set_index("postcode")
)

# Infection clusters must only contain patients with a positive result.
# Without this filter every tested patient, including negatives, was drawn as
# a marker. `isin` returns a plain boolean mask (missing results -> False).
affected_patients = df_prevalence[df_prevalence["affected"].isin(["P"])]

# [latitude, longitude] pair of each affected patient. The merge is an inner
# join, so patients whose postcode is absent from the lookup are left out.
locations = (
    affected_patients.merge(postcode_coordinates, left_on="postcode", right_index=True)
    [["latitude", "longitude"]].values.tolist()
)

# One icon instance shared by all markers: white "plus-square" glyph on a
# black marker.
icon = AwesomeIcon(name="plus-square", marker_color="black", icon_color="white")

# One marker per [latitude, longitude] pair.
markers = [Marker(location=coordinates, icon=icon) for coordinates in locations]

# Layer grouping all the markers, listed as "clusters".
marker_cluster = MarkerCluster(name="clusters", markers=markers)

Composition de la carte interactive

In [None]:
from ipywidgets import Layout
from ipyleaflet import Choropleth, Map, basemaps
from ipyleaflet import LayersControl, SearchControl

# Base map: black-and-white OpenStreetMap tiles, initial view centred on
# (-25.8, 136.8698) at zoom level 5, rendered full width and 800px high.
map_ = Map(
    center=(-25.8, 136.8698),
    zoom=5,
    basemap=basemaps.OpenStreetMap.BlackAndWhite,
    layout=Layout(height="800px", width="100%"),
)

# Layer control placed in the top-left corner of the map.
layer_control = LayersControl(position="topleft")


# Search control
# Marker handed to the search control. The icon name was misspelled "cirle",
# which is not an existing icon name; the intended glyph is "circle".
search_marker = Marker(icon=AwesomeIcon(name="circle"))

# Search box in the top-right corner. Queries are sent to the Nominatim
# endpoint below ({s} is the placeholder for the search text), with zoom
# level 10 and the marker defined above.
search_control = SearchControl(
    position="topright",
    url="https://nominatim.openstreetmap.org/search?format=json&q={s}",
    zoom=10,
    marker=search_marker,
)

# Add the data layers, in this order: choropleth, then marker clusters.
for layer in (choropleth, marker_cluster):
    map_.add_layer(layer)

# Add the controls, in this order: layer control, then search control.
for control in (layer_control, search_control):
    map_.add_control(control)

# Last expression of the cell: renders the interactive map.
map_