In [2]:
import polars as pl
import plotly.express as px
# import plotly.graph_objects as go

# Load the task dataset and swap the raw CSV headers (numbered symptom
# columns and the PCR flag) for the readable labels used in the rest of
# the notebook. Columns not listed in the mapping keep their names.
df = pl.read_csv("Enunciado_Tarea1/datos_tarea1.csv").rename(
    {
        "1__Cold": "Cold",
        "2__Cough": "Cough",
        "3__Fever": "Fever",
        "4__Muscular_pain": "Muscular Pain",
        "5__Breathing_difficulty": "Breathing difficulty",
        "6__Self_reported_anosmia": "Self-reported anosmia",
        "7__Self_reported_ageusia": "Self-reported ageusia",
        "COVID19_PCR": "COVID19 PCR Result",
    }
)
df

Banana,Caramel,Mint,Orange,Pineapple,Vanilla,Cold,Cough,Fever,Muscular Pain,Breathing difficulty,Self-reported anosmia,Self-reported ageusia,COVID19 PCR Result
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
0,0,1,0,1,1,1,0,0,0,0,1,0,1
0,1,1,1,0,1,0,1,0,1,0,0,0,1
0,1,1,0,1,1,1,0,0,1,0,0,0,1
0,0,1,1,0,1,1,1,1,1,0,0,0,1
0,1,1,0,0,1,1,1,1,0,0,0,0,1
…,…,…,…,…,…,…,…,…,…,…,…,…,…
1,1,1,1,1,1,0,0,0,0,0,0,0,0
1,1,1,1,1,0,0,1,0,1,0,0,0,0
1,1,1,1,1,0,0,0,0,0,0,0,0,0
1,1,1,0,1,1,0,1,0,0,0,0,0,0


In [3]:
# Split the data by PCR outcome: rows with result 0 go to df_neg,
# rows with result 1 go to df_pos.
df_neg, df_pos = (
    df.filter(pl.col("COVID19 PCR Result") == outcome) for outcome in (0, 1)
)

## Hypothesis 1

**Covid-positive individuals most frequently identify orange, pineapple and mint odors.**


In [4]:
scent_cols = ["Banana", "Caramel", "Mint", "Orange", "Pineapple", "Vanilla"]

# For every scent, count the rows whose scent column equals 1, separately
# for the PCR-positive and PCR-negative frames. Each scent contributes two
# rows to the long-format table: the positive group first, then the negative.
pcr_groups = (("PCR Positive", df_pos), ("PCR Negative", df_neg))

scent_data = {"Scent": [], "Group": [], "Count": []}
for scent in scent_cols:
    for group_name, group_df in pcr_groups:
        scent_data["Scent"].append(scent)
        scent_data["Group"].append(group_name)
        scent_data["Count"].append(group_df.filter(pl.col(scent) == 1).shape[0])

scent_df = pl.from_dict(scent_data)
scent_df

Scent,Group,Count
str,str,i64
"""Banana""","""PCR Positive""",192
"""Banana""","""PCR Negative""",514
"""Caramel""","""PCR Positive""",229
"""Caramel""","""PCR Negative""",523
"""Mint""","""PCR Positive""",214
…,…,…
"""Orange""","""PCR Negative""",488
"""Pineapple""","""PCR Positive""",194
"""Pineapple""","""PCR Negative""",544
"""Vanilla""","""PCR Positive""",222


In [5]:
# Grouped bar chart: one cluster per scent, one bar per PCR group, with the
# raw count written on each bar.
fig = px.bar(
    scent_df,
    x="Scent",
    y="Count",
    color="Group",
    barmode="group",
    text="Count",
    title="Scent Detection by PCR Result",
    labels={"Count": "Count of Patients"},
)
fig.show()

Based on the plot, the hypothesis does not seem to be supported. 

## Hypothesis 2
**¿Son las personas con comorbilidades menos propensas a contraer COVID-19?**

English: Are people with comorbidities less prone to contracting COVID-19?

This means we have to check whether there is a correlation between having a symptom (e.g. fever) and the PCR test result.

In [6]:
symptoms = [
    "Cold",
    "Cough",
    "Fever",
    "Muscular Pain",
    "Breathing difficulty",
    "Self-reported anosmia",
    "Self-reported ageusia",
]

# For every symptom, count the rows whose symptom column equals 1, separately
# for the PCR-positive and PCR-negative frames. Each symptom contributes two
# rows to the long-format table: the positive group first, then the negative.
pcr_groups = (("Positive PCR", df_pos), ("Negative PCR", df_neg))

symptom_data = {"Symptom": [], "Group": [], "Count": []}
for symptom in symptoms:
    for group_name, group_df in pcr_groups:
        symptom_data["Symptom"].append(symptom)
        symptom_data["Group"].append(group_name)
        symptom_data["Count"].append(group_df.filter(pl.col(symptom) == 1).shape[0])

symptom_df = pl.from_dict(symptom_data)
symptom_df

Symptom,Group,Count
str,str,i64
"""Cold""","""Positive PCR""",151
"""Cold""","""Negative PCR""",162
"""Cough""","""Positive PCR""",154
"""Cough""","""Negative PCR""",166
"""Fever""","""Positive PCR""",93
…,…,…
"""Breathing difficulty""","""Negative PCR""",75
"""Self-reported anosmia""","""Positive PCR""",47
"""Self-reported anosmia""","""Negative PCR""",11
"""Self-reported ageusia""","""Positive PCR""",26


In [7]:
# Grouped bar chart: one cluster per symptom, one bar per PCR group, with the
# raw count written on each bar.
fig = px.bar(
    symptom_df,
    x="Symptom",
    y="Count",
    color="Group",
    barmode="group",
    text="Count",
    title="Symptoms by PCR Result",
    labels={"Count": "Count of Patients"},
)
fig.show()

## Hypothesis 3
¿Existe una relación entre las dificultades respiratorias y la función olfativa en individuos?

English: Is there a relationship between breathing difficulties and olfactory function in individuals?

### Thought process
Build an olfactory score by summing the number of olfactory tests that were positive.
Then, check if there is a correlation between the olfactory score and the breathing difficulties.

In [8]:
scents = ["Banana", "Caramel", "Mint", "Orange", "Pineapple", "Vanilla"]

# Row-wise sum of the six scent columns, stored as "Olifactory Score".
df = df.with_columns(
    pl.sum_horizontal([pl.col(name) for name in scents]).alias("Olifactory Score")
)

# Narrow view holding only the scent columns, the score and the
# breathing-difficulty flag; list its columns as a quick sanity check.
olifactory_df = df.select([*scents, "Olifactory Score", "Breathing difficulty"])
olifactory_df.columns

['Banana',
 'Caramel',
 'Mint',
 'Orange',
 'Pineapple',
 'Vanilla',
 'Olifactory Score',
 'Breathing difficulty']

In [9]:
# Box plot of the olfactory score for each value of the "Breathing difficulty"
# flag. The raw flag is used directly for both the x axis and the color
# grouping; no relabeling to "Yes"/"No" is applied.
fig = px.box(
    olifactory_df,
    x="Breathing difficulty",
    y="Olifactory Score",
    color="Breathing difficulty",
    title="Olifactory Score by Breathing Difficulty",
)
fig.show()

In [10]:
# Count how many patients reach each olfactory score, split by whether they
# have breathing difficulties, and show the result as a grouped bar chart.

# The score is a sum over the columns in `scents`, so the possible values run
# from 0 up to len(scents) inclusive.
olifactory_scores = list(range(len(scents) + 1))

# Filter once per breathing group instead of once per (group, score) pair.
# Order matters: for every score the "Difficulties Breathing" row is emitted
# before the "No Difficulties Breathing" row.
breathing_groups = (
    ("Difficulties Breathing", df.filter(pl.col("Breathing difficulty") == 1)),
    ("No Difficulties Breathing", df.filter(pl.col("Breathing difficulty") == 0)),
)

olifactory_data = {"Olifactory Score": [], "Group": [], "Count": []}
for score in olifactory_scores:
    for group_name, group_df in breathing_groups:
        olifactory_data["Olifactory Score"].append(score)
        olifactory_data["Group"].append(group_name)
        olifactory_data["Count"].append(
            group_df.filter(pl.col("Olifactory Score") == score).height
        )

# Stored under its own name so the per-patient `olifactory_df` built earlier
# in the notebook is not overwritten; the box-plot cell that reads it stays
# re-runnable after this cell has executed.
olifactory_counts_df = pl.from_dict(olifactory_data)

fig = px.bar(
    olifactory_counts_df,
    x="Olifactory Score",
    y="Count",
    color="Group",
    barmode="group",
    labels={"Count": "Count of Patients"},
    title="Olifactory Score by Breathing Difficulty",
    text="Count",
)
fig.show()