# Calculate odds ratios
Sample code to calculate odds ratios, p-values, and 95% confidence intervals
for rates of chemosensory changes in COVID patients during successive waves of COVID.

Reproduces the analysis published in [Decreasing Incidence of Chemosensory Changes by COVID-19 Variant](https://journals.sagepub.com/doi/abs/10.1177/01945998221097656),
although the confidence intervals differ slightly because I used an 
[online calculator](https://www.medcalc.org/calc/odds_ratio.php) to compute 
them for the paper.


In [None]:
from math import isclose
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact
from scipy.stats.contingency import odds_ratio

In [None]:
# Build the count table from previously published figures: one row per
# smell/taste-loss flag value and one count column per wave.
cols = ['smell_taste_loss', 'untyped_42', 'alpha_42', 'delta_42', 'omicron_42']
data = [
    [0, 117904, 73947, 150713, 281291],
    [1, 1499, 470, 892, 570],
]
df = pd.DataFrame(data, columns=cols)
# Keep the rows ordered by the flag value (0 first, then 1).
df = df.sort_values(by='smell_taste_loss')
df

In [None]:
def calculate_odds_ratio(two_by_two: pd.DataFrame):
    """Calculate the odds ratio, p-value, and 95% CI for a 2x2 table.

    The odds ratio and p-value are the ones reported by
    ``scipy.stats.fisher_exact``; the confidence interval is taken from
    ``scipy.stats.contingency.odds_ratio`` (called with its default
    settings), so the interval is computed by a different routine than the
    point estimate.

    Args:
        two_by_two: 2x2 table of integer counts.

    Returns:
        A tuple ``(odds_ratio, p_value, (lower_ci, upper_ci))`` in which
        every number is rounded to four decimal places.
    """
    fisher_or, pvalue = fisher_exact(two_by_two)
    interval = odds_ratio(two_by_two).confidence_interval(confidence_level=0.95)
    lower_ci, upper_ci = interval

    # Round to four decimal places (not significant digits): a p-value
    # smaller than 0.00005 is therefore reported as 0.0.
    fisher_or, pvalue, lower_ci, upper_ci = (
        round(value, 4) for value in (fisher_or, pvalue, lower_ci, upper_ci)
    )
    return fisher_or, pvalue, (lower_ci, upper_ci)


In [None]:
# Calculate odds ratios for each wave vs. untyped.
wave_cols = [c for c in df.columns if '42' in c]

# Index by the smell/taste-loss flag so counts are looked up by label rather
# than inferred from which of the two counts happens to be smaller.
counts = df.set_index('smell_taste_loss')

wave_stats = []
for wave in wave_cols:
    if wave != 'untyped_42':
        # Rows: flag 0 then flag 1; columns: reference wave, comparison wave.
        two_by_two = counts.loc[[0, 1], ['untyped_42', wave]]
        odds, p, ci = calculate_odds_ratio(two_by_two)
    else:
        # The reference wave is not compared against itself.
        odds = p = ci = '-'

    # Output column names are kept as-is: 'covid' holds the flag == 1 count
    # and 'control' holds the flag == 0 count.
    covid = counts.loc[1, wave]
    control = counts.loc[0, wave]
    total = counts[wave].sum()
    prevalence = (covid / total).round(4)
    wave_stats.append([covid, control, total, prevalence, odds, p, str(ci)])
cols = ['covid', 'control', 'total', 'prevalence', 'odds_ratio', 'p_value',
        'CI_95']
wave_stats = pd.DataFrame(wave_stats, columns=cols, index=wave_cols)
wave_stats = wave_stats.reset_index().rename(columns={'index': 'wave'})
wave_stats

# Intra-wave odds ratios by disease severity

In [None]:
# Dummy severity dataset: one row per (severity, smell/taste-loss flag) pair,
# with the count stored in the single wave column.
severity_levels = ['Mild', 'Moderate', 'Severe']
counts_by_loss = {
    0: [80, 15, 5],
    1: [40, 7, 3],
}
data = [
    [level, loss_flag, count]
    for loss_flag, level_counts in counts_by_loss.items()
    for level, count in zip(severity_levels, level_counts)
]
df = pd.DataFrame(data, columns=['severity', 'smell_taste_loss', 'untyped_42'])
df

In [None]:
# Calculate odds ratios for moderate and severe disease vs. mild.
def get_count(df: pd.DataFrame, severity: str, smell_taste_loss: int, 
              wave: str):
    """Return the count stored for one severity / smell-taste-loss pair.

    Args:
        df: Table with ``severity`` and ``smell_taste_loss`` columns plus
            one count column per wave.
        severity: Value to match in the ``severity`` column.
        smell_taste_loss: Value to match in the ``smell_taste_loss`` column.
        wave: Name of the count column to read.

    Returns:
        The value of ``wave`` in the first matching row.

    Raises:
        IndexError: If no row matches the requested combination.
        KeyError: If ``wave`` is not a column of ``df``.
    """
    is_match = ((df['severity'] == severity)
                & (df['smell_taste_loss'] == smell_taste_loss))
    matches = df.loc[is_match, wave]
    if matches.empty:
        # Same exception type as indexing an empty array, but with a message
        # that names the combination that was not found.
        raise IndexError(
            f"no row with severity={severity!r} and "
            f"smell_taste_loss={smell_taste_loss!r}")
    return matches.iloc[0]

wave_cols = [name for name in df.columns if '42' in name]
severities = ['Mild', 'Moderate', 'Severe']
rows = []
for wave in wave_cols:
    # Mild disease is the reference group within each wave.
    control_mild = get_count(df, 'Mild', 0, wave)
    st_mild = get_count(df, 'Mild', 1, wave)

    for severity in severities:
        control_severity = get_count(df, severity, 0, wave)
        st_severity = get_count(df, severity, 1, wave)

        if severity != 'Mild':
            # Rows: no loss, loss; columns: Mild (reference), this severity.
            two_by_two = pd.DataFrame([
                [control_mild, control_severity],
                [st_mild, st_severity],
            ])
            odds, p, ci = calculate_odds_ratio(two_by_two)
        else:
            # The reference group gets placeholders instead of statistics.
            odds = p = ci = '-'

        rows.append(
            (wave, severity, control_severity, st_severity, odds, p, ci))

result_columns = ['wave', 'severity', 'No_ST_Loss', 'ST_Loss', 'OR', 'p',
                  'CI_95']
pd.DataFrame(rows, columns=result_columns)