<a href="https://colab.research.google.com/github/gitcnk/covid_experiment/blob/main/covid_heatmap_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# prompt: covid_clean = read.csv('https://raw.githubusercontent.com/gitcnk/Data/refs/heads/master/Stat254/covid_clean.csv')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

covid_clean = pd.read_csv('https://raw.githubusercontent.com/gitcnk/Data/refs/heads/master/Stat254/covid_clean.csv')

In [4]:
covid_clean.head()

Unnamed: 0,admitted_to_icu,diabetes,pneumonia,age,obesity,hypertension,covid_res,ICU_status
0,No,No,No,54,Yes,No,Positive,0
1,No,No,Yes,30,No,No,Positive,0
2,No,Yes,No,60,No,Yes,Positive,0
3,Yes,Yes,Yes,47,No,No,Positive,1
4,No,No,No,63,No,Yes,Positive,0


In [7]:
# Assuming covid_clean is a pandas DataFrame
# Filter data for age < 90
tiny_data = covid_clean[covid_clean['age'] < 90]

# Group by specified columns and calculate required statistics
icu_percent_data = (
    tiny_data
    .groupby(['diabetes', 'hypertension', 'obesity', 'pneumonia', 'age'], as_index=False)
    .agg(
        sample_size=('ICU_status', 'size'),
        icu_count=('ICU_status', 'sum'),
        icu_percent=('ICU_status', lambda x: x.sum() / x.size)
    )
)

# Drop rows with missing values
icu_percent_data = icu_percent_data.dropna()

# Create `bucket_code` column
icu_percent_data['bucket_code'] = (
    icu_percent_data['diabetes'].astype(int) - 1 +
    icu_percent_data['hypertension'].astype(int) - 1 +
    icu_percent_data['obesity'].astype(int) - 1 +
    icu_percent_data['pneumonia'].astype(int) - 1
)

# Define the custom function to convert a row to a string representation
def myf(row):
    return ', '.join(map(str, row))

# Apply the function to create `bucket_id`
icu_percent_data['bucket_id'] = icu_percent_data.apply(
    lambda row: myf(row[['diabetes', 'hypertension', 'obesity', 'pneumonia']]), axis=1
)


ValueError: invalid literal for int() with base 10: 'No'

In [None]:

# Assuming `icu_percent_data` is a pandas DataFrame
# Example column names: 'age', 'bucket_id', 'icu_percent', 'sample_size'

# Create the plot
fig, ax = plt.subplots(figsize=(10, 6))

# Create a pivot table for heatmap (optional, depends on the data structure)
heatmap_data = icu_percent_data.pivot("bucket_id", "age", "icu_percent")

# Draw the heatmap
sns.heatmap(
    heatmap_data,
    ax=ax,
    cmap="RdYlBu",  # Adjust the color scheme similar to 'green' to 'purple'
    linewidths=1,
    linecolor="black",
    cbar_kws={"label": "ICU Percent"}
)

# Add an optional overlay for sample size as annotations
for i, row in icu_percent_data.iterrows():
    ax.text(
        row['age'],
        row['bucket_id'],
        f"{row['sample_size']}",
        ha='center', va='center', color='red', fontsize=8
    )

# Final adjustments
plt.title("ICU Percent Heatmap with Sample Size")
plt.xlabel("Age")
plt.ylabel("Bucket ID")
plt.tight_layout()

plt.show()
