In [1]:
import pandas as pd
import numpy as np
from great_tables import GT, html
from great_tables import GT, style, loc

# Column headers of the demographics table, in display order. The first
# (empty-string) column holds the row labels.
table_columns = ['', 'Responder (N=31)', 'Non-Responder (N=37)', 'Total (N=68)', 'p value']

# One tuple per table row: (label, responder, non-responder, total, p value).
# Category header rows carry only a p value; the sub-rows beneath them carry
# the counts/statistics and leave the p value blank.
table_rows = [
    ('Diagnosis',         '',            '',            '',            '0.353'),
    ('Hypopharynx Tumor', '0 (0.0%)',    '1 (2.7%)',    '1 (1.5%)',    ''),
    ('Larynx Tumor',      '3 (9.7%)',    '4 (10.8%)',   '7 (10.3%)',   ''),
    ('Oral Cavity Tumor', '26 (83.9%)',  '32 (86.5%)',  '58 (85.3%)',  ''),
    ('Oropharynx Tumor',  '2 (6.5%)',    '0 (0.0%)',    '2 (2.9%)',    ''),
    ('Age',               '',            '',            '',            '0.262'),
    ('Mean (SD)',         '58.2 (10.6)', '54.9 (12.7)', '56.4 (11.8)', ''),
    ('Range',             '28 - 76',     '27 - 75',     '27 - 76',     ''),
    ('Gender',            '',            '',            '',            '0.550'),
    ('Male',              '23 (74.2%)',  '25 (67.6%)',  '48 (70.6%)',  ''),
    ('Female',            '8 (25.8%)',   '12 (32.4%)',  '20 (29.4%)',  ''),
    ('Smoking',           '',            '',            '',            '0.171'),
    ('Yes',               '15 (48.4%)',  '24 (64.9%)',  '39 (57.4%)',  ''),
    ('No',                '16 (51.6%)',  '13 (35.1%)',  '29 (42.6%)',  ''),
    ('Alcohol',           '',            '',            '',            '0.592'),
    ('Yes',               '9 (29.0%)',   '13 (35.1%)',  '22 (32.4%)',  ''),
    ('No',                '22 (71.0%)',  '24 (64.9%)',  '46 (67.6%)',  ''),
    ('Stratification',    '',            '',            '',            '0.161'),
    ('Intermediate',      '17 (54.8%)',  '14 (37.8%)',  '31 (45.6%)',  ''),
    ('High',              '14 (45.2%)',  '23 (62.2%)',  '37 (54.4%)',  ''),
]

# Transpose the row records into the column-keyed mapping used to build the frame.
new_data = {
    column: list(values)
    for column, values in zip(table_columns, zip(*table_rows))
}

# Create DataFrame
df = pd.DataFrame(new_data)

# Function to identify main categories (those with p-values)
def identify_main_categories(df):
    """Return the labels of the rows that carry a p value.

    A row counts as a main category when its ``'p value'`` cell is neither
    missing (NaN/None) nor a blank string. Checking ``notna()`` alone is not
    enough when blank cells are stored as ``''``: an empty string is not NaN,
    so every row would be selected.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'p value'`` column and a ``''`` (empty-string) column
        holding the row labels.

    Returns
    -------
    list
        Values of the ``''`` column for the rows with a p value, in row order.
    """
    p_values = df['p value']
    # Cast to str so numeric p values are handled as well; NaN/None are
    # already excluded by the notna() mask.
    has_p_value = p_values.notna() & p_values.astype(str).str.strip().ne('')
    return df.loc[has_p_value, ''].tolist()

# Get main categories
# NOTE(review): main_categories is not referenced by any of the cells visible
# here — confirm whether it is still needed.
main_categories = identify_main_categories(df)

# Create the great_tables object

# Column groups, named once so the spanner, label styling and alignment
# cannot drift apart.
label_column = ""
response_columns = ["Responder (N=31)", "Non-Responder (N=37)"]
value_columns = response_columns + ["Total (N=68)", "p value"]

# Positions of the category header rows (Diagnosis, Age, ...): the rows whose
# 'p value' cell is filled in. Derived from the data instead of a hard-coded
# index list so the styling follows the frame if rows are added or removed.
p_values = df["p value"]
has_p_value = p_values.notna() & p_values.astype(str).str.strip().ne("")
header_rows = [position for position, flagged in enumerate(has_p_value) if flagged]

# Tint shared by the title, subtitle and category header rows.
highlight_fill = "#FFF8F9"

table = (
    GT(df)
    .tab_header(
        title="Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response",
        subtitle="Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.",
    )
    .tab_style(style.fill(highlight_fill), loc.title())
    .tab_style(style.fill(highlight_fill), loc.subtitle())
    .tab_spanner(
        label="Response Status",
        columns=response_columns
    )
    .tab_style(style.text(weight="bold"), loc.spanner_labels(ids=["Response Status"]))
    .tab_style(style.text(weight="bold"), loc.column_labels(columns=value_columns))
    .fmt_markdown(columns=[label_column])
    # White background on every body row first, then tint the header rows.
    .tab_style(
        style=style.fill(color="white"),
        locations=loc.body(rows=list(range(len(df))))
    )
    .tab_style(
        style=style.fill(color=highlight_fill),
        locations=loc.body(rows=header_rows)
    )
    # Bold header rows (applied once; the original pipeline applied it twice).
    .tab_style(
        style=style.text(weight="bold"),
        locations=loc.body(rows=header_rows)
    )
    .cols_align(
        align="center",
        columns=value_columns
    )
    .cols_align(
        align="left",
        columns=[label_column]
    )
    # NOTE(review): every body row already receives an explicit fill above,
    # which may mask the stripe colour — confirm striping is still wanted.
    .opt_row_striping()
    .opt_align_table_header(align="left")
    .opt_horizontal_padding(scale=3)
)

In [2]:
# Save the table as HTML to 'metadata_table.html' (a relative path, so it is
# resolved against the kernel's current working directory).
table.write_raw_html('metadata_table.html')

In [3]:
# Bare expression as the last line of the cell: shows the table as the cell output.
table

Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response
"Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment."
Unnamed: 0_level_2,Response Status,Response Status,Total (N=68),p value
Unnamed: 0_level_3,Responder (N=31),Non-Responder (N=37),Total (N=68),p value
Diagnosis,,,,0.353
Hypopharynx Tumor,0 (0.0%),1 (2.7%),1 (1.5%),
Larynx Tumor,3 (9.7%),4 (10.8%),7 (10.3%),
Oral Cavity Tumor,26 (83.9%),32 (86.5%),58 (85.3%),
Oropharynx Tumor,2 (6.5%),0 (0.0%),2 (2.9%),
Age,,,,0.262
Mean (SD),58.2 (10.6),54.9 (12.7),56.4 (11.8),
Range,28 - 76,27 - 75,27 - 76,
Gender,,,,0.55
Male,23 (74.2%),25 (67.6%),48 (70.6%),
