In [18]:
import pandas as pd
import numpy as np
from great_tables import GT, html
from great_tables import GT, style, loc

# Updated data based on the final corrected dataset
new_data = {
    '': [
        'Diagnosis', 'Hypopharynx', 'Larynx', 'Oral Cavity', 'Oropharynx',
        'Age', 'Mean (SD)', 'Range',
        'Gender', 'Male', 'Female',
        'Smoking', 'Yes', 'No',
        'Alcohol', 'Yes', 'No',
        'Race', 'White', 'Black or African American', 'Native Hawaiian or Other Pacific Islander', 'Unknown',
        'Ethnicity', 'Hispanic', 'Non-Hispanic', 'Unknown',
        'HPV', 'Yes', 'Unknown', 'No',
        'Risk Stratification', 'Intermediate', 'High'
    ],
    'Responder (N=40)': [
        '', '0 (0.0%)', '5 (12.5%)', '35 (87.5%)', '0 (0.0%)',
        '', '58.4 (11.0)', '28 - 76',
        '', '29 (72.5%)', '11 (27.5%)',
        '', '23 (57.5%)', '17 (42.5%)',
        '', '17 (42.5%)', '23 (57.5%)',
        '', '39 (97.5%)', '0 (0.0%)', '1 (2.5%)', '0 (0.0%)',
        '', '0 (0.0%)', '40 (100.0%)', '0 (0.0%)',
        '', '0 (0.0%)', '26 (65.0%)', '14 (35.0%)',
        '', '23 (57.5%)', '17 (42.5%)'
    ],
    'Non-Responder (N=28)': [
        '', '1 (3.6%)', '2 (7.1%)', '24 (85.7%)', '1 (3.6%)',
        '', '53.6 (12.5)', '27 - 72',
        '', '19 (67.9%)', '9 (32.1%)',
        '', '20 (71.4%)', '8 (28.6%)',
        '', '14 (50.0%)', '14 (50.0%)',
        '', '26 (92.9%)', '1 (3.6%)', '0 (0.0%)', '1 (3.6%)',
        '', '1 (3.6%)', '26 (92.9%)', '1 (3.6%)',
        '', '1 (3.6%)', '15 (53.6%)', '12 (42.9%)',
        '', '8 (28.6%)', '20 (71.4%)'
    ],
    'Total (N=68)': [
        '', '1 (1.5%)', '7 (10.3%)', '59 (86.8%)', '1 (1.5%)',
        '', '56.426 (11.804)', '27.000 - 76.000',
        '', '48 (70.6%)', '20 (29.4%)',
        '', '43 (63.2%)', '25 (36.8%)',
        '', '31 (45.6%)', '37 (54.4%)',
        '', '65 (95.6%)', '1 (1.5%)', '1 (1.5%)', '1 (1.5%)',
        '', '1 (1.5%)', '66 (97.1%)', '1 (1.5%)',
        '', '1 (1.5%)', '41 (60.3%)', '26 (38.2%)',
        '', '31 (45.6%)', '37 (54.4%)'
    ],
    'p value': [
        '0.345', '', '', '', '',
        '0.100', '', '',
        '0.679', '', '',
        '0.241', '', '',
        '0.541', '', '',
        '0.309', '', '', '', '',
        '0.229', '', '', '',
        '0.359', '', '', '',
        '0.018', '', ''
    ]
}

df = pd.DataFrame(new_data)

def identify_main_categories(df):
    return df[df['p value'].notna()][''].tolist()

main_categories = identify_main_categories(df)

table = (
    GT(df)
    .tab_header(
        title="Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response",
        subtitle="Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.",
    )
    .tab_style(style.fill("#FFF8F9"), loc.title())
    .tab_style(style.fill("#FFF8F9"), loc.subtitle())
    .tab_spanner(
        label="Response Status",
        columns=["Responder (N=40)", "Non-Responder (N=28)"]
    )
    .tab_style(style.text(weight="bold"), loc.spanner_labels(ids=["Response Status"]))
    .tab_style(style.text(weight="bold"), loc.column_labels(columns=["Responder (N=40)", "Non-Responder (N=28)", "Total (N=68)", "p value"]))
    .fmt_markdown(columns=[""])
    .tab_style(
        style=style.text(weight="bold"),
        locations=loc.body(rows=[0, 5, 8, 11, 14, 17, 22, 26, 30])  # Row indices for main categories
    )
    .tab_style(
        style=style.fill(color="white"),
        locations=loc.body(rows=list(range(0, len(df), 1)))
    )
    .tab_style(
        style=style.fill(color="#FFF8F9"),
        locations=loc.body(rows=[0, 5, 8, 11, 14, 17, 22, 26, 30])  # Row indices for main categories
    )
    .cols_align(
        align="center",
        columns=["Responder (N=40)", "Non-Responder (N=28)", "Total (N=68)", "p value"]
    )
    .cols_align(
        align="left",
        columns=[""]
    )
    .opt_row_striping()
    .opt_align_table_header(align="left")
    .opt_horizontal_padding(scale=3)
)

In [19]:
table.write_raw_html('metadata_table.html')

In [20]:
table

Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response,Table 1. Demographic Characteristics of the HNSCC Study Cohort by Treatment Response
"Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment.","Overview of patient demographics in the head and neck squamous cell carcinoma (HNSCC) cohort, categorized based on their response to treatment."
Unnamed: 0_level_2,Response Status,Response Status,Total (N=68),p value
Unnamed: 0_level_3,Responder (N=40),Non-Responder (N=28),Total (N=68),p value
Diagnosis,,,,0.345
Hypopharynx,0 (0.0%),1 (3.6%),1 (1.5%),
Larynx,5 (12.5%),2 (7.1%),7 (10.3%),
Oral Cavity,35 (87.5%),24 (85.7%),59 (86.8%),
Oropharynx,0 (0.0%),1 (3.6%),1 (1.5%),
Age,,,,0.1
Mean (SD),58.4 (11.0),53.6 (12.5),56.426 (11.804),
Range,28 - 76,27 - 72,27.000 - 76.000,
Gender,,,,0.679
Male,29 (72.5%),19 (67.9%),48 (70.6%),


In [21]:
df.to_csv('T1.csv', index=False)