# Lecture 4.1: Descriptives and "Table 1"

## Learning objectives
- Simulate a simple epidemiology dataset
- Create descriptive statistics (continuous and categorical)
- Create a "Table 1" using the `tableone` package

In [3]:
# 1. Install Required Package
!pip install tableone



In [4]:
# 2. Import Libraries
import pandas as pd
import numpy as np
from tableone import TableOne

In [5]:
# 3. Simulate Example Data
# The global NumPy seed makes the simulated cohort reproducible. The draws below
# consume the random stream in a fixed order (age, sex, bmi, smoker, group), so
# they must not be reordered.
np.random.seed(2025)
n = 500

simulated = {}
# age in years: normal(mean=50, sd=10), rounded to one decimal
simulated['age'] = np.random.normal(50, 10, n).round(1)
# binary sex: both labels equally likely
simulated['sex'] = np.random.choice(['Male', 'Female'], size=n)
# body mass index: normal(mean=27, sd=5), rounded to one decimal
simulated['bmi'] = np.random.normal(27, 5, n).round(1)
# smoker status: 20% 'Yes', 80% 'No'
simulated['smoker'] = np.random.choice(['Yes', 'No'], size=n, p=[0.2, 0.8])
# random group assignment: both arms equally likely
simulated['group'] = np.random.choice(['Treatment', 'Control'], size=n)

# Column order follows the insertion order of the dict above.
data = pd.DataFrame(simulated)

# Quick look at the data
data.head()

Unnamed: 0,age,sex,bmi,smoker,group
0,49.1,Female,19.8,No,Treatment
1,57.3,Male,30.8,No,Treatment
2,35.6,Female,28.2,No,Control
3,43.4,Male,27.6,No,Treatment
4,49.0,Female,29.9,No,Treatment


In [6]:
# 4. Basic Descriptive Statistics

# Continuous variables: count, mean, SD, min/max and quartiles for age and BMI
data.loc[:, ['age', 'bmi']].describe()

Unnamed: 0,age,bmi
count,500.0,500.0
mean,49.3954,27.312
std,9.656563,4.954597
min,25.6,11.5
25%,42.4,24.075
50%,49.05,27.3
75%,56.525,30.5
max,79.1,41.6


In [8]:
# Categorical variables: share of each category, expressed as a percentage
data['sex'].value_counts(normalize=True).mul(100)

sex
Male      52.0
Female    48.0
Name: proportion, dtype: float64

In [9]:
# Percentage of smokers vs. non-smokers
data['smoker'].value_counts(normalize=True).mul(100)

smoker
No     80.2
Yes    19.8
Name: proportion, dtype: float64

In [10]:
# Percentage of participants in each study group
data['group'].value_counts(normalize=True).mul(100)

group
Treatment    50.2
Control      49.8
Name: proportion, dtype: float64

## Creating a "Table 1"

In [16]:
# 5. Create Table 1

# Define variables of interest.
# Variables listed in `categorical` are summarised as n (%); the remaining
# variables in `columns` are summarised as mean (SD).
columns = ['age', 'sex', 'bmi', 'smoker']
categorical = ['sex', 'smoker']
groupby = 'group'  # column used to stratify the table

# Set to True to have TableOne report p-values (its `pval` option).
# False matches the library default, so the table is unchanged unless toggled.
show_pval = False

# Create TableOne object
table1 = TableOne(
    data,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    pval=show_pval,
)

# View Table 1
table1

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by group,Grouped by group,Grouped by group,Grouped by group
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,Control,Treatment
n,,,500,249,251
"age, mean (SD)",,0.0,49.4 (9.7),49.2 (9.6),49.5 (9.7)
"sex, n (%)",Female,,240 (48.0),124 (49.8),116 (46.2)
"sex, n (%)",Male,,260 (52.0),125 (50.2),135 (53.8)
"bmi, mean (SD)",,0.0,27.3 (5.0),27.5 (5.0),27.1 (4.9)
"smoker, n (%)",No,,401 (80.2),202 (81.1),199 (79.3)
"smoker, n (%)",Yes,,99 (19.8),47 (18.9),52 (20.7)


In [17]:
# 6. Export Table 1 to CSV (optional)
# The path is relative, so the file is written to the current working directory.
table1_path = 'table1.csv'
table1.to_csv(table1_path)