# Corizo Mini Project 1 – NHANES Analysis
This notebook works through all twelve tasks of the Corizo mini‑project, using the NHANES 2020 adult male and female body‑measurement data.

## 1–2. Load Male & Female NHANES Data

In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Each CSV starts with one header row (skipped); the remaining rows are parsed
# by np.loadtxt into a 2-D float array with one row per record.
male = np.loadtxt('nhanes_adult_male_bmx_2020.csv', delimiter=',', skiprows=1)
female = np.loadtxt('nhanes_adult_female_bmx_2020.csv', delimiter=',', skiprows=1)

# Column 0 is used as body weight throughout the notebook; these are 1-D views
# into the matrices above, not copies.
male_weights = male[:, 0]
female_weights = female[:, 0]

# Preview only the first five rows of each matrix (the previous version echoed
# the entire `male` array and then previewed it a second time).
male[:5], female[:5]


## 3. Histograms of Male and Female Weights

In [None]:

# Common x-range so the two histograms can be compared directly.
xmin = min(female_weights.min(), male_weights.min())
xmax = max(female_weights.max(), male_weights.max())

# Two stacked panels: female on top, male below.
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
panels = [
    ("Female Weight Distribution", female_weights),
    ("Male Weight Distribution", male_weights),
]
for ax, (panel_title, panel_weights) in zip(axes, panels):
    ax.hist(panel_weights, bins=30)
    ax.set_title(panel_title)
    ax.set_xlim(xmin, xmax)

fig.tight_layout()
plt.show()


## 4. Boxplot Comparison of Male and Female Weights

In [None]:

plt.figure(figsize=(6, 6))
plt.boxplot([female_weights, male_weights])
# Name the boxes through the x tick labels instead of boxplot's `labels=`
# keyword: Matplotlib 3.9 deprecated `labels=` in favour of `tick_labels=`,
# whereas setting the ticks explicitly works on both old and new releases.
# boxplot places the boxes at x = 1, 2 by default.
plt.xticks([1, 2], ["Female", "Male"])
plt.title("Weight Comparison")
plt.ylabel("Weight (kg)")  # assumes weights are recorded in kilograms, as the BMI formula used later implies
plt.show()


## 5. Numerical Aggregates

In [None]:

import scipy.stats as st

def describe(x):
    """Summarise a 1-D sample with measures of location, spread and shape.

    Parameters
    ----------
    x : array-like
        Numeric sample.

    Returns
    -------
    dict
        Keys, in order: "mean", "median", "std", "skew", "kurtosis".
        "std" is ``np.std`` with its default ``ddof=0``; "skew" and
        "kurtosis" are ``scipy.stats.skew`` / ``scipy.stats.kurtosis``
        called with their default arguments.
    """
    aggregates = (
        ("mean", np.mean),
        ("median", np.median),
        ("std", np.std),
        ("skew", st.skew),
        ("kurtosis", st.kurtosis),
    )
    return {label: aggregate(x) for label, aggregate in aggregates}

# Show the two summaries side by side with explicit column labels; the bare
# tuple of dicts gave no indication of which result belonged to which group.
pd.DataFrame({"male": describe(male_weights), "female": describe(female_weights)})


## 6. Add BMI Column to Female Matrix

In [None]:

# BMI = weight / height^2 with height in metres. Column 1 is divided by 100 to
# convert it (assumes column 0 is weight in kg and column 1 is height in cm —
# confirm against the dataset description).
height_m = female[:, 1] / 100
bmi = female[:, 0] / (height_m**2)

# Append BMI only while `female` still has its 7 raw columns. Without this
# guard every re-run of the cell stacked another BMI column onto the matrix,
# shifting the column positions that later cells index (BMI is read back from
# column 7).
if female.shape[1] == 7:
    female = np.column_stack([female, bmi])
female[:5]


## 7. Create Standardised Matrix zfemale

In [None]:

# z-score every column: subtract the column mean, then divide by the column
# standard deviation (NumPy default, ddof=0).
column_means = np.mean(female, axis=0)
column_stds = np.std(female, axis=0)
zfemale = (female - column_means) / column_stds
zfemale[:5]


## 8. Pairplot & Correlations

In [None]:

import seaborn as sns
# Map each label used in the plot/tables to its column position in zfemale.
column_of = {"weight": 0, "height": 1, "waist": 6, "hip": 5, "bmi": 7}
df = pd.DataFrame(
    zfemale[:, list(column_of.values())],
    columns=list(column_of),
)

# Scatterplot matrix of the selected standardised measurements.
sns.pairplot(df)
plt.show()

# Correlation matrices: Pearson and Spearman, computed on the same frame.
pearson, spearman = (df.corr(method=kind) for kind in ('pearson', 'spearman'))
pearson, spearman


## 9. Add Waist/Height & Waist/Hip Ratios

In [None]:

def append_waist_ratios(measurements):
    """Return `measurements` with two extra columns appended on the right.

    The new columns are column 6 divided by column 1 and column 6 divided by
    column 5 (waist/height and waist/hip, going by the column labels used in
    the pairplot cell).
    """
    waist = measurements[:, 6]
    waist_to_height = waist / measurements[:, 1]
    waist_to_hip = waist / measurements[:, 5]
    return np.column_stack([measurements, waist_to_height, waist_to_hip])


# NOTE: each run of this cell appends the two ratio columns again.
male = append_waist_ratios(male)
female = append_waist_ratios(female)

male[:3], female[:3]


## 10. Boxplot Comparing Ratios

In [None]:

# The last two columns of each matrix are the appended ratios:
# [-2] is waist/height (WHtR) and [-1] is waist/hip (WHR).
ratio_samples = [
    female[:, -2], male[:, -2],
    female[:, -1], male[:, -1],
]
ratio_names = ["F WHtR", "M WHtR", "F WHR", "M WHR"]

plt.figure(figsize=(10, 6))
plt.boxplot(ratio_samples)
# Label the boxes through the x ticks instead of boxplot's `labels=` keyword,
# which Matplotlib 3.9 deprecated in favour of `tick_labels=`; explicit ticks
# work on both old and new releases. Boxes sit at x = 1..N by default.
plt.xticks(range(1, len(ratio_names) + 1), ratio_names)
plt.title("Waist Ratios Comparison")
plt.ylabel("Ratio")
plt.show()


## 11. Advantages/Disadvantages of BMI, WHtR, WHR


**BMI**

- **Advantage:** simple to compute from weight and height alone, and widely used.
- **Disadvantage:** does not distinguish fat mass from muscle mass.

**Waist‑to‑Height Ratio (WHtR)**

- **Advantage:** a better predictor of central obesity.
- **Disadvantage:** any error in the height measurement carries over directly into the ratio.

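**Waist‑to‑Hip Ratio (WHR)**

- **Advantage:** a good indicator of how body fat is distributed.
- **Disadvantage:** hip circumference is measured inconsistently, which makes the ratio less reliable.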


## 12. Lowest & Highest BMI Participants

In [None]:

# Row order that sorts participants by standardised BMI (column 7), ascending.
bmi_index = np.argsort(zfemale[:, 7])

# First five positions = lowest BMI, last five = highest BMI; pull those rows
# out of zfemale in that order.
extreme_rows = np.concatenate([bmi_index[:5], bmi_index[-5:]])
selected = zfemale[extreme_rows]
selected
