In [62]:
import pandas as pd
import nhanes_loader
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from nhanes.regressions import generate_demo_group, do_age_regression

In [2]:
# Load the pre-built adult table from its gzip-compressed pickle.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# produced by this project's own pipeline.
adult_final = pd.read_pickle(
    'data/adult_final.pkl.gz',
)

In [3]:
# Load the pre-built youth table from its gzip-compressed pickle.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# produced by this project's own pipeline.
youth_final = pd.read_pickle(
    'data/youth_final.pkl.gz',
)

Some of the spirometry variables of interest are not ratios.

In [34]:
# Column names of the spirometry variables of interest that are NOT ratios.
varsnonratio = [
    'EXPIR',
    'FEF75',
    'FEV05',
    'FEV1',
    'FEV3',
    'FEV6',
    'FVC',
    'MMEF',
    'PEFR',
]

Some of the variables of interest are ratios.

In [33]:
# Column names of the ratio variables, grouped by the measure each name starts with.
# Names look like <measure>O<measure> (e.g. FEV1OFVC) -- presumably "O" means
# "over"; confirm against the data dictionary.
varsratio = [
    # FEF75-based
    'FEF75OFVC',
    'FEF75OPEFR',
    # FEV05-based
    'FEV05OFEV3',
    'FEV05OFVC',
    # FEV1-based
    'FEV1OFEV3',
    'FEV1OFEV6',
    'FEV1OFVC',
    # FEV3-based
    'FEV3OFEV6',
    'FEV3OFVC',
    # FEV6-based
    'FEV6OFVC',
    # MMEF-based
    'MMEFOFVC',
    'MMEFOPEFR',
    # PEFR-based
    'PEFROFEV1',
    'PEFROFEV6',
    'PEFROFVC',
]

We want to run linear regressions for these ratio variables against age for various demographic groups.

24 demographic combinations (2 × 4 × 3):
* male vs female (2)
* black vs non-black vs white-only vs Mexican-only (4)
* all ages vs over age cut-off (20 for men and 18 for women) vs under age cut-off (3)

In [63]:
# One record (dict) is collected per demographic group x ratio variable.
regression_results = []

# Every (sex, ethnicity, age_range) triple, in the same order three nested
# loops would visit them: sex outermost, age_range innermost.
demographic_combos = [
    (sex, ethnicity, age_range)
    for sex in ('male', 'female')
    for ethnicity in ('black', 'non-black', 'white', 'mexican')
    for age_range in ('all', 'under', 'over')
]

for sex, ethnicity, age_range in demographic_combos:
    # Build the dataframe for this demographic group once; it is reused for
    # every ratio variable below.
    demo_group = generate_demo_group(adult_final, youth_final, ethnicity, sex, age_range)

    for ratio_var in varsratio:
        # Regress the ratio variable against age for this group.
        intercept, coefficients, r_squared = do_age_regression(demo_group, ratio_var)
        # The slope comes back as an indexable; keep its first element
        # (presumably a one-element coefficient array -- confirm in
        # nhanes.regressions).
        slope = coefficients[0]

        # Descriptive statistics of the same column, taken after the
        # regression call.
        ratio_values = demo_group.loc[:, ratio_var]

        regression_results.append({
            'sex': sex,
            'ethnicity': ethnicity,
            'age_range': age_range,
            'ratio_var': ratio_var,
            'intercept': intercept,
            'slope': slope,
            'r_squared': r_squared,
            'std': ratio_values.std(),
            'count': ratio_values.count(),
        })

In [65]:
# Turn the collected regression records into a table and write it to CSV.
# NOTE(review): the filename spells "regresssions" with three s's; it is kept
# as-is because other code may read this exact path.
results_frame = pd.DataFrame(regression_results)
results_frame.to_csv('data/ratio_var_regresssions.csv')