# Notes


In [1]:
import duckdb
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from graphviz import Digraph
from sympy.physics.units import magnetic_density

from variables import Variables as vars

plt.style.use('../../notebook.mplstyle')

os.makedirs("./output", exist_ok=True)

In [2]:
# con.close()
con = duckdb.connect("./data/us_births.db")  #, read_only=True)


In [3]:
mage_eth_c = con.execute(
    """
    SELECT b.year,
           b.mracehisp_c                                      as ethnicity,
           CASE WHEN b.mage_c < 35 THEN '<35' ELSE '>=35' END as mage_group,
           COUNT(*)                                           as birth_count,
           SUM(b.down_ind)::INT as ds_recorded, SUM(b.p_ds_lb_nt) as ds_est_no_term,
           SUM(b.ds_case_weight)                               as case_weighted,
    -- SUM (b.p_ds_lb_nt * (1 - r.reduction)) as ds_est_reduction,
    FROM us_births as b
             JOIN reduction_rate_year r
                  ON b.year = r.year
    GROUP BY b.year, b.mracehisp_c, mage_group
    ORDER BY b.year, b.mracehisp_c, mage_group
    """
).df()
mage_eth_c.to_csv(f"./output/case-weighted-ethnicity-age-group-{datetime.now().strftime("%Y%m%d%H%M")}.csv",
                  index=False)
mage_eth_c

Unnamed: 0,year,ethnicity,mage_group,birth_count,ds_recorded,ds_est_no_term,case_weighted
0,1989,1,<35,2301071,1116,2157.940076,2073.5280
1,1989,1,>=35,229693,324,1356.330542,601.9920
2,1989,2,<35,576886,145,481.626133,411.8145
3,1989,2,>=35,34673,49,213.800335,139.1649
4,1989,3,<35,30718,13,26.017171,28.9276
...,...,...,...,...,...,...,...
387,2023,,>=35,584277,730,4115.804936,1802.2970
388,2024,5,<35,817024,263,811.776154,729.0360
389,2024,5,>=35,174691,305,1284.921670,845.4600
390,2024,,<35,2044163,581,2235.098404,1428.6209


In [4]:
mage_eth_c = con.execute(
    """
    SELECT b.year,
           b.mracehisp_c        as ethnicity,
           b.mage_c             as age,
           COUNT(*)             as birth_count,
           SUM(b.down_ind)::INT as ds_recorded, SUM(b.p_ds_lb_nt) as ds_est_no_term,
           SUM(b.ds_case_weight) as case_weighted,
    -- SUM (b.p_ds_lb_nt * (1 - r.reduction)) as ds_est_reduction,
    FROM us_births as b
             JOIN reduction_rate_year r
                  ON b.year = r.year
    GROUP BY b.year, b.mracehisp_c, b.mage_c
    ORDER BY b.year, b.mracehisp_c, b.mage_c
    """
).df()
mage_eth_c.to_csv(f"./output/case-weighted-ethnicity-ages-{datetime.now().strftime("%Y%m%d%H%M")}.csv", index=False)
mage_eth_c

Unnamed: 0,year,ethnicity,age,birth_count,ds_recorded,ds_est_no_term,case_weighted
0,1989,1,10,22,0,0.014442,0.0000
1,1989,1,11,14,0,0.009196,0.0000
2,1989,1,12,38,0,0.024982,0.0000
3,1989,1,13,291,0,0.191523,0.0000
4,1989,1,14,2016,1,1.328818,1.8580
...,...,...,...,...,...,...,...
7712,2024,,46,1927,12,59.418978,29.5068
7713,2024,,47,1121,8,37.198405,19.6712
7714,2024,,48,664,1,23.330428,2.4589
7715,2024,,49,515,0,18.913266,0.0000


In [5]:
mage_eth_c = con.execute(
    """
    SELECT b.year,
           b.mracehisp_c        as ethnicity,
           CASE
               WHEN b.mage_c < 20 THEN '<20'
               WHEN b.mage_c < 25 THEN '20-24'
               WHEN b.mage_c < 30 THEN '25-29'
               WHEN b.mage_c < 35 THEN '30-34'
               WHEN b.mage_c < 40 THEN '35-39'
               WHEN b.mage_c < 45 THEN '40-44'
               ELSE '>=45'
               END              as mage_group,
           COUNT(*)             as birth_count,
           SUM(b.down_ind)::INT as ds_recorded, SUM(b.p_ds_lb_nt) as ds_est_no_term,
           SUM(b.ds_case_weight) as case_weighted,
    -- SUM (b.p_ds_lb_nt * (1 - r.reduction)) as ds_est_reduction,
    FROM us_births as b
             JOIN reduction_rate_year r
                  ON b.year = r.year
    GROUP BY b.year, b.mracehisp_c, mage_group
    ORDER BY b.year, b.mracehisp_c, mage_group
    """
).df()
mage_eth_c.to_csv(f"./output/case-weighted-ethnicity-age-group-2-{datetime.now().strftime("%Y%m%d%H%M")}.csv",
                  index=False)
mage_eth_c

Unnamed: 0,year,ethnicity,mage_group,birth_count,ds_recorded,ds_est_no_term,case_weighted
0,1989,1,20-24,620099,238,433.370522,442.2040
1,1989,1,25-29,848825,392,709.042853,728.3360
2,1989,1,30-34,590124,371,853.926053,689.3180
3,1989,1,35-39,200654,221,911.923759,410.6180
4,1989,1,40-44,28174,95,418.613618,176.5100
...,...,...,...,...,...,...,...
1367,2024,,30-34,859947,257,1306.402063,631.9373
1368,2024,,35-39,486825,419,2330.352646,1030.2791
1369,2024,,40-44,107032,286,1656.963716,703.2454
1370,2024,,<20,80534,29,53.825074,71.3081


In [6]:
con.close()