In [6]:
from hot_deck_class import HotDeckImputer
import polars as pl

In [7]:
# Toy donor file: 10 records carrying the variable to impute (`donor_assets`),
# the candidate cell variables and a weight column.
donor_data = pl.DataFrame(
    {
        "donor_assets": [
            50000, 20000, 300000, 2000, 10000,
            10000, 200, 2000, 4000, 500000,
        ],
        "race_cell": [
            "Black", "Black", "Black", "White", "White",
            "White", "Black", "White", "Black", "Black",
        ],
        "sex_cell": ["M", "F", "F", "M", "F", "M", "F", "F", "M", "F"],
        "work_cell": [1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        "weight": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
    }
)

# Toy recipient file: 14 records with the same cell variables and a weight,
# but no `donor_assets` column.
recipient_data = pl.DataFrame(
    {
        "race_cell": [
            "Black", "Black", "Black", "White", "White", "White", "Black",
            "White", "Black", "Black", "Black", "Black", "White", "White",
        ],
        "sex_cell": [
            "M", "F", "F", "M", "F", "M", "F",
            "F", "M", "F", "F", "M", "M", "F",
        ],
        "work_cell": [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1],
        "weight": [1, 3, 2, 3, 2, 1, 4, 2, 1, 3, 4, 2, 1, 1],
    }
)

In [8]:
# Bind the donor and recipient frames to one imputer; `donor_assets` is the
# variable to impute and `weight` is the weight column present in both frames.
imputer = HotDeckImputer(
    donor_data=donor_data,
    imputation_var='donor_assets',
    weight_var='weight',
    recipient_data=recipient_data,
)

In [9]:
# Age the donor dollar amounts with CPI. The printed summaries show every value
# scaled by imp_year_cpi / donor_year_cpi = 322.1 / 223.1 (~1.4437), e.g. the
# minimum goes from 200 to 288.75.
# NOTE(review): the two CPI index values are hard-coded; record which years and
# which CPI series they come from.
imputer.age_dollar_amounts(donor_year_cpi = 223.1, imp_year_cpi = 322.1)


Summary of donor_assets pre CPI aging:
{'mean': 89820.0, 'median': 10000.0, 'min': 200, 'max': 500000, 'std_dev': 170765.50393253707, 'count': 10, 'missing_values': 0}
Summary of donor_assets post CPI aging:
{'mean': 129677.37337516811, 'median': 14437.471985656657, 'min': 288.74943971313314, 'max': 721873.5992828329, 'std_dev': 246542.2179142546, 'count': 10, 'missing_values': 0}


In [10]:
# Define imputation cells from the listed variables. The displayed result is one
# definition string per race x sex combination (4 cells).
variables = ['race_cell','sex_cell']

imputer.define_cells(variables)
imputer.cell_definitions

["race_cell == 'White' & sex_cell == 'M'",
 "race_cell == 'White' & sex_cell == 'F'",
 "race_cell == 'Black' & sex_cell == 'M'",
 "race_cell == 'Black' & sex_cell == 'F'"]

In [11]:
# The call itself prints the lines below: one polars filter expression per
# condition of each cell definition (2 conditions x 4 cells).
# NOTE(review): presumably this is the step that fills `recipient_cells` and
# `donor_cells`, which are inspected in later cells; confirm in HotDeckImputer.
imputer.generate_cells()

[(col("race_cell")) == (String(White))]
[(col("sex_cell")) == (String(M))]
[(col("race_cell")) == (String(White))]
[(col("sex_cell")) == (String(F))]
[(col("race_cell")) == (String(Black))]
[(col("sex_cell")) == (String(M))]
[(col("race_cell")) == (String(Black))]
[(col("sex_cell")) == (String(F))]


In [12]:
# Re-display the definitions: the output is identical to the list shown right
# after define_cells().
imputer.cell_definitions

["race_cell == 'White' & sex_cell == 'M'",
 "race_cell == 'White' & sex_cell == 'F'",
 "race_cell == 'Black' & sex_cell == 'M'",
 "race_cell == 'Black' & sex_cell == 'F'"]

In [13]:
# Mapping from cell definition string to the recipient rows in that cell
# (one polars DataFrame per cell, same columns as `recipient_data`).
imputer.recipient_cells

{"race_cell == 'White' & sex_cell == 'M'": shape: (3, 4)
 ┌───────────┬──────────┬───────────┬────────┐
 │ race_cell ┆ sex_cell ┆ work_cell ┆ weight │
 │ ---       ┆ ---      ┆ ---       ┆ ---    │
 │ str       ┆ str      ┆ i64       ┆ i64    │
 ╞═══════════╪══════════╪═══════════╪════════╡
 │ White     ┆ M        ┆ 0         ┆ 3      │
 │ White     ┆ M        ┆ 0         ┆ 1      │
 │ White     ┆ M        ┆ 0         ┆ 1      │
 └───────────┴──────────┴───────────┴────────┘,
 "race_cell == 'White' & sex_cell == 'F'": shape: (3, 4)
 ┌───────────┬──────────┬───────────┬────────┐
 │ race_cell ┆ sex_cell ┆ work_cell ┆ weight │
 │ ---       ┆ ---      ┆ ---       ┆ ---    │
 │ str       ┆ str      ┆ i64       ┆ i64    │
 ╞═══════════╪══════════╪═══════════╪════════╡
 │ White     ┆ F        ┆ 1         ┆ 2      │
 │ White     ┆ F        ┆ 1         ┆ 2      │
 │ White     ┆ F        ┆ 1         ┆ 1      │
 └───────────┴──────────┴───────────┴────────┘,
 "race_cell == 'Black' & sex_cell == '

In [14]:
# Mapping from cell definition string to the donor rows in that cell.
# `donor_assets` is shown as f64 and already CPI-aged (e.g. 2000 -> 2887.49).
imputer.donor_cells

{"race_cell == 'White' & sex_cell == 'M'": shape: (2, 5)
 ┌──────────────┬───────────┬──────────┬───────────┬────────┐
 │ donor_assets ┆ race_cell ┆ sex_cell ┆ work_cell ┆ weight │
 │ ---          ┆ ---       ┆ ---      ┆ ---       ┆ ---    │
 │ f64          ┆ str       ┆ str      ┆ i64       ┆ i64    │
 ╞══════════════╪═══════════╪══════════╪═══════════╪════════╡
 │ 2887.494397  ┆ White     ┆ M        ┆ 0         ┆ 2      │
 │ 14437.471986 ┆ White     ┆ M        ┆ 0         ┆ 2      │
 └──────────────┴───────────┴──────────┴───────────┴────────┘,
 "race_cell == 'White' & sex_cell == 'F'": shape: (2, 5)
 ┌──────────────┬───────────┬──────────┬───────────┬────────┐
 │ donor_assets ┆ race_cell ┆ sex_cell ┆ work_cell ┆ weight │
 │ ---          ┆ ---       ┆ ---      ┆ ---       ┆ ---    │
 │ f64          ┆ str       ┆ str      ┆ i64       ┆ i64    │
 ╞══════════════╪═══════════╪══════════╪═══════════╪════════╡
 │ 14437.471986 ┆ White     ┆ F        ┆ 1         ┆ 1      │
 │ 2887.494397  ┆

In [15]:
# Split the Black/F cell by `work_cell`. The printed filter expressions and the
# next cell's output show it replaced by two cells: work_cell == 0 and
# work_cell == 1.
imputer.split_cell("race_cell == 'Black' & sex_cell == 'F'", "work_cell")


[(col("race_cell")) == (String(Black))]
[(col("sex_cell")) == (String(F))]
[(col("work_cell")) == (dyn int: 0)]
[(col("race_cell")) == (String(Black))]
[(col("sex_cell")) == (String(F))]
[(col("work_cell")) == (dyn int: 1)]


In [16]:
# Definitions after the split: 5 cells, with the original Black/F definition
# replaced by its two `work_cell` variants.
imputer.cell_definitions

["race_cell == 'White' & sex_cell == 'M'",
 "race_cell == 'White' & sex_cell == 'F'",
 "race_cell == 'Black' & sex_cell == 'M'",
 "race_cell == 'Black' & sex_cell == 'F' & work_cell == 0",
 "race_cell == 'Black' & sex_cell == 'F' & work_cell == 1"]

In [17]:
# Run the imputation. The call prints nothing; the effect is visible in the
# following cells, where the recipient frames have an `imp_donor_assets` column.
# NOTE(review): if donor selection is random, no seed is set anywhere in this
# notebook, so imputed values may differ between runs; confirm in the class.
imputer.impute()

In [18]:
# Per-cell recipient frames after imputation: each frame now has a fifth
# column, `imp_donor_assets` (f64).
imputer.recipient_cells

{"race_cell == 'White' & sex_cell == 'M'": shape: (3, 5)
 ┌───────────┬──────────┬───────────┬────────┬──────────────────┐
 │ race_cell ┆ sex_cell ┆ work_cell ┆ weight ┆ imp_donor_assets │
 │ ---       ┆ ---      ┆ ---       ┆ ---    ┆ ---              │
 │ str       ┆ str      ┆ i64       ┆ i64    ┆ f64              │
 ╞═══════════╪══════════╪═══════════╪════════╪══════════════════╡
 │ White     ┆ M        ┆ 0         ┆ 3      ┆ 2887.494397      │
 │ White     ┆ M        ┆ 0         ┆ 1      ┆ 2887.494397      │
 │ White     ┆ M        ┆ 0         ┆ 1      ┆ 2887.494397      │
 └───────────┴──────────┴───────────┴────────┴──────────────────┘,
 "race_cell == 'White' & sex_cell == 'F'": shape: (3, 5)
 ┌───────────┬──────────┬───────────┬────────┬──────────────────┐
 │ race_cell ┆ sex_cell ┆ work_cell ┆ weight ┆ imp_donor_assets │
 │ ---       ┆ ---      ┆ ---       ┆ ---    ┆ ---              │
 │ str       ┆ str      ┆ i64       ┆ i64    ┆ f64              │
 ╞═══════════╪══════════╪══

In [19]:
# Full recipient frame with the imputed column. The rows are displayed grouped
# by cell (White/M first), not in the order of the input frame built above.
imputer.recipient_data

race_cell,sex_cell,work_cell,weight,imp_donor_assets
str,str,i64,i64,f64
"""White""","""M""",0,3,2887.494397
"""White""","""M""",0,1,2887.494397
"""White""","""M""",0,1,2887.494397
"""White""","""F""",1,2,2887.494397
"""White""","""F""",1,2,2887.494397
…,…,…,…,…
"""Black""","""F""",0,3,28874.943971
"""Black""","""F""",0,3,721873.599283
"""Black""","""F""",0,4,28874.943971
"""Black""","""F""",1,2,288.74944


In [20]:
# Add random noise to the imputed values.
# NOTE(review): variation_stdev = 1/6 and floor_noise = 1.5 are unexplained
# magic numbers; document their meaning and units.
# NOTE(review): no random seed is visible in this notebook; confirm how to make
# this step reproducible.
imputer.apply_random_noise(variation_stdev = (1/6), floor_noise = 1.5)

In [21]:
# Displays 0.1666..., i.e. the `variation_stdev` value (1/6) passed to
# apply_random_noise() in the previous cell.
imputer.random_noise

0.16666666666666666

In [22]:
# Write the per-cell statistics workbook. With an empty string as the second
# argument, the reported path is '.\hot_deck_stats.xlsx'.
imputer.gen_analysis_file('hot_deck_stats', '')

Cell data written to '.\hot_deck_stats.xlsx'.


In [23]:
# Same call without the second argument: the reported path is again
# '.\hot_deck_stats.xlsx', the same file the two-argument call reports.
# NOTE(review): this presumably overwrites the file written by the previous
# cell; drop one of the two calls if both are not needed.
imputer.gen_analysis_file('hot_deck_stats')

Cell data written to '.\hot_deck_stats.xlsx'.


In [24]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.
