In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
# Configure the root logger *before* the remaining imports so that records
# emitted while those packages load are displayed (see the matplotlib DEBUG
# lines in this cell's output). style='{' means the format string uses
# str.format-style fields such as {asctime}.
logging.basicConfig(level=logging.DEBUG, format='{asctime} - {name} - {levelname} - {message}', style='{')
import numpy as np, matplotlib.pyplot as plt, pandas as pd
# For finding and loading multiple files
import os
# Use this to see how much memory the dataframes use
from sys import getsizeof
from pathlib import Path
# Timing helpers; CodeTimer is used as a context manager around the census
# generation call further down
from linetimer import CodeTimer, linetimer

# Package whose dataset generators this notebook runs
import pseudopeople as psp

# Helpers from the vivarium_research_prl package (listed as a development
# install in the package listing printed by this cell)
from vivarium_research_prl.utils import sizemb, MappingViaAttributes, build_full_address
from vivarium_research_prl import alpha

# Record when, where, as whom and with which software this notebook was run.
# NOTE(review): `!` commands run in a subshell, so `python`, `pip` and `conda`
# are whatever that shell finds on PATH — presumably the kernel's environment,
# but confirm if the versions reported here ever look surprising.
!date
!whoami
!uname -a
!pwd
!python --version
# Active conda environment (the row marked with '*')
!conda info --envs | grep '\*'
# Versions of the key packages as seen by conda
!conda list | grep -e pandas -e numpy -e vivarium -e pseudopeople
# pip's view of pseudopeople; for a git install this line includes the commit hash
!pip freeze | grep pseudopeople

2024-01-19 17:28:40,897 - matplotlib - DEBUG - matplotlib data path: /ihme/homes/ndbs/miniconda3/envs/ppplv1.0-311/lib/python3.11/site-packages/matplotlib/mpl-data
2024-01-19 17:28:40,907 - matplotlib - DEBUG - CONFIGDIR=/ihme/homes/ndbs/.config/matplotlib
2024-01-19 17:28:40,913 - matplotlib - DEBUG - interactive is False
2024-01-19 17:28:40,914 - matplotlib - DEBUG - platform is linux
2024-01-19 17:28:41,026 - matplotlib - DEBUG - CACHEDIR=/ihme/homes/ndbs/.cache/matplotlib
2024-01-19 17:28:41,030 - matplotlib.font_manager - DEBUG - Using fontManager instance from /ihme/homes/ndbs/.cache/matplotlib/fontlist-v330.json
2024-01-19 17:28:41,510 - numexpr.utils - INFO - Note: NumExpr detected 56 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.


Fri 19 Jan 2024 05:28:44 PM PST
ndbs
Linux gen-slurm-sarchive-p0126 5.4.0-156-generic #173-Ubuntu SMP Tue Jul 11 07:25:22 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
/mnt/share/code/ndbs/vivarium_research_prl/pseudopeople_testing
Python 3.11.7
ppplv1.0-311          *  /ihme/homes/ndbs/miniconda3/envs/ppplv1.0-311
numpy                     1.26.3                   pypi_0    pypi
pandas                    2.1.4                    pypi_0    pypi
pseudopeople              0.7.2                    pypi_0    pypi
vivarium                  2.3.1                    pypi_0    pypi
vivarium-research-prl     0.0.1                     dev_0    <develop>
pseudopeople @ git+https://github.com/ihmeuw/pseudopeople.git@7d7e1db36125700fdd75ae9c667706d20b451bfd


In [3]:
# A clickable link to the GitHub UI to see what version I'm using
! pip freeze | grep pseudopeople | sed -e 's|pseudopeople @ ||' | sed -e 's|git+||' | sed -e 's|\.git@|/tree/|'

https://github.com/ihmeuw/pseudopeople/tree/7d7e1db36125700fdd75ae9c667706d20b451bfd


# Find data

```
/mnt/team/simulation_science/pub/models/vivarium_census_prl_synth_pop
```

In [4]:
# Root of the shared model-results area for the synthetic population
project_path = Path('/mnt/team/simulation_science/pub/models/vivarium_census_prl_synth_pop')
usa_path = project_path.joinpath('results/release_02_yellow/full_data/united_states_of_america')

# Convenience entries inside the USA results directory; 'latest' is a symlink
# (see the listing in the next cell), 'best' presumably is as well
latest_run = usa_path.joinpath('latest')
best_run = usa_path.joinpath('best')

# Explicit timestamped run directories. latest_run_path is the current target
# of the 'latest' symlink according to the listing in the next cell.
latest_run_path = usa_path.joinpath(
    '2023_08_21_16_35_27/final_results/2024_01_11_07_27_51/pseudopeople_input_data_usa_2.0.0'
)
best_run_path = usa_path.joinpath('2023_07_28_08_33_09/final_results/2023_07_31_08_59_48/')

# Rhode Island and full-USA input datasets inside the "best" run
ri_data_dir, usa_data_dir = (
    best_run_path.joinpath(dataset_subdir)
    for dataset_subdir in (
        'states/pseudopeople_input_data_rhode_island_0.0.2',
        'pseudopeople_input_data_usa_0.0.2',
    )
)


In [5]:
!ls -halt $project_path/results/release_02_yellow/full_data/united_states_of_america/latest

lrwxrwxrwx 1 albrja IHME-Simulationscience 218 Jan 11 12:08 /mnt/team/simulation_science/pub/models/vivarium_census_prl_synth_pop/results/release_02_yellow/full_data/united_states_of_america/latest -> /mnt/team/simulation_science/pub/models/vivarium_census_prl_synth_pop/results/release_02_yellow/full_data/united_states_of_america/2023_08_21_16_35_27/final_results/2024_01_11_07_27_51/pseudopeople_input_data_usa_2.0.0


In [6]:
!ls $latest_run_path

american_community_survey  logs			     taxes_1040
CHANGELOG.rst		   metadata_proportions.csv  taxes_w2_and_1099
current_population_survey  metadata.yaml	     women_infants_and_children
decennial_census	   social_security


# Set up a logger to log to file

In [7]:
# Set up a logger that additionally writes to a log file.
file_logger = logging.getLogger(__name__) # Logger named after the current (__main__) module

# NOTE(review): the environment output near the top of this notebook is dated
# 2024-01-19, so "2021" in this file name looks like a typo — confirm before
# renaming, since changing it changes where the log is written.
log_filename = 'usa_decennial_census_20210119.log'

# Make the cell idempotent: without this, every re-run of the cell would attach
# one more FileHandler to the same logger and each record would be written to
# the file once per attached handler. Detach and close any handler that already
# points at this file before creating a fresh one.
log_path = os.path.abspath(log_filename)  # FileHandler stores its target as an absolute path
for stale_handler in list(file_logger.handlers):
    if isinstance(stale_handler, logging.FileHandler) and stale_handler.baseFilename == log_path:
        file_logger.removeHandler(stale_handler)
        stale_handler.close()

# Create and configure file log handler
file_handler = logging.FileHandler(log_filename)
file_handler.setLevel(logging.DEBUG)
file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_format)

# Attach the handler to the logger
file_logger.addHandler(file_handler)

# Generate full USA decennial census

In [None]:
%%time
# %%time reports the timing of the whole cell in the notebook output, while
# CodeTimer hands its own timing message for the block (unit='m', i.e. minutes)
# to file_logger.info.
with CodeTimer("USA decennial census generation", unit='m', logger_func=file_logger.info):
    # Generate the decennial census dataset from the latest full-USA run
    # directory; the Path is converted to a plain string before being passed in.
    census = psp.generate_decennial_census(str(latest_run_path))

Noising data:   0%|                                                             | 0/334 [00:00<?, ?it/s]
Applying noise:   0%|                                                          | 0/15 [00:00<?, ?type/s][A
Applying noise:   7%|███▎                                              | 1/15 [00:08<01:52,  8.02s/type][A
Applying noise:  13%|██████▋                                           | 2/15 [00:52<06:23, 29.49s/type][A
Applying noise:  20%|██████████                                        | 3/15 [00:53<03:15, 16.25s/type][A
Applying noise:  27%|█████████████▎                                    | 4/15 [00:54<01:55, 10.49s/type][A
Applying noise:  33%|████████████████▋                                 | 5/15 [00:55<01:09,  6.90s/type][A
Applying noise:  40%|████████████████████                              | 6/15 [00:56<00:43,  4.82s/type][A
Applying noise:  47%|███████████████████████▎                          | 7/15 [00:56<00:26,  3.31s/type][A
Applying noise:  53%|██████████