From f041f896c17b0aaa8d7c6a08a370e0ea31559ac3 Mon Sep 17 00:00:00 2001 From: PyMap Date: Tue, 28 Jul 2020 18:42:11 -0300 Subject: [PATCH] draw households --- demos/Synthesis workflow.ipynb | 4281 +++----------------------------- 1 file changed, 376 insertions(+), 3905 deletions(-) diff --git a/demos/Synthesis workflow.ipynb b/demos/Synthesis workflow.ipynb index 60ff2b5..69ffaaf 100644 --- a/demos/Synthesis workflow.ipynb +++ b/demos/Synthesis workflow.ipynb @@ -26,7 +26,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "DIAGRAM 0 HERE: synthesize all" + "![Synthesis](img/hor_synthesis.png)" ] }, { @@ -40,7 +40,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "DEFINITION HERE: ESTO ES LO QUE VAMOS A ANALIZAR, PERO QUE SIGNIFICA? REPRESENTA ALGO EN PARTICULAR? COMO SE INTERPRETA ESTA TABLA?" + "`hh` and `p` synthesis datasets are built based on PUMA geographies. The dataset paths show a `state`, followed by a `county` and an `acs`-based year. In the following example, we see synthetic households and persons for the state of Alaska and all the counties that are inside the state. \n", + "\n", + "The main idea of this process is to use the Public Use Microdata Sample (PUMS), which is a sample of the ACS responses provided at the most disaggregated geography available (the PUMA), and use it to match each record within a `block group`. 
Using PUMS and ACS subject tables, it is possible to build different household types and match persons to them, producing a dataset of individual records with different characteristics (race of head, tenure, children, etc.). " ] }, { @@ -461,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -471,17 +473,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['acsyear', 'c', 'county', 'get_available_geography_ids', 'get_geography_name', 'get_household_joint_dist_for_geography', 'get_household_marginal_for_geography', 'get_num_geographies', 'get_person_joint_dist_for_geography', 'get_person_marginal_for_geography', 'h_acs', 'h_acs_cat', 'h_pums_cols', 'p_acs', 'p_acs_cat', 'p_pums_cols', 'state', 'tract']\n" - ] - } - ], + "outputs": [], "source": [ "# what do we get from starter class?\n", "print([m for m in dir(starter) if not m.startswith('__')])" @@ -496,20 +490,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('02', '290')" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# ...state and county pair\n", "starter.state, starter.county" @@ -538,143 +521,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NAMEB19001_001EB19001_002EB19001_003EB19001_004EB19001_005EB19001_006EB19001_007EB19001_008EB19001_009E...B08201_002EB08201_003EB08201_004EB08201_005EB08201_006EB08202_001EB08202_002EB08202_003EB08202_004EB08202_005E
0Block Group 1, Census Tract 1, Yukon-Koyukuk C...187521320112012118...1194016371878974212
1Block Group 2, Census Tract 1, Yukon-Koyukuk C...33668194220112789...2147130513336160133373
\n", - "

2 rows × 127 columns

\n", - "
" - ], - "text/plain": [ - " NAME B19001_001E \\\n", - "0 Block Group 1, Census Tract 1, Yukon-Koyukuk C... 187 \n", - "1 Block Group 2, Census Tract 1, Yukon-Koyukuk C... 336 \n", - "\n", - " B19001_002E B19001_003E B19001_004E B19001_005E B19001_006E \\\n", - "0 52 13 20 11 20 \n", - "1 68 19 42 20 11 \n", - "\n", - " B19001_007E B19001_008E B19001_009E ... B08201_002E B08201_003E \\\n", - "0 12 1 18 ... 119 40 \n", - "1 27 8 9 ... 214 71 \n", - "\n", - " B08201_004E B08201_005E B08201_006E B08202_001E B08202_002E \\\n", - "0 16 3 7 187 89 \n", - "1 30 5 13 336 160 \n", - "\n", - " B08202_003E B08202_004E B08202_005E \n", - "0 74 21 2 \n", - "1 133 37 3 \n", - "\n", - "[2 rows x 127 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "starter.h_acs.head(2)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -684,57 +540,27 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "starter.c " ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "c" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['_get_fips_lookup', '_get_pums_relationship', '_query', '_read_csv', '_scale_and_merge', 'acsyear_files', 'base_url', 'block_group_and_tract_query', 'block_group_query', 'c', 'download_household_pums', 'download_population_pums', 'fips_df', 'fips_url', 
'pums00_household_base_url', 'pums00_population_base_url', 'pums10_household_base_url', 'pums10_population_base_url', 'pums_cache', 'pums_household_state_base_url', 'pums_population_state_base_url', 'pums_relationship_df', 'pums_relationship_file_url', 'tract_query', 'tract_to_puma', 'try_fips_lookup']\n" - ] - } - ], + "outputs": [], "source": [ "# that has his own methods \n", "print([ m for m in dir(c) if not m.startswith('__')])" @@ -751,20 +577,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - ">" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# we create and merge both tables with:\n", "c.block_group_and_tract_query" @@ -781,17 +596,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['ALL', '_acs', 'acs', 'acs1', 'acs1dp', 'acs3', 'acs3dp', 'acs5', 'acs5dp', 'session', 'sf1', 'sf3']\n" - ] - } - ], + "outputs": [], "source": [ "# imported methods from census module\n", "print([ m for m in dir(c.c) if not m.startswith('__')])" @@ -806,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -821,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -832,110 +639,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NAMEB11005_001EB11005_002EB11005_011Estatecountytract
0Census Tract 1, Yukon-Koyukuk Census Area, Alaska523.0140.0383.002290000100
1Census Tract 2, Yukon-Koyukuk Census Area, Alaska549.0155.0394.002290000200
2Census Tract 3, Yukon-Koyukuk Census Area, Alaska612.0251.0361.002290000300
3Census Tract 4, Yukon-Koyukuk Census Area, Alaska372.0151.0221.002290000400
\n", - "
" - ], - "text/plain": [ - " NAME B11005_001E \\\n", - "0 Census Tract 1, Yukon-Koyukuk Census Area, Alaska 523.0 \n", - "1 Census Tract 2, Yukon-Koyukuk Census Area, Alaska 549.0 \n", - "2 Census Tract 3, Yukon-Koyukuk Census Area, Alaska 612.0 \n", - "3 Census Tract 4, Yukon-Koyukuk Census Area, Alaska 372.0 \n", - "\n", - " B11005_002E B11005_011E state county tract \n", - "0 140.0 383.0 02 290 000100 \n", - "1 155.0 394.0 02 290 000200 \n", - "2 251.0 361.0 02 290 000300 \n", - "3 151.0 221.0 02 290 000400 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.DataFrame(tr)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -946,173 +659,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NAMEB11005_001EB11005_002EB11005_011Estatecountytractblock group
0Block Group 1, Census Tract 1, Yukon-Koyukuk C...187.058.0129.0022900001001
1Block Group 2, Census Tract 1, Yukon-Koyukuk C...336.082.0254.0022900001002
2Block Group 1, Census Tract 2, Yukon-Koyukuk C...208.057.0151.0022900002001
3Block Group 2, Census Tract 2, Yukon-Koyukuk C...341.098.0243.0022900002002
4Block Group 1, Census Tract 3, Yukon-Koyukuk C...234.0108.0126.0022900003001
5Block Group 2, Census Tract 3, Yukon-Koyukuk C...190.076.0114.0022900003002
6Block Group 3, Census Tract 3, Yukon-Koyukuk C...188.067.0121.0022900003003
7Block Group 1, Census Tract 4, Yukon-Koyukuk C...180.082.098.0022900004001
8Block Group 2, Census Tract 4, Yukon-Koyukuk C...192.069.0123.0022900004002
\n", - "
" - ], - "text/plain": [ - " NAME B11005_001E \\\n", - "0 Block Group 1, Census Tract 1, Yukon-Koyukuk C... 187.0 \n", - "1 Block Group 2, Census Tract 1, Yukon-Koyukuk C... 336.0 \n", - "2 Block Group 1, Census Tract 2, Yukon-Koyukuk C... 208.0 \n", - "3 Block Group 2, Census Tract 2, Yukon-Koyukuk C... 341.0 \n", - "4 Block Group 1, Census Tract 3, Yukon-Koyukuk C... 234.0 \n", - "5 Block Group 2, Census Tract 3, Yukon-Koyukuk C... 190.0 \n", - "6 Block Group 3, Census Tract 3, Yukon-Koyukuk C... 188.0 \n", - "7 Block Group 1, Census Tract 4, Yukon-Koyukuk C... 180.0 \n", - "8 Block Group 2, Census Tract 4, Yukon-Koyukuk C... 192.0 \n", - "\n", - " B11005_002E B11005_011E state county tract block group \n", - "0 58.0 129.0 02 290 000100 1 \n", - "1 82.0 254.0 02 290 000100 2 \n", - "2 57.0 151.0 02 290 000200 1 \n", - "3 98.0 243.0 02 290 000200 2 \n", - "4 108.0 126.0 02 290 000300 1 \n", - "5 76.0 114.0 02 290 000300 2 \n", - "6 67.0 121.0 02 290 000300 3 \n", - "7 82.0 98.0 02 290 000400 1 \n", - "8 69.0 123.0 02 290 000400 2 " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.DataFrame(bg)" ] @@ -1126,163 +675,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NAMEB08201_001EB08201_002EB08201_003EB08201_004EB08201_005EB08201_006E
0Block Group 1, Census Tract 1, Yukon-Koyukuk C...187119401637
1Block Group 2, Census Tract 1, Yukon-Koyukuk C...3362147130513
2Block Group 1, Census Tract 2, Yukon-Koyukuk C...2085173502111
3Block Group 2, Census Tract 2, Yukon-Koyukuk C...34183120833418
4Block Group 1, Census Tract 3, Yukon-Koyukuk C...234127583584
5Block Group 2, Census Tract 3, Yukon-Koyukuk C...190103472873
6Block Group 3, Census Tract 3, Yukon-Koyukuk C...188102462873
7Block Group 1, Census Tract 4, Yukon-Koyukuk C...18091463252
8Block Group 2, Census Tract 4, Yukon-Koyukuk C...19298503553
\n", - "
" - ], - "text/plain": [ - " NAME B08201_001E \\\n", - "0 Block Group 1, Census Tract 1, Yukon-Koyukuk C... 187 \n", - "1 Block Group 2, Census Tract 1, Yukon-Koyukuk C... 336 \n", - "2 Block Group 1, Census Tract 2, Yukon-Koyukuk C... 208 \n", - "3 Block Group 2, Census Tract 2, Yukon-Koyukuk C... 341 \n", - "4 Block Group 1, Census Tract 3, Yukon-Koyukuk C... 234 \n", - "5 Block Group 2, Census Tract 3, Yukon-Koyukuk C... 190 \n", - "6 Block Group 3, Census Tract 3, Yukon-Koyukuk C... 188 \n", - "7 Block Group 1, Census Tract 4, Yukon-Koyukuk C... 180 \n", - "8 Block Group 2, Census Tract 4, Yukon-Koyukuk C... 192 \n", - "\n", - " B08201_002E B08201_003E B08201_004E B08201_005E B08201_006E \n", - "0 119 40 16 3 7 \n", - "1 214 71 30 5 13 \n", - "2 51 73 50 21 11 \n", - "3 83 120 83 34 18 \n", - "4 127 58 35 8 4 \n", - "5 103 47 28 7 3 \n", - "6 102 46 28 7 3 \n", - "7 91 46 32 5 2 \n", - "8 98 50 35 5 3 " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# tract variables\n", "starter.h_acs[['NAME']+vehicle_columns]" @@ -1297,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1313,104 +708,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NAMEB11005_001EB11005_002EB11005_011Estatecountytractblock groupB08201_001EB08201_002EB08201_003EB08201_004EB08201_005EB08201_006E
0Block Group 1, Census Tract 1, Yukon-Koyukuk C...18758129022900001001187119401637
1Block Group 2, Census Tract 1, Yukon-Koyukuk C...336822540229000010023362147130513
\n", - "
" - ], - "text/plain": [ - " NAME B11005_001E \\\n", - "0 Block Group 1, Census Tract 1, Yukon-Koyukuk C... 187 \n", - "1 Block Group 2, Census Tract 1, Yukon-Koyukuk C... 336 \n", - "\n", - " B11005_002E B11005_011E state county tract block group B08201_001E \\\n", - "0 58 129 02 290 000100 1 187 \n", - "1 82 254 02 290 000100 2 336 \n", - "\n", - " B08201_002E B08201_003E B08201_004E B08201_005E B08201_006E \n", - "0 119 40 16 3 7 \n", - "1 214 71 30 5 13 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_acs.head(2)" ] @@ -1431,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1440,20 +740,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cat.categorize" ] @@ -1467,7 +756,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1482,166 +771,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_namehh_carshh_children
cat_valuenoneonetwo or morenoyes
statecountytractblock group
022900001001119402612958
2214714825482
000200151738215157
28312013524398
00030011275847126108
2103473811476
3102463812167
00040019146399882
298504312369
\n", - "
" - ], - "text/plain": [ - "cat_name hh_cars hh_children \n", - "cat_value none one two or more no yes\n", - "state county tract block group \n", - "02 290 000100 1 119 40 26 129 58\n", - " 2 214 71 48 254 82\n", - " 000200 1 51 73 82 151 57\n", - " 2 83 120 135 243 98\n", - " 000300 1 127 58 47 126 108\n", - " 2 103 47 38 114 76\n", - " 3 102 46 38 121 67\n", - " 000400 1 91 46 39 98 82\n", - " 2 98 50 43 123 69" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_acs_cat" ] @@ -1655,404 +787,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_namehh_age_of_headhh_carshh_childrenhh_income...hispanic_headseniorssf_detachedtenure_mover
cat_valuegt35-lt65gt65lt35noneonetwo or morenoyesgt100-lt150gt150...noyesnoyesnoyesown not recentown recentrent not recentrent recent
statecountytractblock group
022900001001122254011940261295860...187016225018713593112
221077492147148254823315...33152597720316222104658
000200112367185173821515782...2044138701519314744512
2229763683120135243985925...33922548744297240116921
00030011464345127584712610891...234018450123315094728
212144251034738114764116...1882143476184129132919
3112373910246381216780...188014741418413523318
0004001924939914639988237...180013149018012164211
2108463898504312369166...18841444871859786918
\n", - "

9 rows × 34 columns

\n", - "
" - ], - "text/plain": [ - "cat_name hh_age_of_head hh_cars \\\n", - "cat_value gt35-lt65 gt65 lt35 none one \n", - "state county tract block group \n", - "02 290 000100 1 122 25 40 119 40 \n", - " 2 210 77 49 214 71 \n", - " 000200 1 123 67 18 51 73 \n", - " 2 229 76 36 83 120 \n", - " 000300 1 146 43 45 127 58 \n", - " 2 121 44 25 103 47 \n", - " 3 112 37 39 102 46 \n", - " 000400 1 92 49 39 91 46 \n", - " 2 108 46 38 98 50 \n", - "\n", - "cat_name hh_children hh_income \\\n", - "cat_value two or more no yes gt100-lt150 \n", - "state county tract block group \n", - "02 290 000100 1 26 129 58 6 \n", - " 2 48 254 82 33 \n", - " 000200 1 82 151 57 8 \n", - " 2 135 243 98 59 \n", - " 000300 1 47 126 108 9 \n", - " 2 38 114 76 41 \n", - " 3 38 121 67 8 \n", - " 000400 1 39 98 82 3 \n", - " 2 43 123 69 16 \n", - "\n", - "cat_name ... hispanic_head seniors \\\n", - "cat_value gt150 ... no yes no yes \n", - "state county tract block group ... \n", - "02 290 000100 1 0 ... 187 0 162 25 \n", - " 2 15 ... 331 5 259 77 \n", - " 000200 1 2 ... 204 4 138 70 \n", - " 2 25 ... 339 2 254 87 \n", - " 000300 1 1 ... 234 0 184 50 \n", - " 2 16 ... 188 2 143 47 \n", - " 3 0 ... 188 0 147 41 \n", - " 000400 1 7 ... 180 0 131 49 \n", - " 2 6 ... 
188 4 144 48 \n", - "\n", - "cat_name sf_detached tenure_mover \\\n", - "cat_value no yes own not recent own recent \n", - "state county tract block group \n", - "02 290 000100 1 0 187 135 9 \n", - " 2 20 316 222 10 \n", - " 000200 1 15 193 147 4 \n", - " 2 44 297 240 11 \n", - " 000300 1 1 233 150 9 \n", - " 2 6 184 129 13 \n", - " 3 4 184 135 2 \n", - " 000400 1 0 180 121 6 \n", - " 2 7 185 97 8 \n", - "\n", - "cat_name \n", - "cat_value rent not recent rent recent \n", - "state county tract block group \n", - "02 290 000100 1 31 12 \n", - " 2 46 58 \n", - " 000200 1 45 12 \n", - " 2 69 21 \n", - " 000300 1 47 28 \n", - " 2 29 19 \n", - " 3 33 18 \n", - " 000400 1 42 11 \n", - " 2 69 18 \n", - "\n", - "[9 rows x 34 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# here the entire table built in starter2\n", "starter.h_acs_cat" @@ -2081,20 +818,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - ">" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "c.tract_to_puma" ] @@ -2108,24 +834,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "puma10 id for tract 000100 : 00400\n", - "puma00 id for tract 000100 : 00400\n", - "puma10 id for tract 000200 : 00400\n", - "puma00 id for tract 000200 : 00400\n", - "puma10 id for tract 000300 : 00400\n", - "puma00 id for tract 000300 : 00400\n", - "puma10 id for tract 000400 : 00400\n", - "puma00 id for tract 000400 : 00400\n" - ] - } - ], + "outputs": [], "source": [ "for tract in ['000100','000200','000300','000400']:\n", " print('puma10 id for tract {} : {}'.format(tract, c.tract_to_puma(state, county, tract)[0]))\n", @@ -2174,7 +885,7 @@ }, { 
"cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2185,280 +896,18 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading households pums from https://s3-us-west-1.amazonaws.com/synthpop-data2/\n", - "Reading PUMS00 from https://s3-us-west-1.amazonaws.com/synthpop-data2/\n" - ] - } - ], + "outputs": [], "source": [ "h_pums = c.download_household_pums(state, puma10, puma00, usecols=h_pums_cols)" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
serialnoRTpuma00puma10NPTYPEBLDTENVEHHINCPMVR18R65
02012000000643H-9400212.03.00.033360.01.00.00.0
22012000001889H-9400412.02.00.089420.07.00.02.0
32012000003083H-9400112.02.00.020800.07.00.00.0
42012000004753H-9400712.02.00.026300.06.01.00.0
52012000005450H-9400313.03.01.065300.01.01.00.0
..........................................
58572011001481195H400-9612.03.01.0165600.07.01.01.0
58582011001486432H400-9512.02.00.023900.05.01.00.0
58622011001495660H400-9312.02.02.029500.07.00.02.0
58652011001496296H400-9112.04.00.0900.07.00.00.0
58682011001498413H400-9412.03.01.019600.02.01.00.0
\n", - "

3625 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " serialno RT puma00 puma10 NP TYPE BLD TEN VEH HINCP MV \\\n", - "0 2012000000643 H -9 400 2 1 2.0 3.0 0.0 33360.0 1.0 \n", - "2 2012000001889 H -9 400 4 1 2.0 2.0 0.0 89420.0 7.0 \n", - "3 2012000003083 H -9 400 1 1 2.0 2.0 0.0 20800.0 7.0 \n", - "4 2012000004753 H -9 400 7 1 2.0 2.0 0.0 26300.0 6.0 \n", - "5 2012000005450 H -9 400 3 1 3.0 3.0 1.0 65300.0 1.0 \n", - "... ... .. ... ... .. ... ... ... ... ... ... \n", - "5857 2011001481195 H 400 -9 6 1 2.0 3.0 1.0 165600.0 7.0 \n", - "5858 2011001486432 H 400 -9 5 1 2.0 2.0 0.0 23900.0 5.0 \n", - "5862 2011001495660 H 400 -9 3 1 2.0 2.0 2.0 29500.0 7.0 \n", - "5865 2011001496296 H 400 -9 1 1 2.0 4.0 0.0 900.0 7.0 \n", - "5868 2011001498413 H 400 -9 4 1 2.0 3.0 1.0 19600.0 2.0 \n", - "\n", - " R18 R65 \n", - "0 0.0 0.0 \n", - "2 0.0 2.0 \n", - "3 0.0 0.0 \n", - "4 1.0 0.0 \n", - "5 1.0 0.0 \n", - "... ... ... \n", - "5857 1.0 1.0 \n", - "5858 1.0 0.0 \n", - "5862 0.0 2.0 \n", - "5865 0.0 0.0 \n", - "5868 1.0 0.0 \n", - "\n", - "[3625 rows x 13 columns]" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_pums" ] @@ -2479,20 +928,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cat.joint_distribution" ] @@ -2506,87 +944,9 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_id
hh_carshh_children
noneno0
yes1
oneno2
yes3
two or moreno4
yes5
\n", - "
" - ], - "text/plain": [ - " cat_id\n", - "hh_cars hh_children \n", - "none no 0\n", - " yes 1\n", - "one no 2\n", - " yes 3\n", - "two or more no 4\n", - " yes 5" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cat.category_combinations(h_acs_cat.columns)" ] @@ -2600,7 +960,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2620,7 +980,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2642,95 +1002,9 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_idfrequency
hh_carshh_children
noneno0843
yes1919
oneno2573
yes3431
two or moreno4491
yes5368
\n", - "
" - ], - "text/plain": [ - " cat_id frequency\n", - "hh_cars hh_children \n", - "none no 0 843\n", - " yes 1 919\n", - "one no 2 573\n", - " yes 3 431\n", - "two or more no 4 491\n", - " yes 5 368" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "jd_households" ] @@ -2746,298 +1020,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
serialnoRTpuma00puma10NPTYPEBLDTENVEHHINCPMVR18R65hh_carshh_childrencat_id
02012000000643H-9400212.03.00.033360.01.00.00.0noneno0
22012000001889H-9400412.02.00.089420.07.00.02.0noneno0
32012000003083H-9400112.02.00.020800.07.00.00.0noneno0
152012000017291H-9400412.02.00.054800.05.00.00.0noneno0
312012000041419H-9400212.02.00.033300.07.00.01.0noneno0
...................................................
57622011001370081H400-9112.02.06.0300300.05.00.00.0two or moreno4
57712011001380825H400-9212.04.02.0173600.01.00.00.0two or moreno4
58352011001457691H400-9211.03.02.097400.03.00.00.0two or moreno4
58432011001464975H400-9412.01.02.08650.05.00.00.0two or moreno4
58622011001495660H400-9312.02.02.029500.07.00.02.0two or moreno4
\n", - "

3625 rows × 16 columns

\n", - "
" - ], - "text/plain": [ - " serialno RT puma00 puma10 NP TYPE BLD TEN VEH HINCP MV \\\n", - "0 2012000000643 H -9 400 2 1 2.0 3.0 0.0 33360.0 1.0 \n", - "2 2012000001889 H -9 400 4 1 2.0 2.0 0.0 89420.0 7.0 \n", - "3 2012000003083 H -9 400 1 1 2.0 2.0 0.0 20800.0 7.0 \n", - "15 2012000017291 H -9 400 4 1 2.0 2.0 0.0 54800.0 5.0 \n", - "31 2012000041419 H -9 400 2 1 2.0 2.0 0.0 33300.0 7.0 \n", - "... ... .. ... ... .. ... ... ... ... ... ... \n", - "5762 2011001370081 H 400 -9 1 1 2.0 2.0 6.0 300300.0 5.0 \n", - "5771 2011001380825 H 400 -9 2 1 2.0 4.0 2.0 173600.0 1.0 \n", - "5835 2011001457691 H 400 -9 2 1 1.0 3.0 2.0 97400.0 3.0 \n", - "5843 2011001464975 H 400 -9 4 1 2.0 1.0 2.0 8650.0 5.0 \n", - "5862 2011001495660 H 400 -9 3 1 2.0 2.0 2.0 29500.0 7.0 \n", - "\n", - " R18 R65 hh_cars hh_children cat_id \n", - "0 0.0 0.0 none no 0 \n", - "2 0.0 2.0 none no 0 \n", - "3 0.0 0.0 none no 0 \n", - "15 0.0 0.0 none no 0 \n", - "31 0.0 1.0 none no 0 \n", - "... ... ... ... ... ... \n", - "5762 0.0 0.0 two or more no 4 \n", - "5771 0.0 0.0 two or more no 4 \n", - "5835 0.0 0.0 two or more no 4 \n", - "5843 0.0 0.0 two or more no 4 \n", - "5862 0.0 2.0 two or more no 4 \n", - "\n", - "[3625 rows x 16 columns]" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_pums" ] @@ -3069,24 +1054,9 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "state 02\n", - "county 290\n", - "tract 000100\n", - "block group 1\n", - "dtype: object" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#...for a group of available geographies inside a county/state pair\n", "list(starter.get_available_geography_ids())[0]" @@ -3094,7 +1064,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [], 
"source": [ @@ -3103,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3120,55 +1090,9 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cat_name cat_value \n", - "hh_age_of_head gt35-lt65 108\n", - " gt65 46\n", - " lt35 38\n", - "hh_cars none 98\n", - " one 50\n", - " two or more 43\n", - "hh_children no 123\n", - " yes 69\n", - "hh_income gt100-lt150 16\n", - " gt150 6\n", - " gt30-lt60 32\n", - " gt60-lt100 81\n", - " lt30 57\n", - "hh_race_of_head asian 0\n", - " black 2\n", - " other 108\n", - " white 82\n", - "hh_size four or more 36\n", - " one 61\n", - " three 44\n", - " two 51\n", - "hh_workers none 69\n", - " one 77\n", - " two or more 45\n", - "hispanic_head no 188\n", - " yes 4\n", - "seniors no 144\n", - " yes 48\n", - "sf_detached no 7\n", - " yes 185\n", - "tenure_mover own not recent 97\n", - " own recent 8\n", - " rent not recent 69\n", - " rent recent 18\n", - "Name: (02, 290, 000400, 2), dtype: int64" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# This is the marginal table we stored for the last census tract (400)\n", "h_marg" @@ -3183,166 +1107,9 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_namehh_carshh_children
cat_valuenoneonetwo or morenoyes
statecountytractblock group
022900001001119402612958
2214714825482
000200151738215157
28312013524398
00030011275847126108
2103473811476
3102463812167
00040019146399882
298504312369
\n", - "
" - ], - "text/plain": [ - "cat_name hh_cars hh_children \n", - "cat_value none one two or more no yes\n", - "state county tract block group \n", - "02 290 000100 1 119 40 26 129 58\n", - " 2 214 71 48 254 82\n", - " 000200 1 51 73 82 151 57\n", - " 2 83 120 135 243 98\n", - " 000300 1 127 58 47 126 108\n", - " 2 103 47 38 114 76\n", - " 3 102 46 38 121 67\n", - " 000400 1 91 46 39 98 82\n", - " 2 98 50 43 123 69" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# can check these totals in our acs subject table...\n", "h_acs_cat" @@ -3350,7 +1117,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3360,26 +1127,9 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cat_name cat_value \n", - "hh_cars none 91\n", - " one 46\n", - " two or more 39\n", - "hh_children no 98\n", - " yes 82\n", - "Name: (02, 290, 000400, 1), dtype: int64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "hh_marginals_tract_400_block_gp_1" ] @@ -3407,7 +1157,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3416,7 +1166,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3425,27 +1175,9 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hh_cars hh_children\n", - "none no 46.529753\n", - " yes 46.538428\n", - "one no 27.835805\n", - " yes 19.209650\n", - "two or more no 23.634442\n", - " yes 16.251921\n", - "dtype: float64" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], + 
"outputs": [], "source": [ "# this is our contraints table\n", "h_constraint" @@ -3453,20 +1185,9 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# this is the number of iterations performed to achieve the constraint values\n", "_" @@ -3481,7 +1202,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3490,24 +1211,9 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cat_name cat_value \n", - "hh_cars none 91\n", - " one 46\n", - " two or more 39\n", - "Name: (02, 290, 000400, 1), dtype: int64" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "bg_targets" ] @@ -3521,89 +1227,18 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_idfrequency
hh_carshh_children
noneno0843
yes1919
\n", - "
" - ], - "text/plain": [ - " cat_id frequency\n", - "hh_cars hh_children \n", - "none no 0 843\n", - " yes 1 919" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "jd_households[:2]" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1762" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "jd_households['frequency'][:2].sum()" ] @@ -3619,31 +1254,9 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Block group target value: 91\n", - "Total value of the combined category is: 1762\n", - "Proportion of each category in the combined variables total: 0.051645856980703744\n", - "New Block group target value: 4.69977298524404\n", - "*********************************************************************************\n", - "Block group target value: 46\n", - "Total value of the combined category is: 1004\n", - "Proportion of each category in the combined variables total: 0.045816733067729085\n", - "New Block group target value: 2.1075697211155378\n", - "*********************************************************************************\n", - "Block group target value: 39\n", - "Total value of the combined category is: 859\n", - "Proportion of each category in the combined variables total: 0.04540162980209546\n", - "New Block group target value: 1.7706635622817228\n", - "*********************************************************************************\n" - ] - } - ], + "outputs": [], "source": [ "sub_category_idx_0 = 0\n", "sub_category_idx_1 = 2\n", @@ -3668,25 +1281,9 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ 
- "array([[ 0, 843],\n", - " [ 1, 919],\n", - " [ 2, 573],\n", - " [ 3, 431],\n", - " [ 4, 491],\n", - " [ 5, 368]])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "jd_households.values" ] @@ -3707,7 +1304,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3716,7 +1313,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3728,7 +1325,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3741,22 +1338,9 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 4.69977299],\n", - " [1. , 2.10756972],\n", - " [2. , 1.77066356]])" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "next_constraints" ] @@ -3770,7 +1354,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3779,37 +1363,18 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2338.0" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.abs(current_constraints, next_constraints).sum()" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Recalculate constraints table\n" - ] - } - ], + "outputs": [], "source": [ "if np.abs(current_constraints, next_constraints).sum()>tolerance:\n", " print('Recalculate constraints table')" @@ -3824,26 +1389,9 @@ }, { "cell_type": 
"code", - "execution_count": 65, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "cat_name cat_value \n", - "hh_cars none 91\n", - " one 46\n", - " two or more 39\n", - "hh_children no 98\n", - " yes 82\n", - "Name: (02, 290, 000400, 1), dtype: int64" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# ... our marginals table with total values for the block group\n", "hh_marginals_tract_400_block_gp_1" @@ -3851,32 +1399,24 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hh_cars hh_children\n", - "none no 46.529753\n", - " yes 46.538428\n", - "one no 27.835805\n", - " yes 19.209650\n", - "two or more no 23.634442\n", - " yes 16.251921\n", - "dtype: float64" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# updated with new totals.\n", "h_constraint" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# that will be used to update joint distributions frequencies\n", + "jd_households" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -3892,12 +1432,10 @@ ] }, { - "cell_type": "code", - "execution_count": 106, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "from synthpop.ipu.ipu import household_weights" + "![ipu](img/ipf_ipu.png)" ] }, { @@ -3905,11 +1443,13 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from synthpop.ipu.ipu import household_weights" + ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3918,27 +1458,9 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"cat_id\n", - "0 46.529753\n", - "1 46.538428\n", - "2 27.835805\n", - "3 19.209650\n", - "4 23.634442\n", - "5 16.251921\n", - "dtype: float64" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_constraint" ] @@ -3952,7 +1474,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3961,176 +1483,16 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
serialnoRTpuma00puma10NPTYPEBLDTENVEHHINCPMVR18R65hh_carshh_childrencat_id
02012000000643H-9400212.03.00.033360.01.00.00.0noneno0
22012000001889H-9400412.02.00.089420.07.00.02.0noneno0
32012000003083H-9400112.02.00.020800.07.00.00.0noneno0
152012000017291H-9400412.02.00.054800.05.00.00.0noneno0
312012000041419H-9400212.02.00.033300.07.00.01.0noneno0
\n", - "
" - ], - "text/plain": [ - " serialno RT puma00 puma10 NP TYPE BLD TEN VEH HINCP MV \\\n", - "0 2012000000643 H -9 400 2 1 2.0 3.0 0.0 33360.0 1.0 \n", - "2 2012000001889 H -9 400 4 1 2.0 2.0 0.0 89420.0 7.0 \n", - "3 2012000003083 H -9 400 1 1 2.0 2.0 0.0 20800.0 7.0 \n", - "15 2012000017291 H -9 400 4 1 2.0 2.0 0.0 54800.0 5.0 \n", - "31 2012000041419 H -9 400 2 1 2.0 2.0 0.0 33300.0 7.0 \n", - "\n", - " R18 R65 hh_cars hh_children cat_id \n", - "0 0.0 0.0 none no 0 \n", - "2 0.0 2.0 none no 0 \n", - "3 0.0 0.0 none no 0 \n", - "15 0.0 0.0 none no 0 \n", - "31 0.0 1.0 none no 0 " - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "households_sample_df.head()" ] }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4139,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4148,7 +1510,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4157,7 +1519,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4166,198 +1528,109 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cat_id012345
hh_id
01.00.00.00.00.00.0
21.00.00.00.00.00.0
31.00.00.00.00.00.0
40.01.00.00.00.00.0
50.00.00.01.00.00.0
\n", - "
" - ], - "text/plain": [ - "cat_id 0 1 2 3 4 5\n", - "hh_id \n", - "0 1.0 0.0 0.0 0.0 0.0 0.0\n", - "2 1.0 0.0 0.0 0.0 0.0 0.0\n", - "3 1.0 0.0 0.0 0.0 0.0 0.0\n", - "4 0.0 1.0 0.0 0.0 0.0 0.0\n", - "5 0.0 0.0 0.0 1.0 0.0 0.0" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "h_freq_table.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Together, the frequency table and the constraints are used to compute the weights matrix and the fit quality:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_weights, fit_quality, iterations = household_weights(h_freq_table,\n", + " None,\n", + " h_constraint,\n", + " None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from synthpop.ipu.ipu import _FrequencyAndConstraints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "freq_wrap = _FrequencyAndConstraints(h_freq_table, h_constraint)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This wrapper returns every `cat_id` column with non zero values:\n", + "\n", + "* `0` represents households with no car and no children \n", + "* `1` represents households with no car and children \n", + "* `2` represents households with one car and no children\n", + "* `3` represents households with one car and children\n", + "* `4` represents households with two or more cars and no children\n", + "* `5` represents households with two or more cars and children\n", + "\n", + "The wrapper returns each variables combination (from 0 to 5) inside a tuple with:\n", + "\n", + "1) `cat_id` value\n", + "\n", + "2) a `weights matrix`\n", + "\n", + "3) the new target value (the `constraint` or maximum value that the combination can reach within the block group) we built in the `ipf`.\n", + "\n", +
"4) the index of non zero column values " + ] + }, { "cell_type": "code", - "execution_count": 107, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "best_weights, fit_quality, iterations = household_weights(h_freq_table,\n", - " None,\n", - " h_constraint,\n", - " None)" + "# here the information for all the combinations\n", + "freq_wrap.iter_columns()" ] }, { "cell_type": "code", - "execution_count": 108, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hh_id\n", - "0 0.055195\n", - "2 0.055195\n", - "3 0.055195\n", - "4 0.050640\n", - "5 0.044570\n", - " ... \n", - "5857 0.044570\n", - "5858 0.050640\n", - "5862 0.048135\n", - "5865 0.055195\n", - "5868 0.044570\n", - "Length: 3625, dtype: float64" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "best_weights" + "# and here, we can see for category \"0\" that non zero values are...\n", + "freq_wrap.get_column(0)" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.636802750002612e-16" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "fit_quality" + "#... 
in the index 0, 1, 2, 7, (...)\n", + "h_freq_table[0][0:8]" ] }, { - "cell_type": "code", - "execution_count": 110, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "iterations" + "With this wrapper, we build the fit quality of each `cat_id`" ] }, { @@ -4365,810 +1638,88 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from synthpop.ipu.ipu import _average_fit_quality\n", + "from synthpop.ipu.ipu import _fit_quality" + ] }, { "cell_type": "code", - "execution_count": 111, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# weights matrix\n", "weights = np.ones(len(h_freq_table), dtype='float')\n", - "best_weights = weights.copy()" + "\n", + "# column (this is the non-zero elements of the \"0\" cat_id column of the frequency table)\n", + "cat_id_0_column = [e for e in freq_wrap.get_column(0)][1]\n", + "\n", + "# nz (this is the idx of the frequency table where non zero values are stored)\n", + "cat_id_0_nz = [e for e in freq_wrap.get_column(0)][3]" ] }, { "cell_type": "code", - "execution_count": 113, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from synthpop.ipu.ipu import _FrequencyAndConstraints" + "# the non zero values should have the same length when filtering the weights matrix\n", + "len(cat_id_0_column) == len(weights[cat_id_0_nz])" ] }, { "cell_type": "code", - "execution_count": 116, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "freq_wrap = _FrequencyAndConstraints(h_freq_table, h_constraint)" + "# the new target value for the block group\n", + "constraint = [e for e in freq_wrap.get_column(0)][2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this is the \"fit quality\" value for cat_id \"0\"\n", +
"_fit_quality(cat_id_0_column, weights[cat_id_0_nz], constraint)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This wrapper returns every `cat_id` column with non zero values:\n", - "\n", - "* `0` represents households with no car and no children \n", - "* `1` represents households with no car and children \n", - "* `2` represents households with one car and no children\n", - "* `3` represents households with one car and children\n", - "* `4` represents households with two or more cars and no children\n", - "* `5` represents households with two or more cars and children\n", - "\n", - "The wrapper returns each variables combination (from 0 to 5) with a weights matrix, the new target value we built in the `ipf` and the index of non zero columns: " + "This value is the result of multiplying the non-zero values by the weights matrix, which returns the total frequency of the category within the PUMA: " ] }, { "cell_type": "code", - "execution_count": 121, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(0,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 
1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " 46.529753144154704,\n", - " array([ 0, 1, 2, 7, 16, 18, 20, 27, 32, 34, 38,\n", - " 41, 47, 58, 60, 74, 84, 86, 87, 88, 90, 95,\n", - " 97, 99, 106, 107, 108, 115, 116, 118, 121, 122, 130,\n", - " 131, 135, 138, 140, 159, 163, 165, 169, 175, 182, 190,\n", - " 192, 205, 207, 211, 212, 214, 215, 221, 223, 226, 227,\n", - " 228, 229, 232, 235, 240, 242, 256, 261, 268, 270, 279,\n", - " 280, 281, 282, 283, 285, 287, 291, 295, 297, 298, 299,\n", - " 300, 308, 309, 310, 311, 315, 320, 324, 331, 332, 336,\n", - " 338, 345, 356, 361, 363, 366, 367, 368, 372, 380, 383,\n", - " 388, 393, 400, 403, 406, 408, 418, 419, 430, 432, 433,\n", - " 434, 449, 450, 465, 466, 471, 472, 473, 483, 486, 499,\n", - " 504, 513, 514, 519, 537, 538, 540, 545, 546, 548, 552,\n", - " 553, 559, 565, 573, 579, 580, 583, 585, 589, 590, 597,\n", - " 598, 609, 613, 622, 624, 629, 632, 635, 636, 637, 638,\n", - " 641, 642, 646, 649, 650, 660, 661, 669, 672, 673, 684,\n", - " 687, 692, 697, 698, 706, 713, 716, 725, 727, 737, 738,\n", - " 739, 741, 747, 751, 754, 761, 762, 768, 797, 804, 805,\n", - " 809, 814, 815, 820, 822, 824, 825, 826, 837, 
839, 840,\n", - " 842, 847, 855, 863, 866, 868, 872, 876, 887, 891, 901,\n", - " 905, 911, 916, 917, 923, 937, 939, 942, 945, 946, 952,\n", - " 960, 967, 969, 970, 973, 974, 975, 978, 984, 989, 990,\n", - " 993, 997, 999, 1000, 1005, 1006, 1009, 1011, 1015, 1021, 1022,\n", - " 1024, 1031, 1037, 1040, 1043, 1044, 1057, 1059, 1062, 1063, 1071,\n", - " 1072, 1073, 1079, 1083, 1090, 1093, 1094, 1096, 1097, 1101, 1106,\n", - " 1107, 1108, 1114, 1118, 1124, 1130, 1137, 1138, 1140, 1143, 1144,\n", - " 1161, 1171, 1175, 1178, 1180, 1186, 1187, 1188, 1192, 1196, 1197,\n", - " 1204, 1226, 1247, 1252, 1253, 1254, 1256, 1259, 1261, 1263, 1267,\n", - " 1271, 1279, 1285, 1292, 1295, 1302, 1305, 1308, 1317, 1319, 1321,\n", - " 1324, 1331, 1333, 1335, 1354, 1355, 1357, 1358, 1361, 1362, 1366,\n", - " 1367, 1373, 1382, 1395, 1396, 1397, 1406, 1407, 1408, 1409, 1415,\n", - " 1421, 1425, 1426, 1427, 1429, 1434, 1444, 1445, 1452, 1456, 1460,\n", - " 1461, 1466, 1474, 1480, 1481, 1482, 1490, 1491, 1493, 1501, 1509,\n", - " 1521, 1528, 1545, 1552, 1566, 1567, 1570, 1576, 1577, 1580, 1582,\n", - " 1594, 1603, 1605, 1618, 1619, 1627, 1628, 1643, 1645, 1650, 1653,\n", - " 1654, 1655, 1657, 1660, 1661, 1662, 1664, 1674, 1679, 1688, 1691,\n", - " 1692, 1701, 1704, 1705, 1709, 1710, 1711, 1712, 1713, 1715, 1729,\n", - " 1730, 1731, 1738, 1739, 1741, 1746, 1751, 1766, 1770, 1779, 1783,\n", - " 1785, 1787, 1800, 1802, 1811, 1816, 1820, 1821, 1824, 1829, 1833,\n", - " 1843, 1848, 1850, 1851, 1856, 1861, 1865, 1866, 1867, 1870, 1874,\n", - " 1875, 1878, 1883, 1887, 1890, 1891, 1893, 1897, 1903, 1904, 1905,\n", - " 1907, 1917, 1918, 1920, 1923, 1924, 1932, 1934, 1937, 1947, 1953,\n", - " 1957, 1958, 1962, 1965, 1971, 1972, 1975, 1976, 1979, 1980, 1998,\n", - " 2000, 2003, 2008, 2013, 2021, 2025, 2028, 2032, 2034, 2036, 2041,\n", - " 2048, 2049, 2056, 2061, 2064, 2068, 2070, 2076, 2080, 2086, 2090,\n", - " 2093, 2096, 2102, 2112, 2120, 2124, 2131, 2132, 2133, 2138, 2144,\n", - " 2146, 2159, 2165, 
2167, 2173, 2174, 2183, 2184, 2189, 2198, 2200,\n", - " 2203, 2204, 2210, 2213, 2219, 2220, 2228, 2233, 2234, 2240, 2241,\n", - " 2251, 2252, 2255, 2260, 2264, 2265, 2267, 2273, 2280, 2283, 2284,\n", - " 2285, 2286, 2289, 2291, 2293, 2294, 2295, 2297, 2309, 2313, 2314,\n", - " 2317, 2321, 2331, 2335, 2344, 2350, 2352, 2364, 2369, 2370, 2371,\n", - " 2380, 2393, 2396, 2402, 2403, 2404, 2411, 2412, 2415, 2423, 2435,\n", - " 2445, 2447, 2450, 2453, 2466, 2469, 2476, 2481, 2491, 2495, 2507,\n", - " 2513, 2514, 2518, 2519, 2527, 2530, 2532, 2535, 2536, 2539, 2542,\n", - " 2545, 2546, 2547, 2559, 2562, 2565, 2577, 2579, 2584, 2588, 2593,\n", - " 2594, 2596, 2598, 2603, 2604, 2610, 2612, 2615, 2621, 2622, 2623,\n", - " 2631, 2638, 2642, 2646, 2647, 2654, 2659, 2660, 2661, 2662, 2667,\n", - " 2671, 2677, 2684, 2686, 2688, 2689, 2692, 2694, 2701, 2702, 2703,\n", - " 2705, 2706, 2712, 2714, 2715, 2717, 2719, 2724, 2728, 2729, 2735,\n", - " 2739, 2742, 2744, 2748, 2750, 2755, 2757, 2768, 2770, 2773, 2774,\n", - " 2776, 2777, 2778, 2779, 2780, 2785, 2790, 2791, 2792, 2802, 2803,\n", - " 2806, 2812, 2818, 2824, 2828, 2829, 2831, 2848, 2849, 2857, 2858,\n", - " 2861, 2864, 2872, 2874, 2877, 2882, 2884, 2885, 2890, 2898, 2900,\n", - " 2902, 2906, 2911, 2913, 2922, 2926, 2934, 2935, 2943, 2953, 2955,\n", - " 2964, 2967, 2969, 2972, 2973, 2981, 2983, 2984, 2985, 2988, 2990,\n", - " 2996, 2997, 3002, 3009, 3010, 3012, 3024, 3025, 3030, 3035, 3036,\n", - " 3043, 3045, 3047, 3052, 3053, 3055, 3062, 3069, 3072, 3077, 3079,\n", - " 3080, 3084, 3089, 3095, 3098, 3116, 3118, 3124, 3128, 3138, 3145,\n", - " 3148, 3166, 3168, 3173, 3174, 3178, 3185, 3193, 3205, 3208, 3214,\n", - " 3218, 3232, 3237, 3240, 3247, 3256, 3259, 3262, 3263, 3266, 3267,\n", - " 3270, 3273, 3287, 3294, 3296, 3301, 3302, 3311, 3316, 3320, 3336,\n", - " 3341, 3345, 3353, 3356, 3357, 3363, 3365, 3366, 3367, 3369, 3371,\n", - " 3376, 3377, 3380, 3390, 3391, 3393, 3397, 3399, 3401, 3404, 3406,\n", - " 3410, 3415, 3419, 
3421, 3424, 3429, 3431, 3432, 3443, 3444, 3457,\n", - " 3458, 3459, 3462, 3466, 3468, 3470, 3471, 3472, 3473, 3478, 3488,\n", - " 3490, 3494, 3497, 3499, 3503, 3505, 3520, 3532, 3544, 3546, 3547,\n", - " 3548, 3550, 3551, 3556, 3557, 3562, 3567, 3570, 3576, 3578, 3581,\n", - " 3582, 3596, 3611, 3616, 3617, 3618, 3623])),\n", - " (1,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 
1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1.]),\n", - " 46.538428452649654,\n", - " array([ 3, 6, 8, 9, 21, 28, 29, 31, 33, 43, 48,\n", - " 52, 53, 55, 56, 57, 64, 69, 70, 73, 80, 83,\n", - " 93, 96, 101, 102, 103, 111, 114, 117, 120, 126, 127,\n", - " 128, 133, 134, 136, 137, 145, 148, 157, 160, 161, 164,\n", - " 170, 172, 173, 177, 183, 188, 189, 193, 198, 199, 201,\n", - " 202, 204, 206, 216, 230, 241, 247, 249, 255, 260, 265,\n", - " 267, 275, 276, 286, 289, 292, 296, 302, 304, 313, 318,\n", - " 323, 327, 334, 335, 340, 343, 351, 370, 373, 374, 381,\n", - " 386, 392, 398, 399, 404, 405, 407, 410, 412, 415, 420,\n", - " 424, 435, 437, 442, 443, 446, 451, 452, 454, 455, 460,\n", - " 462, 470, 474, 476, 479, 484, 485, 489, 490, 497, 502,\n", - " 503, 505, 508, 512, 516, 522, 530, 531, 532, 534, 539,\n", - " 542, 554, 563, 564, 581, 584, 586, 592, 595, 599, 600,\n", - " 604, 614, 619, 621, 630, 631, 633, 647, 648, 651, 653,\n", - " 654, 657, 659, 675, 676, 678, 685, 690, 695, 701, 702,\n", - " 704, 705, 707, 708, 709, 712, 714, 715, 720, 721, 728,\n", - " 731, 735, 736, 742, 743, 744, 746, 750, 756, 757, 758,\n", - " 759, 765, 773, 778, 779, 783, 786, 787, 791, 802, 806,\n", - " 808, 813, 817, 819, 821, 823, 829, 831, 833, 834, 838,\n", - " 846, 851, 853, 857, 859, 861, 867, 873, 878, 883, 888,\n", - " 890, 893, 894, 899, 907, 908, 910, 918, 922, 924, 925,\n", - " 927, 929, 930, 931, 932, 934, 941, 947, 950, 955, 965,\n", - " 968, 971, 976, 982, 995, 998, 1001, 1016, 1017, 1018, 1026,\n", - " 1030, 1032, 1035, 1036, 1046, 1060, 1066, 1067, 1070, 1075, 
1077,\n", - " 1082, 1100, 1104, 1110, 1112, 1115, 1117, 1120, 1129, 1131, 1132,\n", - " 1133, 1135, 1139, 1147, 1148, 1151, 1152, 1157, 1160, 1165, 1169,\n", - " 1183, 1189, 1194, 1202, 1210, 1213, 1214, 1220, 1224, 1227, 1233,\n", - " 1238, 1243, 1244, 1249, 1258, 1275, 1277, 1280, 1282, 1283, 1284,\n", - " 1287, 1289, 1293, 1301, 1307, 1310, 1313, 1315, 1320, 1329, 1330,\n", - " 1337, 1338, 1340, 1344, 1346, 1352, 1356, 1364, 1365, 1372, 1379,\n", - " 1384, 1394, 1402, 1405, 1412, 1414, 1418, 1419, 1424, 1428, 1431,\n", - " 1436, 1440, 1446, 1453, 1454, 1464, 1468, 1469, 1471, 1473, 1475,\n", - " 1478, 1486, 1487, 1496, 1497, 1498, 1500, 1503, 1507, 1511, 1512,\n", - " 1513, 1518, 1520, 1524, 1531, 1532, 1535, 1539, 1540, 1542, 1543,\n", - " 1553, 1557, 1560, 1563, 1568, 1578, 1581, 1584, 1585, 1587, 1589,\n", - " 1590, 1591, 1595, 1599, 1601, 1604, 1606, 1610, 1614, 1615, 1616,\n", - " 1620, 1631, 1640, 1648, 1656, 1658, 1663, 1665, 1668, 1671, 1680,\n", - " 1685, 1687, 1690, 1702, 1706, 1707, 1725, 1726, 1728, 1733, 1734,\n", - " 1736, 1740, 1750, 1754, 1755, 1760, 1762, 1769, 1773, 1775, 1778,\n", - " 1780, 1781, 1782, 1784, 1788, 1789, 1791, 1794, 1797, 1798, 1806,\n", - " 1810, 1815, 1818, 1822, 1827, 1831, 1832, 1834, 1836, 1838, 1855,\n", - " 1857, 1858, 1859, 1860, 1862, 1863, 1864, 1869, 1876, 1880, 1886,\n", - " 1888, 1894, 1899, 1901, 1909, 1913, 1915, 1919, 1921, 1925, 1926,\n", - " 1929, 1936, 1938, 1940, 1941, 1943, 1952, 1955, 1956, 1960, 1963,\n", - " 1966, 1970, 1973, 1981, 1984, 1991, 1999, 2001, 2002, 2004, 2006,\n", - " 2009, 2012, 2015, 2016, 2017, 2022, 2027, 2031, 2035, 2038, 2039,\n", - " 2047, 2051, 2055, 2063, 2065, 2067, 2071, 2077, 2079, 2083, 2084,\n", - " 2088, 2092, 2104, 2114, 2115, 2117, 2118, 2126, 2134, 2139, 2140,\n", - " 2142, 2145, 2147, 2148, 2155, 2156, 2160, 2161, 2168, 2169, 2175,\n", - " 2177, 2178, 2186, 2188, 2190, 2191, 2199, 2201, 2221, 2224, 2226,\n", - " 2229, 2230, 2232, 2239, 2243, 2246, 2247, 2248, 2249, 2250, 
2254,\n", - " 2259, 2266, 2269, 2270, 2274, 2275, 2282, 2292, 2300, 2301, 2302,\n", - " 2304, 2305, 2306, 2311, 2312, 2320, 2323, 2327, 2332, 2342, 2346,\n", - " 2354, 2357, 2358, 2360, 2362, 2363, 2366, 2367, 2368, 2373, 2374,\n", - " 2375, 2376, 2377, 2379, 2382, 2385, 2388, 2392, 2397, 2399, 2407,\n", - " 2409, 2417, 2418, 2419, 2420, 2421, 2424, 2430, 2432, 2439, 2441,\n", - " 2443, 2449, 2451, 2454, 2465, 2474, 2478, 2498, 2499, 2501, 2502,\n", - " 2503, 2506, 2521, 2528, 2529, 2533, 2537, 2543, 2551, 2553, 2554,\n", - " 2556, 2564, 2569, 2572, 2573, 2576, 2578, 2580, 2585, 2587, 2600,\n", - " 2602, 2607, 2608, 2609, 2619, 2620, 2624, 2625, 2626, 2628, 2639,\n", - " 2640, 2648, 2651, 2658, 2664, 2673, 2679, 2680, 2697, 2698, 2708,\n", - " 2710, 2713, 2718, 2723, 2730, 2731, 2732, 2733, 2736, 2738, 2740,\n", - " 2741, 2743, 2746, 2749, 2751, 2752, 2756, 2761, 2766, 2767, 2782,\n", - " 2786, 2788, 2795, 2798, 2799, 2804, 2807, 2809, 2813, 2816, 2817,\n", - " 2821, 2822, 2826, 2836, 2841, 2850, 2854, 2859, 2865, 2867, 2869,\n", - " 2871, 2873, 2879, 2889, 2892, 2895, 2896, 2903, 2907, 2908, 2914,\n", - " 2917, 2923, 2927, 2929, 2932, 2936, 2941, 2946, 2948, 2949, 2957,\n", - " 2959, 2962, 2965, 2970, 2975, 2976, 2978, 2979, 2989, 2995, 2998,\n", - " 3001, 3003, 3005, 3006, 3008, 3014, 3017, 3018, 3021, 3023, 3028,\n", - " 3029, 3031, 3032, 3033, 3034, 3037, 3042, 3046, 3048, 3051, 3057,\n", - " 3063, 3066, 3074, 3081, 3083, 3085, 3103, 3106, 3113, 3114, 3121,\n", - " 3122, 3125, 3129, 3132, 3134, 3137, 3139, 3146, 3147, 3153, 3158,\n", - " 3160, 3169, 3171, 3172, 3177, 3179, 3188, 3189, 3190, 3192, 3199,\n", - " 3201, 3209, 3220, 3226, 3233, 3238, 3239, 3241, 3242, 3245, 3246,\n", - " 3249, 3250, 3251, 3252, 3253, 3254, 3260, 3261, 3264, 3265, 3268,\n", - " 3275, 3282, 3288, 3290, 3292, 3298, 3303, 3312, 3315, 3319, 3326,\n", - " 3328, 3330, 3339, 3347, 3351, 3360, 3361, 3373, 3375, 3379, 3381,\n", - " 3382, 3386, 3392, 3396, 3398, 3412, 3413, 3418, 3423, 3425, 
3426,\n", - " 3430, 3433, 3434, 3436, 3442, 3447, 3452, 3456, 3461, 3465, 3474,\n", - " 3477, 3480, 3481, 3482, 3483, 3489, 3496, 3501, 3502, 3504, 3506,\n", - " 3510, 3514, 3517, 3518, 3519, 3524, 3527, 3530, 3533, 3534, 3536,\n", - " 3537, 3538, 3540, 3541, 3545, 3549, 3552, 3553, 3559, 3563, 3565,\n", - " 3566, 3572, 3575, 3577, 3579, 3580, 3583, 3586, 3587, 3597, 3598,\n", - " 3599, 3600, 3602, 3605, 3609, 3621])),\n", - " (2,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " 27.835804541797003,\n", - " array([ 10, 11, 19, 22, 25, 26, 42, 44, 46, 71, 72,\n", - " 77, 85, 89, 94, 109, 110, 113, 123, 124, 144, 146,\n", - " 149, 151, 171, 174, 179, 184, 187, 194, 196, 197, 213,\n", - " 218, 224, 234, 243, 246, 248, 252, 253, 266, 271, 278,\n", - " 303, 307, 317, 321, 325, 333, 339, 341, 342, 357, 359,\n", - " 369, 376, 382, 395, 397, 409, 414, 416, 417, 421, 423,\n", - " 426, 436, 438, 445, 456, 457, 458, 467, 469, 480, 487,\n", - " 507, 510, 524, 527, 529, 535, 536, 547, 550, 551, 560,\n", - " 562, 566, 568, 569, 570, 571, 572, 575, 576, 582, 591,\n", - " 596, 602, 603, 605, 610, 617, 626, 644, 652, 658, 665,\n", - " 668, 670, 677, 680, 686, 689, 693, 700, 717, 726, 730,\n", - " 732, 740, 749, 753, 763, 766, 767, 772, 774, 776, 780,\n", - " 781, 785, 789, 792, 795, 810, 844, 848, 854, 856, 865,\n", - " 871, 874, 877, 881, 895, 896, 902, 903, 904, 906, 912,\n", - " 914, 915, 928, 935, 948, 957, 959, 963, 981, 986, 994,\n", - " 1014, 1020, 
1025, 1045, 1055, 1065, 1099, 1123, 1134, 1136, 1141,\n", - " 1142, 1154, 1158, 1159, 1168, 1172, 1173, 1174, 1177, 1184, 1185,\n", - " 1191, 1193, 1199, 1200, 1201, 1206, 1222, 1223, 1229, 1230, 1234,\n", - " 1242, 1246, 1255, 1260, 1272, 1273, 1278, 1288, 1290, 1291, 1296,\n", - " 1316, 1325, 1326, 1334, 1336, 1343, 1345, 1348, 1350, 1353, 1359,\n", - " 1368, 1369, 1374, 1375, 1381, 1385, 1391, 1393, 1410, 1411, 1432,\n", - " 1451, 1462, 1483, 1516, 1517, 1519, 1525, 1527, 1529, 1530, 1541,\n", - " 1546, 1548, 1556, 1559, 1565, 1573, 1583, 1597, 1600, 1609, 1617,\n", - " 1621, 1624, 1625, 1629, 1633, 1634, 1642, 1646, 1647, 1649, 1651,\n", - " 1659, 1667, 1672, 1677, 1689, 1699, 1716, 1717, 1718, 1720, 1742,\n", - " 1748, 1757, 1764, 1767, 1772, 1777, 1786, 1792, 1804, 1809, 1812,\n", - " 1813, 1825, 1826, 1828, 1842, 1854, 1873, 1882, 1884, 1900, 1902,\n", - " 1906, 1911, 1935, 1942, 1948, 1951, 1954, 1964, 1968, 1988, 1993,\n", - " 1994, 1995, 1997, 2010, 2024, 2026, 2029, 2037, 2046, 2059, 2060,\n", - " 2072, 2075, 2078, 2082, 2098, 2099, 2107, 2109, 2111, 2119, 2121,\n", - " 2128, 2136, 2152, 2157, 2170, 2171, 2172, 2176, 2192, 2194, 2205,\n", - " 2215, 2217, 2218, 2222, 2223, 2225, 2236, 2242, 2261, 2276, 2277,\n", - " 2279, 2281, 2303, 2307, 2308, 2315, 2322, 2328, 2348, 2353, 2361,\n", - " 2365, 2378, 2383, 2386, 2389, 2390, 2391, 2398, 2406, 2408, 2410,\n", - " 2434, 2436, 2437, 2442, 2444, 2457, 2458, 2460, 2461, 2463, 2467,\n", - " 2480, 2486, 2493, 2497, 2505, 2508, 2510, 2522, 2524, 2526, 2534,\n", - " 2544, 2548, 2549, 2552, 2557, 2561, 2582, 2586, 2590, 2592, 2595,\n", - " 2601, 2606, 2632, 2650, 2652, 2655, 2663, 2665, 2666, 2668, 2669,\n", - " 2672, 2674, 2681, 2687, 2690, 2691, 2693, 2699, 2711, 2720, 2722,\n", - " 2727, 2734, 2745, 2747, 2758, 2759, 2769, 2771, 2772, 2775, 2781,\n", - " 2783, 2787, 2789, 2796, 2810, 2811, 2820, 2827, 2833, 2839, 2840,\n", - " 2842, 2843, 2847, 2853, 2860, 2875, 2876, 2881, 2888, 2894, 2901,\n", - " 2904, 2916, 
2919, 2928, 2950, 2954, 2960, 2963, 2966, 2986, 2987,\n", - " 2992, 2994, 2999, 3000, 3007, 3039, 3059, 3061, 3064, 3067, 3073,\n", - " 3078, 3082, 3087, 3088, 3090, 3099, 3104, 3112, 3126, 3127, 3131,\n", - " 3135, 3144, 3149, 3150, 3154, 3155, 3157, 3162, 3170, 3175, 3180,\n", - " 3181, 3182, 3183, 3184, 3187, 3197, 3204, 3210, 3216, 3221, 3225,\n", - " 3227, 3230, 3235, 3236, 3243, 3244, 3271, 3272, 3276, 3278, 3283,\n", - " 3289, 3293, 3304, 3305, 3306, 3307, 3318, 3324, 3329, 3346, 3348,\n", - " 3350, 3352, 3358, 3362, 3383, 3385, 3388, 3405, 3408, 3422, 3437,\n", - " 3439, 3446, 3448, 3450, 3486, 3507, 3512, 3513, 3516, 3522, 3523,\n", - " 3554, 3560, 3569, 3573, 3574, 3584, 3593, 3595, 3601, 3606, 3613,\n", - " 3614])),\n", - " (3,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1.]),\n", - " 19.20965012246301,\n", - " array([ 4, 5, 13, 30, 54, 59, 61, 76, 78, 82, 98,\n", - " 104, 112, 129, 132, 142, 143, 152, 155, 156, 167, 168,\n", - " 180, 181, 185, 203, 209, 222, 225, 239, 245, 251, 258,\n", - " 262, 269, 272, 284, 290, 305, 312, 316, 337, 347, 349,\n", - " 350, 352, 354, 362, 377, 385, 394, 413, 422, 425, 428,\n", - " 429, 439, 440, 441, 447, 448, 453, 459, 463, 464, 488,\n", - " 492, 496, 500, 509, 520, 521, 523, 525, 526, 533, 549,\n", - " 557, 558, 567, 574, 587, 593, 606, 607, 625, 627, 634,\n", - " 655, 663, 667, 674, 679, 683, 696, 718, 719, 729, 748,\n", - " 760, 769, 796, 800, 801, 807, 816, 818, 832, 835, 850,\n", - " 858, 862, 870, 875, 880, 884, 886, 889, 892, 897, 920,\n", - " 940, 944, 954, 958, 979, 985, 988, 991, 1002, 1010, 1027,\n", - " 1028, 1033, 1039, 1041, 1050, 1052, 1053, 1084, 1085, 1098, 1103,\n", - " 1109, 1116, 1122, 1146, 1156, 1163, 1166, 1170, 1190, 1195, 1203,\n", - " 1209, 1215, 1216, 1221, 1237, 1248, 1251, 1265, 1266, 1268, 1274,\n", - " 1298, 1304, 1318, 1322, 1347, 1349, 1377, 1390, 1392, 1404, 1417,\n", - " 1420, 1430, 1433, 1438, 1441, 1443, 1448, 1449, 1457, 1458, 1459,\n", - " 1465, 1476, 1485, 1488, 1502, 1510, 1515, 1523, 1533, 1536, 1538,\n", - " 1558, 1571, 1574, 1579, 1586, 1592, 1593, 1602, 1607, 1623, 1632,\n", - " 
1636, 1637, 1638, 1639, 1652, 1670, 1673, 1682, 1683, 1684, 1698,\n", - " 1719, 1735, 1744, 1745, 1752, 1756, 1763, 1768, 1776, 1790, 1795,\n", - " 1796, 1807, 1823, 1830, 1837, 1839, 1840, 1841, 1852, 1877, 1879,\n", - " 1889, 1930, 1939, 1944, 1945, 1959, 1969, 1978, 1985, 1987, 1996,\n", - " 2007, 2014, 2018, 2030, 2043, 2044, 2066, 2081, 2095, 2101, 2105,\n", - " 2108, 2125, 2129, 2137, 2143, 2150, 2153, 2185, 2196, 2206, 2212,\n", - " 2214, 2216, 2231, 2235, 2262, 2272, 2278, 2298, 2310, 2318, 2324,\n", - " 2333, 2343, 2359, 2372, 2381, 2394, 2395, 2400, 2425, 2429, 2446,\n", - " 2462, 2470, 2471, 2472, 2475, 2477, 2484, 2485, 2487, 2488, 2489,\n", - " 2511, 2523, 2538, 2541, 2563, 2568, 2617, 2629, 2644, 2649, 2670,\n", - " 2676, 2683, 2685, 2695, 2696, 2819, 2823, 2825, 2830, 2834, 2838,\n", - " 2846, 2863, 2870, 2883, 2893, 2910, 2912, 2921, 2930, 2931, 2933,\n", - " 2939, 2940, 2942, 2945, 2958, 2971, 2980, 3013, 3015, 3016, 3019,\n", - " 3026, 3027, 3049, 3050, 3060, 3076, 3086, 3092, 3093, 3094, 3096,\n", - " 3100, 3110, 3115, 3120, 3130, 3133, 3136, 3141, 3143, 3159, 3165,\n", - " 3186, 3191, 3195, 3198, 3202, 3211, 3215, 3222, 3223, 3228, 3229,\n", - " 3234, 3248, 3257, 3258, 3280, 3281, 3286, 3291, 3314, 3317, 3325,\n", - " 3327, 3340, 3355, 3368, 3370, 3387, 3395, 3402, 3407, 3414, 3417,\n", - " 3420, 3428, 3435, 3451, 3454, 3455, 3464, 3479, 3487, 3491, 3493,\n", - " 3495, 3535, 3558, 3564, 3590, 3592, 3607, 3610, 3612, 3615, 3619,\n", - " 3620, 3624])),\n", - " (4,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " 23.6344423140483,\n", - " array([ 17, 23, 24, 36, 37, 39, 40, 50, 62, 66, 67,\n", - " 75, 81, 91, 92, 100, 119, 125, 158, 162, 166, 176,\n", - " 186, 191, 195, 200, 208, 210, 220, 233, 237, 244, 250,\n", - " 257, 259, 263, 273, 293, 294, 314, 319, 322, 330, 344,\n", - " 348, 
353, 360, 387, 401, 402, 461, 468, 475, 478, 482,\n", - " 493, 494, 495, 498, 501, 506, 511, 515, 518, 528, 541,\n", - " 556, 561, 577, 578, 601, 611, 612, 615, 618, 620, 623,\n", - " 640, 645, 656, 664, 666, 681, 688, 691, 694, 699, 710,\n", - " 711, 722, 723, 724, 733, 734, 745, 755, 764, 770, 771,\n", - " 777, 790, 794, 799, 803, 812, 830, 836, 841, 843, 849,\n", - " 852, 860, 879, 882, 885, 900, 909, 919, 926, 933, 936,\n", - " 943, 951, 956, 961, 964, 980, 983, 992, 1007, 1012, 1013,\n", - " 1029, 1034, 1047, 1048, 1051, 1054, 1056, 1058, 1061, 1064, 1080,\n", - " 1081, 1086, 1087, 1088, 1091, 1092, 1095, 1111, 1113, 1119, 1121,\n", - " 1125, 1126, 1127, 1145, 1149, 1150, 1164, 1167, 1176, 1181, 1205,\n", - " 1207, 1208, 1219, 1228, 1235, 1236, 1239, 1240, 1250, 1264, 1276,\n", - " 1299, 1300, 1303, 1306, 1311, 1314, 1328, 1332, 1342, 1351, 1363,\n", - " 1370, 1371, 1376, 1380, 1386, 1387, 1398, 1399, 1403, 1422, 1423,\n", - " 1435, 1437, 1439, 1442, 1447, 1467, 1470, 1472, 1479, 1489, 1492,\n", - " 1499, 1504, 1508, 1514, 1526, 1537, 1547, 1551, 1555, 1561, 1562,\n", - " 1564, 1575, 1596, 1608, 1611, 1612, 1635, 1641, 1675, 1676, 1678,\n", - " 1681, 1693, 1694, 1695, 1696, 1697, 1700, 1703, 1714, 1721, 1722,\n", - " 1723, 1732, 1747, 1753, 1758, 1759, 1761, 1765, 1774, 1805, 1819,\n", - " 1844, 1845, 1846, 1847, 1849, 1871, 1881, 1885, 1892, 1898, 1908,\n", - " 1910, 1927, 1928, 1949, 1950, 1961, 1967, 1977, 1982, 1983, 1986,\n", - " 1989, 1990, 2011, 2019, 2020, 2023, 2040, 2042, 2045, 2050, 2054,\n", - " 2057, 2058, 2085, 2087, 2097, 2100, 2106, 2110, 2123, 2130, 2141,\n", - " 2151, 2158, 2163, 2179, 2187, 2193, 2195, 2197, 2202, 2207, 2208,\n", - " 2227, 2237, 2238, 2244, 2256, 2258, 2263, 2287, 2299, 2319, 2325,\n", - " 2326, 2329, 2330, 2336, 2340, 2345, 2384, 2387, 2401, 2414, 2416,\n", - " 2426, 2427, 2431, 2433, 2452, 2456, 2459, 2468, 2483, 2492, 2494,\n", - " 2496, 2500, 2504, 2512, 2515, 2516, 2520, 2525, 2540, 2560, 2566,\n", - " 2567, 2570, 2571, 
2581, 2599, 2605, 2613, 2616, 2630, 2634, 2635,\n", - " 2637, 2641, 2645, 2653, 2656, 2657, 2675, 2678, 2709, 2725, 2737,\n", - " 2753, 2760, 2762, 2763, 2784, 2797, 2800, 2801, 2808, 2815, 2832,\n", - " 2837, 2844, 2845, 2851, 2868, 2880, 2886, 2887, 2897, 2899, 2905,\n", - " 2915, 2925, 2937, 2938, 2944, 2947, 2951, 2952, 2956, 2961, 2968,\n", - " 2977, 2991, 3004, 3038, 3058, 3065, 3068, 3070, 3071, 3075, 3097,\n", - " 3102, 3105, 3109, 3117, 3119, 3123, 3140, 3152, 3161, 3163, 3164,\n", - " 3167, 3176, 3194, 3196, 3200, 3207, 3212, 3219, 3224, 3255, 3274,\n", - " 3277, 3279, 3284, 3285, 3295, 3297, 3299, 3300, 3308, 3310, 3321,\n", - " 3322, 3323, 3333, 3334, 3337, 3338, 3343, 3344, 3354, 3364, 3372,\n", - " 3378, 3384, 3394, 3409, 3411, 3416, 3427, 3438, 3440, 3441, 3449,\n", - " 3463, 3469, 3475, 3476, 3484, 3485, 3508, 3509, 3511, 3528, 3531,\n", - " 3539, 3542, 3555, 3561, 3603, 3608, 3622])),\n", - " (5,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 
1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " 16.251921424887335,\n", - " array([ 12, 14, 15, 35, 45, 49, 51, 63, 65, 68, 79,\n", - " 105, 139, 141, 147, 150, 153, 154, 178, 217, 219, 231,\n", - " 236, 238, 254, 264, 274, 277, 288, 301, 306, 326, 328,\n", - " 329, 346, 355, 358, 364, 365, 371, 375, 378, 379, 384,\n", - " 389, 390, 391, 396, 411, 427, 431, 444, 477, 481, 491,\n", - " 517, 543, 544, 555, 588, 594, 608, 616, 628, 639, 643,\n", - " 662, 671, 682, 703, 752, 775, 782, 784, 788, 793, 798,\n", - " 811, 827, 828, 845, 864, 869, 898, 913, 921, 938, 949,\n", - " 953, 962, 966, 972, 977, 987, 996, 1003, 1004, 1008, 1019,\n", - " 1023, 1038, 1042, 1049, 1068, 1069, 1074, 1076, 1078, 1089, 1102,\n", - " 1105, 1128, 1153, 1155, 1162, 1179, 1182, 1198, 1211, 1212, 1217,\n", - " 1218, 1225, 1231, 1232, 1241, 1245, 1257, 1262, 1269, 1270, 1281,\n", - " 1286, 1294, 1297, 1309, 1312, 1323, 1327, 1339, 1341, 1360, 1378,\n", - " 1383, 1388, 1389, 1400, 1401, 1413, 1416, 1450, 1455, 1463, 1477,\n", - " 1484, 1494, 1495, 1505, 1506, 1522, 1534, 1544, 1549, 1550, 1554,\n", - " 1569, 1572, 1588, 1598, 1613, 1622, 1626, 1630, 1644, 1666, 1669,\n", - " 1686, 1708, 1724, 1727, 1737, 1743, 1749, 1771, 1793, 1799, 1801,\n", - " 1803, 1808, 1814, 1817, 1835, 1853, 1868, 1872, 1895, 1896, 1912,\n", - " 1914, 1916, 1922, 1931, 1933, 1946, 1974, 1992, 2005, 2033, 2052,\n", - " 2053, 2062, 2069, 2073, 2074, 2089, 2091, 2094, 2103, 2113, 
2116,\n", - " 2122, 2127, 2135, 2149, 2154, 2162, 2164, 2166, 2180, 2181, 2182,\n", - " 2209, 2211, 2245, 2253, 2257, 2268, 2271, 2288, 2290, 2296, 2316,\n", - " 2334, 2337, 2338, 2339, 2341, 2347, 2349, 2351, 2355, 2356, 2405,\n", - " 2413, 2422, 2428, 2438, 2440, 2448, 2455, 2464, 2473, 2479, 2482,\n", - " 2490, 2509, 2517, 2531, 2550, 2555, 2558, 2574, 2575, 2583, 2589,\n", - " 2591, 2597, 2611, 2614, 2618, 2627, 2633, 2636, 2643, 2682, 2700,\n", - " 2704, 2707, 2716, 2721, 2726, 2754, 2764, 2765, 2793, 2794, 2805,\n", - " 2814, 2835, 2852, 2855, 2856, 2862, 2866, 2878, 2891, 2909, 2918,\n", - " 2920, 2924, 2974, 2982, 2993, 3011, 3020, 3022, 3040, 3041, 3044,\n", - " 3054, 3056, 3091, 3101, 3107, 3108, 3111, 3142, 3151, 3156, 3203,\n", - " 3206, 3213, 3217, 3231, 3269, 3309, 3313, 3331, 3332, 3335, 3342,\n", - " 3349, 3359, 3374, 3389, 3400, 3403, 3445, 3453, 3460, 3467, 3492,\n", - " 3498, 3500, 3515, 3521, 3525, 3526, 3529, 3543, 3568, 3571, 3585,\n", - " 3588, 3589, 3591, 3594, 3604]))]" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# here the information for all the combinations\n", - "freq_wrap.iter_columns()" + "(cat_id_0_column * weights[cat_id_0_nz]).sum()" ] }, { "cell_type": "code", - "execution_count": 133, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 
1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " 46.529753144154704,\n", - " array([ 0, 1, 2, 7, 16, 18, 20, 27, 32, 34, 38,\n", - " 41, 47, 58, 60, 74, 84, 86, 87, 88, 90, 95,\n", - " 97, 99, 106, 107, 108, 115, 116, 118, 121, 122, 130,\n", - " 131, 135, 138, 140, 159, 163, 165, 169, 175, 182, 190,\n", - " 192, 205, 207, 211, 212, 214, 215, 221, 223, 226, 227,\n", - " 228, 229, 232, 235, 240, 242, 256, 261, 268, 270, 279,\n", - " 280, 281, 282, 283, 285, 287, 291, 295, 297, 298, 299,\n", - " 300, 308, 309, 310, 311, 315, 320, 324, 331, 332, 336,\n", - " 338, 345, 356, 361, 363, 366, 367, 368, 372, 380, 383,\n", - " 388, 393, 400, 403, 406, 408, 418, 419, 430, 432, 433,\n", - " 434, 449, 450, 465, 466, 471, 472, 473, 483, 486, 499,\n", - " 504, 513, 514, 519, 537, 538, 540, 545, 546, 548, 552,\n", - " 553, 559, 565, 573, 
579, 580, 583, 585, 589, 590, 597,\n", - " 598, 609, 613, 622, 624, 629, 632, 635, 636, 637, 638,\n", - " 641, 642, 646, 649, 650, 660, 661, 669, 672, 673, 684,\n", - " 687, 692, 697, 698, 706, 713, 716, 725, 727, 737, 738,\n", - " 739, 741, 747, 751, 754, 761, 762, 768, 797, 804, 805,\n", - " 809, 814, 815, 820, 822, 824, 825, 826, 837, 839, 840,\n", - " 842, 847, 855, 863, 866, 868, 872, 876, 887, 891, 901,\n", - " 905, 911, 916, 917, 923, 937, 939, 942, 945, 946, 952,\n", - " 960, 967, 969, 970, 973, 974, 975, 978, 984, 989, 990,\n", - " 993, 997, 999, 1000, 1005, 1006, 1009, 1011, 1015, 1021, 1022,\n", - " 1024, 1031, 1037, 1040, 1043, 1044, 1057, 1059, 1062, 1063, 1071,\n", - " 1072, 1073, 1079, 1083, 1090, 1093, 1094, 1096, 1097, 1101, 1106,\n", - " 1107, 1108, 1114, 1118, 1124, 1130, 1137, 1138, 1140, 1143, 1144,\n", - " 1161, 1171, 1175, 1178, 1180, 1186, 1187, 1188, 1192, 1196, 1197,\n", - " 1204, 1226, 1247, 1252, 1253, 1254, 1256, 1259, 1261, 1263, 1267,\n", - " 1271, 1279, 1285, 1292, 1295, 1302, 1305, 1308, 1317, 1319, 1321,\n", - " 1324, 1331, 1333, 1335, 1354, 1355, 1357, 1358, 1361, 1362, 1366,\n", - " 1367, 1373, 1382, 1395, 1396, 1397, 1406, 1407, 1408, 1409, 1415,\n", - " 1421, 1425, 1426, 1427, 1429, 1434, 1444, 1445, 1452, 1456, 1460,\n", - " 1461, 1466, 1474, 1480, 1481, 1482, 1490, 1491, 1493, 1501, 1509,\n", - " 1521, 1528, 1545, 1552, 1566, 1567, 1570, 1576, 1577, 1580, 1582,\n", - " 1594, 1603, 1605, 1618, 1619, 1627, 1628, 1643, 1645, 1650, 1653,\n", - " 1654, 1655, 1657, 1660, 1661, 1662, 1664, 1674, 1679, 1688, 1691,\n", - " 1692, 1701, 1704, 1705, 1709, 1710, 1711, 1712, 1713, 1715, 1729,\n", - " 1730, 1731, 1738, 1739, 1741, 1746, 1751, 1766, 1770, 1779, 1783,\n", - " 1785, 1787, 1800, 1802, 1811, 1816, 1820, 1821, 1824, 1829, 1833,\n", - " 1843, 1848, 1850, 1851, 1856, 1861, 1865, 1866, 1867, 1870, 1874,\n", - " 1875, 1878, 1883, 1887, 1890, 1891, 1893, 1897, 1903, 1904, 1905,\n", - " 1907, 1917, 1918, 1920, 1923, 1924, 1932, 1934, 
1937, 1947, 1953,\n", - " 1957, 1958, 1962, 1965, 1971, 1972, 1975, 1976, 1979, 1980, 1998,\n", - " 2000, 2003, 2008, 2013, 2021, 2025, 2028, 2032, 2034, 2036, 2041,\n", - " 2048, 2049, 2056, 2061, 2064, 2068, 2070, 2076, 2080, 2086, 2090,\n", - " 2093, 2096, 2102, 2112, 2120, 2124, 2131, 2132, 2133, 2138, 2144,\n", - " 2146, 2159, 2165, 2167, 2173, 2174, 2183, 2184, 2189, 2198, 2200,\n", - " 2203, 2204, 2210, 2213, 2219, 2220, 2228, 2233, 2234, 2240, 2241,\n", - " 2251, 2252, 2255, 2260, 2264, 2265, 2267, 2273, 2280, 2283, 2284,\n", - " 2285, 2286, 2289, 2291, 2293, 2294, 2295, 2297, 2309, 2313, 2314,\n", - " 2317, 2321, 2331, 2335, 2344, 2350, 2352, 2364, 2369, 2370, 2371,\n", - " 2380, 2393, 2396, 2402, 2403, 2404, 2411, 2412, 2415, 2423, 2435,\n", - " 2445, 2447, 2450, 2453, 2466, 2469, 2476, 2481, 2491, 2495, 2507,\n", - " 2513, 2514, 2518, 2519, 2527, 2530, 2532, 2535, 2536, 2539, 2542,\n", - " 2545, 2546, 2547, 2559, 2562, 2565, 2577, 2579, 2584, 2588, 2593,\n", - " 2594, 2596, 2598, 2603, 2604, 2610, 2612, 2615, 2621, 2622, 2623,\n", - " 2631, 2638, 2642, 2646, 2647, 2654, 2659, 2660, 2661, 2662, 2667,\n", - " 2671, 2677, 2684, 2686, 2688, 2689, 2692, 2694, 2701, 2702, 2703,\n", - " 2705, 2706, 2712, 2714, 2715, 2717, 2719, 2724, 2728, 2729, 2735,\n", - " 2739, 2742, 2744, 2748, 2750, 2755, 2757, 2768, 2770, 2773, 2774,\n", - " 2776, 2777, 2778, 2779, 2780, 2785, 2790, 2791, 2792, 2802, 2803,\n", - " 2806, 2812, 2818, 2824, 2828, 2829, 2831, 2848, 2849, 2857, 2858,\n", - " 2861, 2864, 2872, 2874, 2877, 2882, 2884, 2885, 2890, 2898, 2900,\n", - " 2902, 2906, 2911, 2913, 2922, 2926, 2934, 2935, 2943, 2953, 2955,\n", - " 2964, 2967, 2969, 2972, 2973, 2981, 2983, 2984, 2985, 2988, 2990,\n", - " 2996, 2997, 3002, 3009, 3010, 3012, 3024, 3025, 3030, 3035, 3036,\n", - " 3043, 3045, 3047, 3052, 3053, 3055, 3062, 3069, 3072, 3077, 3079,\n", - " 3080, 3084, 3089, 3095, 3098, 3116, 3118, 3124, 3128, 3138, 3145,\n", - " 3148, 3166, 3168, 3173, 3174, 3178, 3185, 3193, 
3205, 3208, 3214,\n", - " 3218, 3232, 3237, 3240, 3247, 3256, 3259, 3262, 3263, 3266, 3267,\n", - " 3270, 3273, 3287, 3294, 3296, 3301, 3302, 3311, 3316, 3320, 3336,\n", - " 3341, 3345, 3353, 3356, 3357, 3363, 3365, 3366, 3367, 3369, 3371,\n", - " 3376, 3377, 3380, 3390, 3391, 3393, 3397, 3399, 3401, 3404, 3406,\n", - " 3410, 3415, 3419, 3421, 3424, 3429, 3431, 3432, 3443, 3444, 3457,\n", - " 3458, 3459, 3462, 3466, 3468, 3470, 3471, 3472, 3473, 3478, 3488,\n", - " 3490, 3494, 3497, 3499, 3503, 3505, 3520, 3532, 3544, 3546, 3547,\n", - " 3548, 3550, 3551, 3556, 3557, 3562, 3567, 3570, 3576, 3578, 3581,\n", - " 3582, 3596, 3611, 3616, 3617, 3618, 3623]))" - ] - }, - "execution_count": 133, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# and here, we can see for category \"0\" that non zero values are...\n", - "freq_wrap.get_column(0)" + "# Here the total households with no car and no children(843)\n", + "jd_households" ] }, { - "cell_type": "code", - "execution_count": 135, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hh_id\n", - "0 1.0\n", - "2 1.0\n", - "3 1.0\n", - "4 0.0\n", - "5 0.0\n", - "13 0.0\n", - "14 0.0\n", - "15 1.0\n", - "Name: 0, dtype: float64" - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "#... in the index 0, 1, 2, 7, (...)\n", - "h_freq_table[0][0:8]" + "The sum of non-zero values is weighted and then reduced by subtracting the constraint..." ] }, { @@ -5176,320 +1727,223 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "(cat_id_0_column * weights[cat_id_0_nz]).sum() - constraint" + ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "...
and finally is expressed in terms of the constraint for that category (the 𝛿 parameter described in the IPU paper). The absolute value of the relative difference between the weighted sum and the corresponding constraint may be used as a goodness-of-fit measure; the cell below computes the signed relative difference, of which 𝛿 is the absolute value: " + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "((cat_id_0_column * weights[cat_id_0_nz]).sum() - constraint) / constraint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is basically showing the proportion of the `cat_id` within the entire population of the PUMA. In the example of this first iteration, we have that for each household with `cat_id` == `0`, there are 17 households that could be out of that category (`cat_id` != `0`). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![constraints](img/ipu.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This process will be repeated for all household types (or `cat_id`) to build a single average value (which is the sum of each `_fit_quality` result divided by the number of `cat_id` columns):" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "fit_qual_0_to_5 = _average_fit_quality(freq_wrap, weights)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "fit_qual_0_to_5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As explained in [A METHODOLOGY TO MATCH DISTRIBUTIONS OF BOTH HOUSEHOLD AND\n", + "PERSON ATTRIBUTES IN THE GENERATION OF SYNTHETIC POPULATIONS](http://www.scag.ca.gov/Documents/PopulationSynthesizerPaper_TRB.pdf), the IPU algorithm starts by assuming equal weights for all households in the sample.
The algorithm then proceeds by adjusting weights for each household/person constraint in an iterative process until the constraints are matched as closely as possible for both household and person attributes. " + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "fit_change = np.inf\n", + "convergence= 1e-4" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "if fit_change > convergence:\n", + " print(\"Updating weights matrix until reaching a fit quality value under the convergence!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The weights for each household-level constraint are adjusted by dividing the number of households in that category (i.e., the constraint value) by the weighted sum of the first household type column: \n", + "\n", + "The `_update_weights` function creates the following adjustment `adj = constraint / float((column * weights).sum())` and uses it to update the weights (`weights * adj`). The weights for all households of each household type will be multiplied by this ratio to satisfy the constraint. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this sense, the `households_weights` function will finally return:" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# 1. An array of corrected weights that best matches each household type \n", + "best_weights" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# 2. And a fit quality based on the proportion of each hh type that reduces the fit changes under the convergence \n", + "fit_quality" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# 3.
Built in ...\n", + "iterations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Drawing synthetic population " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![draw](img/draw.png)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from synthpop import draw" + ] }, { "cell_type": "code", - "execution_count": 93, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def _drop_zeros(df):\n", - " \"\"\"\n", - " Drop zeros from a DataFrame, returning an iterator over the columns\n", - " in the DataFrame.\n", - "\n", - " Yields tuples of (column name, non-zero column values, non-zero indexes).\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pandas.DataFrame\n", - "\n", - " \"\"\"\n", - " def for_each_col(col):\n", - " nz = col.nonzero()[0]\n", - " return col[nz], nz\n", - "\n", - " for (col_idx, (col, nz)) in df.apply(for_each_col, axis=0, raw=True).items():\n", - " return (col_idx, col, nz)" + "num_households = int(hh_marginals_tract_400_block_gp_1.groupby(level=0).sum().mean())" ] }, { "cell_type": "code", - "execution_count": 94, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "df = h_freq_table.copy()" + "num_households" ] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def for_each_col(col):\n", - " nz = col.nonzero()[0]\n", - " return col[nz], nz" + "fac = _FrequencyAndConstraints(h_freq_table, h_constraint)" ] }, { "cell_type": "code", - "execution_count": 102, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "for (col_idx, (col, nz)) in df.apply(for_each_col, axis=0, raw=True).items():\n", - " d = (col_idx, col, nz)" + "indexes = draw._draw_indexes(num_households, fac, best_weights)" ] }, { "cell_type": "code", - "execution_count": 136, + "execution_count": null, "metadata": {}, - 
"outputs": [ - { - "data": { - "text/plain": [ - "(5,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " array([ 12, 14, 15, 35, 45, 49, 51, 63, 65, 68, 79,\n", - " 105, 139, 141, 147, 150, 153, 154, 178, 217, 219, 231,\n", - " 236, 238, 254, 264, 274, 277, 288, 301, 306, 326, 328,\n", - " 329, 346, 355, 358, 364, 365, 371, 375, 378, 379, 384,\n", - " 389, 390, 391, 396, 411, 427, 
431, 444, 477, 481, 491,\n", - " 517, 543, 544, 555, 588, 594, 608, 616, 628, 639, 643,\n", - " 662, 671, 682, 703, 752, 775, 782, 784, 788, 793, 798,\n", - " 811, 827, 828, 845, 864, 869, 898, 913, 921, 938, 949,\n", - " 953, 962, 966, 972, 977, 987, 996, 1003, 1004, 1008, 1019,\n", - " 1023, 1038, 1042, 1049, 1068, 1069, 1074, 1076, 1078, 1089, 1102,\n", - " 1105, 1128, 1153, 1155, 1162, 1179, 1182, 1198, 1211, 1212, 1217,\n", - " 1218, 1225, 1231, 1232, 1241, 1245, 1257, 1262, 1269, 1270, 1281,\n", - " 1286, 1294, 1297, 1309, 1312, 1323, 1327, 1339, 1341, 1360, 1378,\n", - " 1383, 1388, 1389, 1400, 1401, 1413, 1416, 1450, 1455, 1463, 1477,\n", - " 1484, 1494, 1495, 1505, 1506, 1522, 1534, 1544, 1549, 1550, 1554,\n", - " 1569, 1572, 1588, 1598, 1613, 1622, 1626, 1630, 1644, 1666, 1669,\n", - " 1686, 1708, 1724, 1727, 1737, 1743, 1749, 1771, 1793, 1799, 1801,\n", - " 1803, 1808, 1814, 1817, 1835, 1853, 1868, 1872, 1895, 1896, 1912,\n", - " 1914, 1916, 1922, 1931, 1933, 1946, 1974, 1992, 2005, 2033, 2052,\n", - " 2053, 2062, 2069, 2073, 2074, 2089, 2091, 2094, 2103, 2113, 2116,\n", - " 2122, 2127, 2135, 2149, 2154, 2162, 2164, 2166, 2180, 2181, 2182,\n", - " 2209, 2211, 2245, 2253, 2257, 2268, 2271, 2288, 2290, 2296, 2316,\n", - " 2334, 2337, 2338, 2339, 2341, 2347, 2349, 2351, 2355, 2356, 2405,\n", - " 2413, 2422, 2428, 2438, 2440, 2448, 2455, 2464, 2473, 2479, 2482,\n", - " 2490, 2509, 2517, 2531, 2550, 2555, 2558, 2574, 2575, 2583, 2589,\n", - " 2591, 2597, 2611, 2614, 2618, 2627, 2633, 2636, 2643, 2682, 2700,\n", - " 2704, 2707, 2716, 2721, 2726, 2754, 2764, 2765, 2793, 2794, 2805,\n", - " 2814, 2835, 2852, 2855, 2856, 2862, 2866, 2878, 2891, 2909, 2918,\n", - " 2920, 2924, 2974, 2982, 2993, 3011, 3020, 3022, 3040, 3041, 3044,\n", - " 3054, 3056, 3091, 3101, 3107, 3108, 3111, 3142, 3151, 3156, 3203,\n", - " 3206, 3213, 3217, 3231, 3269, 3309, 3313, 3331, 3332, 3335, 3342,\n", - " 3349, 3359, 3374, 3389, 3400, 3403, 3445, 3453, 3460, 3467, 3492,\n", - " 3498, 
3500, 3515, 3521, 3525, 3526, 3529, 3543, 3568, 3571, 3585,\n", - " 3588, 3589, 3591, 3594, 3604]))" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "d" + "indexes" ] }, { "cell_type": "code", - "execution_count": 141, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hh_id\n", - "0 0.0\n", - "2 0.0\n", - "3 0.0\n", - "4 0.0\n", - "5 0.0\n", - "13 0.0\n", - "14 0.0\n", - "15 0.0\n", - "17 0.0\n", - "18 0.0\n", - "19 0.0\n", - "23 0.0\n", - "24 1.0\n", - "25 0.0\n", - "26 1.0\n", - "Name: 5, dtype: float64" - ] - }, - "execution_count": 141, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "h_freq_table[5][:15]" + "synth_hh = h_pums.loc[indexes].reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(5,\n", - " array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n", - " array([ 12, 14, 15, 35, 45, 49, 51, 63, 65, 68, 79,\n", - " 105, 139, 141, 147, 150, 153, 154, 178, 217, 219, 231,\n", - " 236, 238, 254, 264, 274, 277, 288, 301, 306, 326, 328,\n", - " 329, 346, 355, 358, 364, 365, 371, 375, 378, 379, 384,\n", - " 389, 390, 391, 396, 411, 427, 431, 444, 477, 481, 491,\n", - " 517, 543, 544, 555, 588, 594, 608, 616, 628, 639, 643,\n", - " 662, 671, 682, 703, 752, 775, 782, 784, 788, 793, 798,\n", - " 811, 827, 828, 845, 864, 869, 898, 913, 921, 938, 949,\n", - " 953, 962, 966, 972, 977, 987, 996, 1003, 1004, 1008, 1019,\n", - " 1023, 1038, 1042, 1049, 1068, 1069, 1074, 1076, 1078, 1089, 1102,\n", - " 1105, 1128, 1153, 1155, 1162, 1179, 1182, 1198, 1211, 1212, 1217,\n", - " 1218, 1225, 1231, 1232, 1241, 1245, 1257, 1262, 1269, 1270, 1281,\n", - " 1286, 1294, 1297, 1309, 1312, 1323, 1327, 1339, 1341, 1360, 1378,\n", - " 1383, 1388, 1389, 1400, 1401, 1413, 1416, 1450, 1455, 1463, 1477,\n", - " 1484, 1494, 1495, 1505, 1506, 1522, 1534, 1544, 1549, 1550, 1554,\n", - " 1569, 1572, 1588, 1598, 1613, 1622, 1626, 1630, 1644, 1666, 1669,\n", - " 1686, 1708, 1724, 1727, 1737, 1743, 1749, 1771, 1793, 1799, 1801,\n", - " 1803, 1808, 1814, 1817, 1835, 1853, 1868, 1872, 1895, 1896, 1912,\n", - " 1914, 1916, 1922, 1931, 1933, 1946, 1974, 1992, 2005, 2033, 2052,\n", - " 2053, 2062, 2069, 2073, 2074, 
2089, 2091, 2094, 2103, 2113, 2116,\n", - " 2122, 2127, 2135, 2149, 2154, 2162, 2164, 2166, 2180, 2181, 2182,\n", - " 2209, 2211, 2245, 2253, 2257, 2268, 2271, 2288, 2290, 2296, 2316,\n", - " 2334, 2337, 2338, 2339, 2341, 2347, 2349, 2351, 2355, 2356, 2405,\n", - " 2413, 2422, 2428, 2438, 2440, 2448, 2455, 2464, 2473, 2479, 2482,\n", - " 2490, 2509, 2517, 2531, 2550, 2555, 2558, 2574, 2575, 2583, 2589,\n", - " 2591, 2597, 2611, 2614, 2618, 2627, 2633, 2636, 2643, 2682, 2700,\n", - " 2704, 2707, 2716, 2721, 2726, 2754, 2764, 2765, 2793, 2794, 2805,\n", - " 2814, 2835, 2852, 2855, 2856, 2862, 2866, 2878, 2891, 2909, 2918,\n", - " 2920, 2924, 2974, 2982, 2993, 3011, 3020, 3022, 3040, 3041, 3044,\n", - " 3054, 3056, 3091, 3101, 3107, 3108, 3111, 3142, 3151, 3156, 3203,\n", - " 3206, 3213, 3217, 3231, 3269, 3309, 3313, 3331, 3332, 3335, 3342,\n", - " 3349, 3359, 3374, 3389, 3400, 3403, 3445, 3453, 3460, 3467, 3492,\n", - " 3498, 3500, 3515, 3521, 3525, 3526, 3529, 3543, 3568, 3571, 3585,\n", - " 3588, 3589, 3591, 3594, 3604]))" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "d" + "synth_hh" ] }, { @@ -5497,14 +1951,31 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "mrg_tbl = pd.DataFrame(\n", + " {'serialno': synth_hh.serialno.values,\n", + " 'hh_id': synth_hh.index.values})" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "mrg_tbl" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, synthetic persons dataset will be built based on `p_pums` and the `hh_id` of synthetic households. As follows: \n", + "\n", + "```\n", + "synth_people = pd.merge(p_pums, mrg_tbl, left_on='serialno', right_on='serialno')\n", + "```" + ] } ], "metadata": {