In [1]:
import pandas as pd

import synthpop.zone_synthesizer as zs

#### Specify sample data CSV paths. See the files listed here for the expected structure. Marginal tables require multi-indexed columns, with the category name in level 0 and the category value in level 1 of the column index. Category columns in the sample files should be labeled with the corresponding category names, and the values in those columns should match the category value headers in the marginal table.

In [2]:
# Marginal tables (household level, then person level).
hh_marginal_file, person_marginal_file = (
    'input_data/hh_marginals.csv',
    'input_data/person_marginals.csv',
)

# Sample files (household level, then person level).
hh_sample_file, person_sample_file = (
    'input_data/household_sample.csv',
    'input_data/person_sample.csv',
)

#### Load and process the input marginals, samples, and geography crosswalk

In [3]:
# Load the four CSVs; load_data hands back the two marginal tables, the two
# sample tables, and the geography crosswalk (xwalk) used by the synthesis calls.
hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(
    hh_marginal_file,
    person_marginal_file,
    hh_sample_file,
    person_sample_file,
)

In [4]:
# Preview the household marginals; columns are a two-level (cat_name, cat_values) index.
hh_marg.head()

cat_name,cars,cars,cars,children,children,income,income,income,workers,workers,workers
cat_values,none,one,two or more,no,yes,gt100,gt35-lt100,lt35,none,one,two or more
1,7,49,197,41,215,57,125,74,72,77,105
2,9,59,237,68,239,83,126,98,87,93,125
3,10,69,275,79,279,74,170,114,102,108,146
4,11,76,302,167,224,42,105,244,111,118,160
5,18,117,466,86,517,50,261,292,171,182,247


In [5]:
# Preview the person marginals; columns are a two-level (cat_name, cat_values) index.
p_marg.head()

cat_name,age,age,age,age,race,race,race,race,sex,sex
cat_values,19 and under,20 to 35,35 to 60,above 60,asian,black,other,white,female,male
1,312,108,223,177,64,0,0,756,440,380
2,235,143,296,181,0,0,0,855,452,403
3,303,229,445,174,0,0,24,1127,565,586
4,215,77,356,189,0,0,29,808,389,448
5,506,539,619,262,0,0,0,1926,981,945


In [6]:
# Preview the person sample: labeled category columns (age, race, sex)
# alongside the AGEP, SEX and RAC1P columns.
p_sample.head()

Unnamed: 0,serialno,sample_geog,age,race,sex,AGEP,SEX,RAC1P
0,2012000005576,1,20 to 35,white,male,27,1,1
1,2012000017760,1,20 to 35,white,male,34,1,1
2,2012000021787,1,20 to 35,white,male,32,1,1
3,2012000021815,1,20 to 35,white,male,23,1,1
4,2012000065237,1,20 to 35,white,male,24,1,1


#### Iterate over all marginals in the geography crosswalk and synthesize in-line

In [4]:
# In-line synthesis over the crosswalk geographies.
# Unpacking order for this call: households, persons, stats.
all_households, all_persons, all_stats = zs.synthesize_all_zones(
    hh_marg,
    p_marg,
    hh_sample,
    p_sample,
    xwalk,
)

  adj = constraint / (column * weights).sum()


Drawing 254 households
Drawing 306 households
Drawing 356 households
Drawing 390 households
Drawing 601 households
Drawing 324 households
Drawing 556 households
Drawing 342 households
Drawing 273 households
Drawing 228 households
Drawing 857 households
Drawing 748 households
Drawing 744 households
Drawing 953 households
Drawing 719 households
Drawing 185 households
Drawing 183 households
Drawing 286 households
Drawing 317 households
Drawing 711 households
Drawing 345 households


In [10]:
# Preview the synthesized households (indexed by household_id, with cat_id and geog columns).
all_households.head()

Unnamed: 0_level_0,serialno,sample_geog,cars,workers,children,income,VEH,FINCP,NOC,WIF,cat_id,geog
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,2009001253781,1,none,none,no,lt35,0,,0,,2,1
2,2010000262947,1,none,none,yes,lt35,0,0.0,1,0.0,5,1
3,2012001476110,1,none,none,yes,lt35,0,3100.0,3,0.0,5,1
4,2009000455972,1,none,none,yes,lt35,0,3500.0,2,0.0,5,1
5,2010000262947,1,none,none,yes,lt35,0,0.0,1,0.0,5,1


#### all_persons.household_id maps person records to all_households.index

In [9]:
# Preview the synthesized persons: the person sample columns plus cat_id, geog and household_id.
all_persons.head()

Unnamed: 0,serialno,sample_geog,age,race,sex,AGEP,SEX,RAC1P,cat_id,geog,household_id
0,2012000136005,1,20 to 35,white,male,31,1,1,69,1,96
1,2012000136005,1,35 to 60,white,female,36,2,1,76,1,96
2,2012000136005,1,19 and under,white,male,10,1,1,61,1,96
3,2012000136005,1,19 and under,white,male,8,1,1,61,1,96
4,2012000136005,1,19 and under,white,male,5,1,1,61,1,96


#### Synthesize all marginal geographies in the crosswalk using a specified or default number of cores via multiprocessing

In [11]:
# Multiprocessing variant of the synthesis; no core count is passed, so the
# default is used.
# NOTE: this call is unpacked persons first, then households -- the reverse of
# the in-line synthesize_all_zones call earlier in the notebook.
all_persons, all_households, all_stats = zs.multiprocess_synthesize(
    hh_marg,
    p_marg,
    hh_sample,
    p_sample,
    xwalk,
)

Drawing 390 households
Drawing 306 households
Drawing 254 households
Drawing 601 households
Drawing 356 households
Drawing 324 households
Drawing 556 households
Drawing 273 households
Drawing 228 households
Drawing 342 households
Drawing 748 households
Drawing 857 households
Drawing 744 households
Drawing 953 households
Drawing 183 households
Drawing 719 households
Drawing 185 households
Drawing 317 households
Drawing 286 households
Drawing 711 households
Drawing 345 households


In [13]:
# Preview the persons produced by the multiprocess run (same columns as the in-line result).
all_persons.head()

Unnamed: 0,serialno,sample_geog,age,race,sex,AGEP,SEX,RAC1P,cat_id,geog,household_id
0,2012000021787,1,20 to 35,white,male,32,1,1,69,1,247
1,2012000021787,1,20 to 35,white,female,24,2,1,68,1,247
2,2012000021787,1,19 and under,white,male,9,1,1,61,1,247
3,2012000021787,1,19 and under,white,male,6,1,1,61,1,247
4,2012000136005,1,20 to 35,white,male,31,1,1,69,1,93


In [12]:
# Preview the households produced by the multiprocess run (same columns as the in-line result).
all_households.head()

Unnamed: 0_level_0,serialno,sample_geog,cars,workers,children,income,VEH,FINCP,NOC,WIF,cat_id,geog
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,2010001290355,1,none,none,no,lt35,0,,0,,2,1
2,2010000262947,1,none,none,yes,lt35,0,0.0,1,0.0,5,1
3,2010000726700,1,none,none,yes,lt35,0,15300.0,1,0.0,5,1
4,2010000726700,1,none,none,yes,lt35,0,15300.0,1,0.0,5,1
5,2010000262947,1,none,none,yes,lt35,0,0.0,1,0.0,5,1


In [13]:
# Preview the fit statistics: chi-square and p-score columns, keyed by geog.
all_stats.head()

Unnamed: 0,chi-square,geog,p-score
0,2.226034,1,0.9998402
1,159.166296,2,4.804389e-31
2,185.43982,3,1.314431e-32
3,97.302461,4,1.556224e-14
4,495.917462,5,6.048485e-103
