In [2]:
import pandas as pd
import numpy as np
from pandas import read_csv
from scipy.stats import spearmanr
from sklearn.preprocessing import scale

### The original dataset can be found here: http://microdata.worldbank.org/index.php/catalog/2783.  Users are required to register (free) to access the data, which is separated into several data files that correspond to different sections of the survey.  The csvs need to be downloaded, and then the code below can be used to select relevant features from each data file.  

In [3]:
mydata = "~/Documents/Georgetown/Data/ETH_2015/Household/sect_cover_hh_w3.csv"

In [4]:
#Read data as csv
mydata = pd.read_csv(mydata)


In [5]:
mydata.head()

Unnamed: 0.1,Unnamed: 0,household_id,household_id2,rural,saq01,saq02,saq03,saq06,saq08,hh_saq09,hh_saq11,hh_saq12
0,0,1010101601002,10101088801601002,1,1,1,1,16,2,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
1,1,1010101601017,10101088801601017,1,1,1,1,16,17,9.0,**CONFIDENTIAL**,**CONFIDENTIAL**
2,2,1010101601034,10101088801601034,1,1,1,1,16,34,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
3,3,1010101601049,10101088801601049,1,1,1,1,16,49,3.0,**CONFIDENTIAL**,**CONFIDENTIAL**
4,4,1010101601064,10101088801601064,1,1,1,1,16,64,2.0,**CONFIDENTIAL**,**CONFIDENTIAL**


In [6]:
mydata.describe()

Unnamed: 0.1,Unnamed: 0,household_id,household_id2,rural,saq01,saq02,saq03,saq06,saq08,hh_saq09
count,4954.0,4954.0,4954.0,4954.0,4954.0,4954.0,4954.0,4954.0,4954.0,4948.0
mean,2476.5,5664003000000.0,5.664008e+16,1.592854,5.59891,6.451554,5.766048,11.272709,86.294711,4.771019
std,1430.240947,3884338000000.0,3.884337e+16,0.86498,3.887423,5.48759,5.105229,22.56134,66.403212,2.406512
min,0.0,1010102000000.0,1.010109e+16,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,1238.25,3050825000000.0,3.050826e+16,1.0,3.0,2.0,2.0,3.0,33.0,3.0
50%,2476.5,4140502000000.0,4.140509e+16,1.0,4.0,5.0,4.0,7.0,77.0,5.0
75%,3714.75,7101600000000.0,7.101601e+16,3.0,7.0,9.0,8.0,14.0,127.0,6.0
max,4953.0,15010200000000.0,1.501021e+17,3.0,15.0,22.0,25.0,403.0,572.0,17.0


### Below, I selected features from each data file that may be relevant to predicting our target, based on domain knowledge and relevant empirical research.

In [7]:
mydata_cover = mydata[['household_id', 'household_id2', 'rural', 'saq01', 'saq02', 'saq03', 'saq06', 'saq08', 'hh_saq09', 'hh_saq11', 'hh_saq12']].copy()

In [8]:
mydata_cover.head()

Unnamed: 0,household_id,household_id2,rural,saq01,saq02,saq03,saq06,saq08,hh_saq09,hh_saq11,hh_saq12
0,1010101601002,10101088801601002,1,1,1,1,16,2,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
1,1010101601017,10101088801601017,1,1,1,1,16,17,9.0,**CONFIDENTIAL**,**CONFIDENTIAL**
2,1010101601034,10101088801601034,1,1,1,1,16,34,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
3,1010101601049,10101088801601049,1,1,1,1,16,49,3.0,**CONFIDENTIAL**,**CONFIDENTIAL**
4,1010101601064,10101088801601064,1,1,1,1,16,64,2.0,**CONFIDENTIAL**,**CONFIDENTIAL**


In [9]:
mydata_cover.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect_cover_hh_w3.csv')
mydata_cover.head()

Unnamed: 0,household_id,household_id2,rural,saq01,saq02,saq03,saq06,saq08,hh_saq09,hh_saq11,hh_saq12
0,1010101601002,10101088801601002,1,1,1,1,16,2,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
1,1010101601017,10101088801601017,1,1,1,1,16,17,9.0,**CONFIDENTIAL**,**CONFIDENTIAL**
2,1010101601034,10101088801601034,1,1,1,1,16,34,1.0,**CONFIDENTIAL**,**CONFIDENTIAL**
3,1010101601049,10101088801601049,1,1,1,1,16,49,3.0,**CONFIDENTIAL**,**CONFIDENTIAL**
4,1010101601064,10101088801601064,1,1,1,1,16,64,2.0,**CONFIDENTIAL**,**CONFIDENTIAL**


In [11]:
import pandas as pd

In [12]:
# Link to section hh w3 
data_1 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect1_hh_w3.csv"

In [13]:
mydata1 = pd.read_csv(data_1)

In [14]:
mydata_sect1_hh = mydata1[['household_id', 'household_id2', 'rural', 'individual_id', 'individual_id2', 'saq01', 'saq02', 'saq03', 'saq06', 'saq08', 'hh_s1q00', 'hh_s1q02', 'hh_s1q03', 'hh_s1q04e', 'hh_s1q04h', 'hh_s1q05', 'hh_s1q07', 'hh_s1q08', 'hh_s1q09', 'hh_s1q10', 'hh_s1q26', 'hh_s1q27', 'hh_s1q31', 'hh_s1q32_a', 'hh_s1q32_b', 'hh_s1q33', 'hh_s1q34', 'hh_s1q35']].copy()

In [15]:
mydata_sect1_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect1_hh_w3.csv')
mydata_sect1_hh.head()


Unnamed: 0,household_id,household_id2,rural,individual_id,individual_id2,saq01,saq02,saq03,saq06,saq08,...,hh_s1q09,hh_s1q10,hh_s1q26,hh_s1q27,hh_s1q31,hh_s1q32_a,hh_s1q32_b,hh_s1q33,hh_s1q34,hh_s1q35
0,1010101601002,10101088801601002,1,101010160100201,1.010109e+18,1,1,1,16,2,...,,,,,,,,,,
1,1010101601002,10101088801601002,1,101010160100202,1.010109e+18,1,1,1,16,2,...,,,1.0,,1.0,GBRNA,6.0,1.0,9.0,6.0
2,1010101601002,10101088801601002,1,101010160100203,1.010109e+18,1,1,1,16,2,...,,,1.0,,1.0,GBRNA,6.0,1.0,9.0,6.0
3,1010101601002,10101088801601002,1,101010160100204,1.010109e+18,1,1,1,16,2,...,,,1.0,,2.0,,,,10.0,1.0
4,1010101601002,10101088801601002,1,101010160100205,1.010109e+18,1,1,1,16,2,...,,,1.0,,2.0,,,,10.0,1.0


In [16]:
#always run at the beginning of each session
import pandas as pd
import numpy as np
from pandas import read_csv
from scipy.stats import spearmanr
from sklearn.preprocessing import scale

In [17]:
#name your dataset
data_2 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect2_hh_w3.csv"

In [18]:
#always convert dataset to csv; then use "head" to make sure it's there
#this dataset is only education variables
mydata2 = pd.read_csv(data_2)
mydata2.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,individual_id,individual_id2,ea_id,ea_id2,saq01,saq02,...,hh_s2q11,s2q11_others,hh_s2q12,hh_s2q13,hh_s2q14,hh_s2q15,hh_s2q16,hh_s2q17,hh_s2q18,obs
0,1010101601002,10101088801601002,1,2897.155029,101010160100201,1.010109e+18,1010101601,10101088801601,1,1,...,,,,,,,,,2.0,1
1,1010101601017,10101088801601017,1,2897.155029,101010160101701,1.010109e+18,1010101601,10101088801601,1,1,...,,,,,,,,,2.0,1
2,1010101601017,10101088801601017,1,2897.155029,101010160101702,1.010109e+18,1010101601,10101088801601,1,1,...,,,,,,,,,2.0,2
3,1010101601017,10101088801601017,1,2897.155029,101010160101703,1.010109e+18,1010101601,10101088801601,1,1,...,,,,,,,,,1.0,3
4,1010101601017,10101088801601017,1,2897.155029,101010160101704,1.010109e+18,1010101601,10101088801601,1,1,...,,,1.0,2.0,2.0,,22.0,80.0,1.0,4


In [19]:
data_4 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect4_hh_w3.csv"

In [20]:
mydata4 = pd.read_csv(data_4)

In [21]:
mydata4.head()

Unnamed: 0.1,Unnamed: 0,household_id,household_id2,rural,pw_w3,individual_id,individual_id2,saq01,saq03,saq06,...,hh_s4q10_b,hh_s4q11_b,hh_s4q12,hh_s4q13,hh_s4q20,hh_s4q21_b,hh_s4q22_b,hh_s4q31,hh_s4q34,hh_s4q37
0,0,1010101601002,10101088801601002,1,2897.155029,101010160100201,1.010109e+18,1,1,16,...,,,,,,,,2.0,2.0,2.0
1,1,1010101601017,10101088801601017,1,2897.155029,101010160101701,1.010109e+18,1,1,16,...,,,,,,,,2.0,2.0,2.0
2,2,1010101601017,10101088801601017,1,2897.155029,101010160101702,1.010109e+18,1,1,16,...,,,,,,,,2.0,2.0,2.0
3,3,1010101601017,10101088801601017,1,2897.155029,101010160101703,1.010109e+18,1,1,16,...,,,,,,,,2.0,2.0,2.0
4,4,1010101601017,10101088801601017,1,2897.155029,101010160101704,1.010109e+18,1,1,16,...,,,,,,,,2.0,2.0,2.0


In [22]:
#create new dataset with selected variables only; rename with sect header' ]]
mydata_sect4_hh = mydata4[['household_id', 'household_id2', 'rural', 'pw_w3', 'individual_id', 'individual_id2', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s4q00', 'hh_s4q02_a', 'hh_s4q02_b', 'hh_s4q03_a', 'hh_s4q03_b', 'hh_s4q04', 'hh_s4q05', 'hh_s4q06', 'hh_s4q07', 'hh_s4q08', 'hh_s4q09', 'hh_s4q10_b', 'hh_s4q11_b', 'hh_s4q12', 'hh_s4q13', 'hh_s4q20', 'hh_s4q21_b', 'hh_s4q22_b', 'hh_s4q31', 'hh_s4q34', 'hh_s4q37']]

In [23]:
mydata_sect4_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect4_hh_w3.csv')
mydata_sect4_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,individual_id,individual_id2,saq01,saq03,saq06,saq08,...,hh_s4q10_b,hh_s4q11_b,hh_s4q12,hh_s4q13,hh_s4q20,hh_s4q21_b,hh_s4q22_b,hh_s4q31,hh_s4q34,hh_s4q37
0,1010101601002,10101088801601002,1,2897.155029,101010160100201,1.010109e+18,1,1,16,2,...,,,,,,,,2.0,2.0,2.0
1,1010101601017,10101088801601017,1,2897.155029,101010160101701,1.010109e+18,1,1,16,17,...,,,,,,,,2.0,2.0,2.0
2,1010101601017,10101088801601017,1,2897.155029,101010160101702,1.010109e+18,1,1,16,17,...,,,,,,,,2.0,2.0,2.0
3,1010101601017,10101088801601017,1,2897.155029,101010160101703,1.010109e+18,1,1,16,17,...,,,,,,,,2.0,2.0,2.0
4,1010101601017,10101088801601017,1,2897.155029,101010160101704,1.010109e+18,1,1,16,17,...,,,,,,,,2.0,2.0,2.0


In [24]:
#always run at the beginning of each session
import pandas as pd
import numpy as np
from pandas import read_csv
from scipy.stats import spearmanr
from sklearn.preprocessing import scale

In [25]:
data_5a = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5a_hh_w3.csv"

In [26]:
mydata5a = pd.read_csv(data_5a)

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
mydata_sect5a_hh = mydata5a[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'item_cd', 'hh_s5aq0a', 'hh_s5aq01', 'item_cd_cf', 'hh_s5aq01', 'hh_s5aq02_a', 'hh_s5aq02_b', 's5aq02b_others', 'hh_s5aq03_a', 'hh_s5aq03_b', 's5aq03b_others', 'hh_s5aq04', 'hh_s5aq05_a', 'hh_s5aq05_b', 's5aq05b_others', 'hh_s5aq06_a', 'hh_s5aq06_b', 's5aq06b_others']]

In [28]:
mydata_sect5a_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5a_hh_w3.csv')
mydata_sect5a_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,item_cd,hh_s5aq0a,...,hh_s5aq03_a,hh_s5aq03_b,s5aq03b_others,hh_s5aq04,hh_s5aq05_a,hh_s5aq05_b,s5aq05b_others,hh_s5aq06_a,hh_s5aq06_b,s5aq06b_others
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,1,Teff,...,,,,,,,,,,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2,Wheat,...,,,,,,,,,,
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,3,Barley,...,,,,,,,,,,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,4,Maize,...,,,,,,,,,,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,5,Sorghum,...,0.0,,,,6.0,181.0,,0.0,,


In [29]:
data_5b = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5b_hh_w3.csv"

In [30]:
mydata5b = pd.read_csv(data_5b)

In [31]:
mydata_sect5b_hh = mydata5b[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s5bq00', 'hh_s5bq0a', 'hh_s5bq01', 'hh_s5bq02']]

In [32]:
mydata_sect5b_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5b_hh_w3.csv')
mydata_sect5b_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s5bq00,hh_s5bq0a,hh_s5bq01,hh_s5bq02
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,1,Enjera (teff),2,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2,Other cereal,1,7.0
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,3,Potatoes and other root crops,2,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,4,"Pasta, Macaroni and Biscuits",2,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,5,Sugar or sugar products,2,


In [33]:
data_5c1 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5c1_hh_w3.csv"

In [34]:
mydata5c1 = pd.read_csv(data_5c1)

In [35]:
mydata_sect5c1_hh = mydata5c1[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08','hh_s5cq03']]

In [36]:
mydata_sect5c1_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5c1_hh_w3.csv')
mydata_sect5c1_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s5cq03
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2
1,1010101601017,10101088801601017,1,2897.155029,1,1,16,17,2
2,1010101601034,10101088801601034,1,2897.155029,1,1,16,34,2
3,1010101601049,10101088801601049,1,2897.155029,1,1,16,49,2
4,1010101601064,10101088801601064,1,2897.155029,1,1,16,64,2


In [37]:
import pandas as pd
import numpy as np
from pandas import read_csv
from scipy.stats import spearmanr
from sklearn.preprocessing import scale

In [38]:
data_5c2 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5c2_hh_w3.csv"

In [39]:
mydata5c2 = pd.read_csv(data_5c2)

In [40]:
mydata_sect5c2_hh = mydata5c2[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s5cq0a', 'hh_s5cq04', 'hh_s5cq05']]

In [41]:
mydata_sect5c2_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5c2_hh_w3.csv')
mydata_sect5c2_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s5cq0a,hh_s5cq04,hh_s5cq05
0,1010101601116,10101088801601116,1,2897.155029,1,1,16,116,Children 0-5 yea,0,0
1,1010101601116,10101088801601116,1,2897.155029,1,1,16,116,Children 6-15 ye,0,0
2,1010101601116,10101088801601116,1,2897.155029,1,1,16,116,Adults 16-65 yea,7,63
3,1010101601116,10101088801601116,1,2897.155029,1,1,16,116,Adults over 65 y,0,0
4,1010101601131,10101088801601131,1,2897.155029,1,1,16,131,Children 0-5 yea,2,2


In [42]:
data_5d = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5d_hh_w3.csv"

In [43]:
mydata5d = pd.read_csv(data_5d)

In [44]:
mydata_sect5d_hh = mydata5d[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s5cq0b', 'hh_s5cq0c', 'hh_s5cq06', 'hh_s5cq07']]

In [45]:
mydata_sect5d_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5d_hh_w3.csv')
mydata_sect5d_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s5cq0b,hh_s5cq0c,hh_s5cq06,hh_s5cq07
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,A,Full meal Breakf,2,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,B,Full meal Luncht,2,
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,C,Full meal Dinner,2,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,D,Snacks such as .,2,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,E,Dairy based beve,2,


In [46]:
data_5b = "~/Documents/Georgetown/Data/ETH_2015/Household/sect5b_hh_w3.csv"

In [47]:
mydata5b = pd.read_csv(data_5b)

In [48]:
mydata_sect5b_hh = mydata5b[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s5bq00', 'hh_s5bq0a', 'hh_s5bq01', 'hh_s5bq02']]

In [49]:
mydata_sect5b_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect5b_hh_w3.csv')
mydata_sect5b_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s5bq00,hh_s5bq0a,hh_s5bq01,hh_s5bq02
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,1,Enjera (teff),2,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2,Other cereal,1,7.0
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,3,Potatoes and other root crops,2,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,4,"Pasta, Macaroni and Biscuits",2,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,5,Sugar or sugar products,2,


In [50]:
data_6a = "~/Documents/Georgetown/Data/ETH_2015/Household/sect6a_hh_w3.csv"
mydata6a = pd.read_csv(data_6a)
mydata_sect6a_hh = mydata6a[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s6aq0a', 'hh_s6aq01', 'hh_s6aq02']]
mydata_sect6a_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect6a_hh_w3.csv')
mydata_sect6a_hh.head()


Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s6aq0a,hh_s6aq01,hh_s6aq02
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,Matches,2,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,Batteries,1,9.0
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,"Candles, incense",2,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,Laundry soap,1,18.0
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,Hand soap,2,


In [51]:
data_6b = "~/Documents/Georgetown/Data/ETH_2015/Household/sect6b_hh_w3.csv"
mydata6b = pd.read_csv(data_6b)
mydata_sect6b_hh = mydata6b[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s6bq00', 'hh_s6bq0a', 'hh_s6bq03', 'hh_s6bq04']]
mydata_sect6b_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect6b_hh_w3.csv')
mydata_sect6b_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s6bq00,hh_s6bq0a,hh_s6bq03,hh_s6bq04
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,1,Clothes/shoes/fabric for MEN,2,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2,Clothes/shoes/fabric for WOMEN,1,432.0
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,3,Clothes/shoes/fabric for BOYS,2,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,4,Clothes/shoes/fabric for GIRLS,2,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,5,Kitchen equipment,2,


In [52]:
data_10 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect10_hh_w3.csv"
mydata10 = pd.read_csv(data_10)
mydata_sect10_hh = mydata10[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s10q00', 'hh_s10q0a', 'hh_s10q01', 'hh_s10q02_a', 'hh_s10q02_a']]
mydata_sect10_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect10_hh_w3.csv')
mydata_sect10_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s10q00,hh_s10q0a,hh_s10q01,hh_s10q02_a,hh_s10q02_a.1
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,1,Kerosene stove,0,,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,2,Butance Gas stove,0,,
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,3,Electric stove,0,,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,4,Blanket/Gabi,2,1.0,1.0
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,5,Mattress and/or bed,1,1.0,1.0


In [53]:
data_12 = "~/Documents/Georgetown/Data/ETH_2015/Household/sect12_hh_w3.csv"
mydata12 = pd.read_csv(data_12)
mydata_sect12_hh = mydata12[['household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'saq08', 'hh_s12q00', 'hh_s12q0a', 'hh_s12q01', 'hh_s12q02', 'hh_s12q03_a', 'hh_s12q03_b', 'hh_s12q04_a', 'hh_s12q04_b', 'hh_s12q04_c', 'hh_s12q05']]
mydata_sect12_hh.to_csv('~/Documents/Georgetown/Data/ETH_2015/Household/sect12_hh_w3.csv')
mydata_sect12_hh.head()

Unnamed: 0,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,saq08,hh_s12q00,hh_s12q0a,hh_s12q01,hh_s12q02,hh_s12q03_a,hh_s12q03_b,hh_s12q04_a,hh_s12q04_b,hh_s12q04_c,hh_s12q05
0,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,101,Cash Transfers/Gifts,2,,,,,,,
1,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,102,Food Transfers/Gifts,2,,,,,,,
2,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,103,Non-Food In-Kind Transfers/Gifts,2,,,,,,,
3,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,104,"Savings, Interest or Other Investment In",2,,,,,,,
4,1010101601002,10101088801601002,1,2897.155029,1,1,16,2,105,Pension,2,,,,,,,


In [54]:
data_cover_pp = "~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect_cover_pp_w3.csv"
mydata_ppcover = pd.read_csv(data_cover_pp)
mydata_cover_pp = mydata_ppcover[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'pp_saq07', 'region', 'woreda', 'kebele', 'pp_saq10', 'pp_saq12', 'pp_saq13', 'pp_saq13a']]
mydata_cover_pp.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect_cover_pp_w3.csv')
mydata_cover_pp.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,pp_saq07,region,woreda,kebele,pp_saq10,pp_saq12,pp_saq13,pp_saq13a
0,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,,,,9.0,,3,1.0
1,101010200000000.0,1010101601017,10101088801601017,1,2897.155029,1,1,17,1.0,,,,9.0,,3,2.0
2,101010200000000.0,1010101601034,10101088801601034,1,2897.155029,1,1,34,1.0,,,,1.0,,2,1.0
3,101010200000000.0,1010101601049,10101088801601049,1,2897.155029,1,1,49,1.0,,,,4.0,,1,1.0
4,101010200000000.0,1010101601064,10101088801601064,1,2897.155029,1,1,64,1.0,,,,4.0,,3,2.0


In [55]:
data_sect1_pp = "~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect1_pp_w3.csv"
mydata_sect1_pp = pd.read_csv(data_sect1_pp)
sect1_pp = mydata_sect1_pp[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'pp_saq07', 'pp_saq07', 'pp_s1q00', 'pp_s1q02', 'pp_s1q03', 'pp_s1q04']]
sect1_pp.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect1_pp_w3.csv')
sect1_pp.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,pp_saq07,pp_saq07.1,pp_s1q00,pp_s1q02,pp_s1q03,pp_s1q04
0,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,1.0,1.0,79.0,2.0,3.0
1,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,1.0,2.0,,,
2,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,1.0,3.0,,,
3,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,1.0,4.0,,,
4,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,1.0,5.0,,,


In [59]:
data_sect3_pp = "~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect3_pp_w3.csv"
mydata_sect3_pp = pd.read_csv(data_sect3_pp)
sect3_pp = mydata_sect3_pp[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'parcel_id', 'saq01', 'saq03', 'saq06', 'pp_saq07', 'pp_s3q00', 'pp_s3q0a', 'pp_s3q02_a', 'pp_s3q02_c', 'pp_s3q03', 'pp_s3q03b', 'pp_s3q04', 'pp_s3q05_a', 'pp_s3q06_a', 'pp_s3q06_b', 'pp_s3q10a', 'pp_s3q10b', 'pp_s3q11', 'pp_s3q12', 'pp_s3q13', 'pp_s3q14', 'pp_s3q15', 'pp_s3q18', 'pp_s3q20a_1', 'pp_s3q20a', 'pp_s3q14', 'pp_s3q21', 'pp_s3q23', 'pp_s3q25', 'pp_s3q27_a', 'pp_s3q27_b', 'pp_s3q27_c', 'pp_s3q27_d', 'pp_s3q27_e', 'pp_s3q27_f', 'pp_s3q27_g', 'pp_s3q27_h', 'pp_s3q27_i', 'pp_s3q27_j', 'pp_s3q27_k', 'pp_s3q27_l', 'pp_s3q27_m', 'pp_s3q27_n', 'pp_s3q27_o', 'pp_s3q27_p', 'pp_s3q28_a', 'pp_s3q28_b', 'pp_s3q28_d', 'pp_s3q28_e', 'pp_s3q29_a', 'pp_s3q29_b', 'pp_s3q29_c', 'pp_s3q29_d', 'pp_s3q31_a', 'pp_s3q31_b', 'pp_s3q31_c', 'pp_s3q31_d', 'pp_s3q33a', 'pp_s3q35']]
sect3_pp = pd.DataFrame(sect3_pp)
sect3_pp.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect2_pp_w3.csv')
sect3_pp.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,parcel_id,saq01,saq03,saq06,pp_saq07,...,pp_s3q29_a,pp_s3q29_b,pp_s3q29_c,pp_s3q29_d,pp_s3q31_a,pp_s3q31_b,pp_s3q31_c,pp_s3q31_d,pp_s3q33a,pp_s3q35
0,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,1,2,1.0,...,,,,,,,,,,
1,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,1,2,1.0,...,4.0,4.0,2.0,2.0,,,,,2.0,6.0
2,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,1,2,1.0,...,0.0,,0.0,,,,,,2.0,6.0
3,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,1,2,1.0,...,4.0,4.0,2.0,2.0,,,,,2.0,6.0
4,101010200000000.0,1010101601017,10101088801601017,1,2897.155029,1,1,1,17,1.0,...,,,,,,,,,,


In [60]:
data_sect7_pp = "~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect7_pp_w3.csv"
mydata_sect7_pp = pd.read_csv(data_sect7_pp)
sect7_pp = mydata_sect7_pp[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'pp_saq07', 'pp_s7q01', 'pp_s7q02', 'pp_s7q08', 'pp_s7q20a']]
sect7_pp.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Planting/sect7_pp_w3.csv')
sect7_pp.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,pp_saq07,pp_s7q01,pp_s7q02,pp_s7q08,pp_s7q20a
0,101010200000000.0,1010101601002,10101088801601002,1,2897.155029,1,1,2,1.0,2,1,2,2.0
1,101010200000000.0,1010101601017,10101088801601017,1,2897.155029,1,1,17,1.0,1,2,1,2.0
2,101010200000000.0,1010101601034,10101088801601034,1,2897.155029,1,1,34,1.0,2,2,2,2.0
3,101010200000000.0,1010101601049,10101088801601049,1,2897.155029,1,1,49,1.0,2,2,2,2.0
4,101010200000000.0,1010101601064,10101088801601064,1,2897.155029,1,1,64,1.0,2,2,2,2.0


In [61]:
data_cover_ph = "~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect_cover_ph_w3.csv"
mydata_cover_ph = pd.read_csv(data_cover_ph)
sect_cover_ph = mydata_cover_ph[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'ph_saq07', 'ph_saq10', 'ph_saq13', 'ph_saq13b']]
sect_cover_ph.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect_cover_ph_w3.csv')
sect_cover_ph.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,ph_saq07,ph_saq10,ph_saq13,ph_saq13b
0,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,1.0,3.0,1.0
1,101010160101701,1010101601017,10101088801601017,1,2897.155029,1,1,17,1,9.0,3.0,2.0
2,101010160104901,1010101601049,10101088801601049,1,2897.155029,1,1,49,1,3.0,1.0,1.0
3,101010160106401,1010101601064,10101088801601064,1,2897.155029,1,1,64,1,2.0,3.0,2.0
4,101010160108001,1010101601080,10101088801601080,1,2897.155029,1,1,80,1,4.0,3.0,10.0


In [62]:
data_sect1_ph_w3 = "~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect1_ph_w3.csv"
mydata_sect1_ph = pd.read_csv(data_sect1_ph_w3)
sect1_ph = mydata_sect1_ph[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'ph_saq07', 'ph_s1q00', 'ph_s1q04', 'hh_saq01', 'hh_saq03', 'hh_saq08']]
sect1_ph.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect1_ph_w3.csv')
sect1_ph.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,ph_saq07,ph_s1q00,ph_s1q04,hh_saq01,hh_saq03,hh_saq08
0,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,1.0,3.0,1,1.0,2.0
1,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,2.0,,1,1.0,2.0
2,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,3.0,,1,1.0,2.0
3,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,4.0,,1,1.0,2.0
4,101010160100201,1010101601002,10101088801601002,1,2897.155029,1,1,2,1,5.0,,1,1.0,2.0


In [63]:
data_sect9_ph_w3 = "~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect9_ph_w3.csv"
mydata_sect9_ph = pd.read_csv(data_sect9_ph_w3)
sect9_ph = mydata_sect9_ph[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'ph_saq07', 'parcel_id', 'field_id', 'crop_name', 'crop_code', 'ph_s9q01', 'ph_s9q02', 'ph_s9q03', 'ph_s9q04_b']] 
sect9_ph.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect9_ph_w3.csv')
sect9_ph.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,ph_saq07,parcel_id,field_id,crop_name,crop_code,ph_s9q01,ph_s9q02,ph_s9q03,ph_s9q04_b
0,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,2,MAIZE,2,1,,1,181.0
1,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,3,RED PEPPER,38,1,,1,1.0
2,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,4,SORGHUM,6,1,,1,1.0
3,101010160101701,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,17.0,1.0,1,2,MAIZE,2,1,,1,181.0
4,101010160101701,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,17.0,1.0,1,3,RED PEPPER,38,1,,1,181.0


In [64]:
data_sect10_ph_w3 = "~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect10_ph_w3.csv"
mydata_sect10_ph = pd.read_csv(data_sect10_ph_w3)
sect10_ph = mydata_sect10_ph[['holder_id', 'household_id', 'household_id2', 'rural', 'pw_w3', 'saq01', 'saq03', 'saq06', 'ph_saq07', 'parcel_id', 'field_id', 'crop_name', 'crop_code', 'ph_s10q01_a', 'ph_s10q01_b', 'ph_s10q01_d', 'ph_s10q01_e', 'ph_s10q02_a', 'ph_s10q02_b', 'ph_s10q02_c', 'ph_s10q02_e', 'ph_s10q02_f', 'ph_s10q02_g', 'ph_s10q02_h', 'ph_s10q02_i', 'ph_s10q02_j', 'ph_s10q02_k', 'ph_s10q02_l', 'ph_s10q02_m', 'ph_s10q02_n', 'ph_s10q02_o', 'ph_s10q02_p']]
sect10_ph.to_csv('~/Documents/Georgetown/Data/ETH_2015/Post-Harvest/sect10_ph_w3.csv')
sect10_ph.head()

Unnamed: 0,holder_id,household_id,household_id2,rural,pw_w3,saq01,saq03,saq06,ph_saq07,parcel_id,...,ph_s10q02_g,ph_s10q02_h,ph_s10q02_i,ph_s10q02_j,ph_s10q02_k,ph_s10q02_l,ph_s10q02_m,ph_s10q02_n,ph_s10q02_o,ph_s10q02_p
0,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,...,,,,,,,,,,
1,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,...,,,,,,,,,,
2,101010160100201,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,2.0,1.0,1,...,,,,,,,,,,
3,101010160101701,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,17.0,1.0,1,...,2.0,2.0,1.0,7.0,1.0,1.0,2.0,0.0,2.0,2.0
4,101010160101701,1010102000000.0,1.010109e+16,1.0,2897.155029,1.0,1.0,17.0,1.0,1,...,,,,,,,,,,


In [None]:
combined = pd.concat([mydata_cover, mydata_sect1_hh], axis=1)