# Merge household income onto satellite data

General steps:
1. Import data
2. Prep BISP household-year level data for merge
3. Merge onto satellite data
4. Export to CSV

To do:
1. Merge province/district/tehsil/UC names onto each household

In [1]:
import os
import numpy as np
import pandas as pd 

# Display options
# Show every column when previewing wide frames (the satellite data has dozens).
pd.options.display.max_columns = 999
# None disables truncation of long cell values. The old sentinel -1 was
# deprecated in pandas 1.0 and is no longer accepted by newer releases.
pd.options.display.max_colwidth = None

### 1. Import data

In [2]:
# Load the BISP household income file (long format: uid, period, hh_inc)
BISP_PATH = os.path.join(
    '..', '..', '..',
    'Data', 'FinalData', 'BISP',
    'bisp_hh_income.csv',
)
bisp_df = pd.read_csv(filepath_or_buffer=BISP_PATH)
# Last expression of the cell: display (rows, columns) as a quick sanity check
bisp_df.shape

(33794, 3)

In [3]:
# Preview the first five rows of the household income data
bisp_df.head()

Unnamed: 0,uid,period,hh_inc
0,100215,2011,9999998.0
1,100215,2013,240000.0
2,100215,2014,0.0
3,100215,2016,0.0
4,100385,2016,0.0


In [4]:
# Load the satellite features file (one row per uid; VIIRS, DMSP-OLS and
# Landsat 7 columns, per the column names in the preview)
SAT_PATH = os.path.join(
    '..', '..', '..',
    'Data', 'FinalData', 'BISP',
    'bisp_satellite_data.csv',
)
sat_df = pd.read_csv(filepath_or_buffer=SAT_PATH)
# Last expression of the cell: display (rows, columns) as a quick sanity check
sat_df.shape

(5416, 37)

In [5]:
# Preview the first five rows of the satellite data
sat_df.head()

Unnamed: 0,uid,viirs_2012,viirs_2013,viirs_2014,viirs_2015,viirs_2016,viirs_2017,viirs_2018,dmspols_1992,dmspols_1993,...,dmspols_2011,dmspols_2012,dmspols_2013,l7_2011_1,l7_2011_2,l7_2011_3,l7_2011_4,l7_2011_5,l7_2011_6,l7_2011_7
0,100389,2.052018,2.141392,2.089507,2.307763,2.850603,3.653005,3.75,43.0,33.666667,...,32.666667,47.666667,45.333333,902.331348,1224.739396,1393.123911,2555.792708,2474.174317,3005.856769,1922.539802
1,100401,1.964332,2.133366,2.052437,2.296554,2.76996,3.702374,3.488333,43.0,33.666667,...,32.666667,47.666667,45.333333,885.841488,1200.54835,1366.253764,2512.672843,2451.849595,3004.616242,1890.566155
2,100581,1.824753,1.937131,1.875487,2.04754,2.557241,3.198625,3.286,43.0,32.5,...,30.5,47.5,44.5,886.021385,1206.745127,1373.031277,2550.999418,2462.90966,3006.164678,1900.64984
3,101101,1.964332,2.133366,2.052437,2.296554,2.76996,3.702374,3.488333,43.0,33.666667,...,32.666667,47.666667,45.333333,886.196798,1201.037263,1366.468559,2514.479913,2450.865939,3004.699563,1890.108734
4,101236,2.052018,2.141392,2.089507,2.307763,2.850603,3.653005,3.75,43.0,33.666667,...,32.666667,47.666667,45.333333,891.264553,1209.61309,1374.709528,2535.919345,2453.881552,3005.134086,1897.493484


### 2. Prep BISP household-year level data for merge

In [6]:
# Restrict to the 2011 and 2013 periods, the ones treated here as having
# income data; all other periods are dropped.
# NOTE(review): hh_inc contains values such as 9999998.0 (visible in the
# preview) that look like a survey missing-value code -- confirm and recode
# before using income in analysis.
bisp_df = bisp_df[bisp_df['period'].isin([2011, 2013])]

bisp_df.head()

Unnamed: 0,uid,period,hh_inc
0,100215,2011,9999998.0
1,100215,2013,240000.0
5,100389,2011,9000.0
6,100389,2013,73000.0
8,100401,2011,75000.0


In [7]:
# Reshape long -> wide: one row per household (uid) with a separate income
# column for each retained period. pivot() raises if a (uid, period) pair
# appears more than once, so uid is unique in the result.
bisp_df = (
    bisp_df
    .pivot(index='uid', columns='period', values='hh_inc')
    .reset_index()  # bring uid back as a regular column to merge on
    .rename(columns={2011: 'hhinc_2011', 2013: 'hhinc_2013'})
)

bisp_df.head()

period,uid,hhinc_2011,hhinc_2013
0,100215,9999998.0,240000.0
1,100389,9000.0,73000.0
2,100401,75000.0,159000.0
3,100581,48000.0,0.0
4,101000,0.0,0.0


### 3. Merge with satellite data

In [9]:
# Record the satellite data's shape before merging, for comparison afterwards
sat_df.shape

(5416, 37)

In [12]:
# Left-join the wide income columns onto the satellite data by household uid.
# Every satellite row is kept; households without income for a period get NaN.
# Note: this overwrites sat_df, so re-running the cell merges a second time.
sat_df = pd.merge(sat_df, bisp_df, how='left', on='uid')

sat_df.head()

Unnamed: 0,uid,viirs_2012,viirs_2013,viirs_2014,viirs_2015,viirs_2016,viirs_2017,viirs_2018,dmspols_1992,dmspols_1993,...,dmspols_2013,l7_2011_1,l7_2011_2,l7_2011_3,l7_2011_4,l7_2011_5,l7_2011_6,l7_2011_7,hhinc_2011,hhinc_2013
0,100389,2.052018,2.141392,2.089507,2.307763,2.850603,3.653005,3.75,43.0,33.666667,...,45.333333,902.331348,1224.739396,1393.123911,2555.792708,2474.174317,3005.856769,1922.539802,9000.0,73000.0
1,100401,1.964332,2.133366,2.052437,2.296554,2.76996,3.702374,3.488333,43.0,33.666667,...,45.333333,885.841488,1200.54835,1366.253764,2512.672843,2451.849595,3004.616242,1890.566155,75000.0,159000.0
2,100581,1.824753,1.937131,1.875487,2.04754,2.557241,3.198625,3.286,43.0,32.5,...,44.5,886.021385,1206.745127,1373.031277,2550.999418,2462.90966,3006.164678,1900.64984,48000.0,0.0
3,101101,1.964332,2.133366,2.052437,2.296554,2.76996,3.702374,3.488333,43.0,33.666667,...,45.333333,886.196798,1201.037263,1366.468559,2514.479913,2450.865939,3004.699563,1890.108734,31200.0,219000.0
4,101236,2.052018,2.141392,2.089507,2.307763,2.850603,3.653005,3.75,43.0,33.666667,...,45.333333,891.264553,1209.61309,1374.709528,2535.919345,2453.881552,3005.134086,1897.493484,14000.0,


In [13]:
# Post-merge sanity check against the pre-merge shape
sat_df.shape
# No rows lost or duplicated: the row count is still 5416, and the column
# count rose from 37 to 39 (hhinc_2011 and hhinc_2013 were added)

(5416, 39)

### 4. Export to CSV

In [14]:
# Write the merged satellite + income data next to the input files.
# index=False leaves the DataFrame's row index out of the CSV.
OUTPUT_PATH = os.path.join(
    '..', '..', '..',
    'Data', 'FinalData', 'BISP',
    'bisp_sat_inc_data.csv',
)
sat_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)