## Reading the DHS survey and GPS datasets

Importing necessary libraries

In [5]:
from geopandas import read_file
import pandas as pd

In [69]:
class GetData:
    """Load DHS cluster coordinates and survey wealth data, and merge them.

    Args:
        gps_path: Path to the GPS shapefile, read with ``read_file``.
        survey_path: Path to the survey Stata (.dta) file.

    Attributes:
        location_df: Result of the last ``gps_df()`` call, or None.
        cluster_wealth_df: Result of the last ``survey_df()`` call, or None.
    """

    # Ordinal codes assigned to the wealth labels found in column ``hv270``.
    _WEALTH_CODES = {'poorest': 1, 'poorer': 2, 'middle': 3, 'richer': 4, 'richest': 5}

    def __init__(self, gps_path, survey_path):
        self.gps_path = gps_path
        self.survey_path = survey_path
        # Filled in by gps_df() / survey_df(); None until first loaded.
        self.location_df = None
        self.cluster_wealth_df = None

    def gps_df(self):
        """Read the shapefile and keep the cluster id and its coordinates.

        Returns:
            A frame with columns ``DHSCLUST``, ``LATNUM`` and ``LONGNUM``;
            it is also stored on ``self.location_df``.
        """
        shapefile_df = read_file(self.gps_path)
        self.location_df = shapefile_df[['DHSCLUST', 'LATNUM', 'LONGNUM']]
        return self.location_df

    def survey_df(self):
        """Read the survey file and compute the median wealth index per cluster.

        Column ``hv001`` is renamed to ``DHSCLUST`` and ``hv270`` to
        ``wealth_asset_index``; the wealth labels are converted to the
        integers 1 (poorest) to 5 (richest) before taking the median.

        Returns:
            A frame with one row per ``DHSCLUST`` and the median
            ``wealth_asset_index``; also stored on ``self.cluster_wealth_df``.

        Raises:
            ValueError: If ``hv270`` holds a missing or unrecognised label.
        """
        # Only two columns are used, so do not load the rest of the file.
        dta_file = pd.read_stata(self.survey_path, columns=['hv001', 'hv270'])
        survey = dta_file.rename(
            columns={'hv001': 'DHSCLUST', 'hv270': 'wealth_asset_index'}
        )

        labels = survey['wealth_asset_index']
        codes = labels.map(self._WEALTH_CODES)
        unmapped = codes.isna()
        if unmapped.any():
            # Fail with the offending labels instead of an opaque NaN-to-int
            # cast error from astype().
            unknown = sorted({str(label) for label in labels[unmapped]})
            raise ValueError(
                f"Unexpected wealth index labels in 'hv270': {unknown}"
            )
        survey['wealth_asset_index'] = codes.astype('int16')

        # Stored under its own attribute name: assigning to ``self.survey_df``
        # would replace this method on the instance and make a second call
        # fail with "'DataFrame' object is not callable".
        self.cluster_wealth_df = (
            survey.groupby('DHSCLUST')[['wealth_asset_index']]
            .median()
            .reset_index()
        )
        return self.cluster_wealth_df

    def merged_wealth(self):
        """Join cluster coordinates with the per-cluster median wealth index.

        Either input is loaded on demand if its loader has not been called yet.

        Returns:
            A frame with columns ``Cluster Number``, ``Latitude``,
            ``Longitude`` and ``Asset Index`` for clusters present in both
            inputs.
        """
        location = self.location_df if self.location_df is not None else self.gps_df()
        wealth = (
            self.cluster_wealth_df
            if self.cluster_wealth_df is not None
            else self.survey_df()
        )
        wealth_df = pd.merge(location, wealth, on='DHSCLUST')
        return wealth_df.rename(
            columns={
                'DHSCLUST': 'Cluster Number',
                'LATNUM': 'Latitude',
                'LONGNUM': 'Longitude',
                'wealth_asset_index': 'Asset Index',
            }
        )

In [70]:
# Build the loader from the GPS shapefile (.shp) and the survey Stata file (.dta).
data = GetData('../poverty_predictor/data/GPS/RWGE61FL.shp', '../poverty_predictor/data/Survey/RWHR61FL.dta')

In [71]:
# Read the shapefile: cluster id (DHSCLUST) with its latitude and longitude.
gps_data = data.gps_df()

In [72]:
# Read the survey file and reduce it to the median wealth index per cluster.
survey_data = data.survey_df()

In [73]:
# Join coordinates and median wealth index on the cluster id, with readable column names.
wealthgps_df = data.merged_wealth()

In [75]:
# Preview the first rows of the merged table.
wealthgps_df.head()

Unnamed: 0,Cluster Number,Latitude,Longitude,Asset Index
0,1.0,-2.532818,29.684726,2.0
1,2.0,-1.833858,30.310689,2.5
2,3.0,-1.888155,29.478298,2.0
3,4.0,-2.366763,30.521692,2.0
4,5.0,-2.171266,30.018541,2.0
