# Reading the DHS survey and GPS datasets

## Importing the package we created for getting DHS datasets

In [1]:
from poverty_predictor.get_dhs_data import GetDHSData

In [2]:
# Build the DHS data accessor: first argument is the GPS file (.shp),
# second is the survey file (.dta). Paths are relative to this notebook.
data = GetDHSData('../data/GPS/RWGE61FL.shp', '../data/Survey/RWHR61FL.dta')

## Reading the GPS dataset

In [3]:
# GPS table: cluster number (DHSCLUST) with latitude/longitude (LATNUM, LONGNUM).
gps_data = data.gps_df()

In [4]:
gps_data.head()

Unnamed: 0,DHSCLUST,LATNUM,LONGNUM
0,1.0,-2.532818,29.684726
1,2.0,-1.833858,30.310689
2,3.0,-1.888155,29.478298
3,4.0,-2.366763,30.521692
4,5.0,-2.171266,30.018541


## Glancing through the GPS dataset

In [5]:
gps_data.shape

(492, 3)

In [6]:
gps_data.dtypes

DHSCLUST    float64
LATNUM      float64
LONGNUM     float64
dtype: object

In [7]:
gps_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 492 entries, 0 to 491
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   DHSCLUST  492 non-null    float64
 1   LATNUM    492 non-null    float64
 2   LONGNUM   492 non-null    float64
dtypes: float64(3)
memory usage: 11.7 KB


In [8]:
gps_data.isnull().sum()

DHSCLUST    0
LATNUM      0
LONGNUM     0
dtype: int64

## Reading the survey dataset

In [9]:
# Survey table: cluster number (DHSCLUST) with its average wealth and wealth asset index.
survey_data = data.survey_df()

In [10]:
survey_data.head()

Unnamed: 0,DHSCLUST,Average Wealth,wealth_asset_index
0,1,-0.531405,2.0
1,2,-0.40983,2.5
2,3,-0.478115,2.0
3,4,-0.43596,2.0
4,5,-0.44948,2.0


## Glancing through the survey dataset

In [11]:
survey_data.shape

(492, 3)

In [12]:
survey_data.dtypes

DHSCLUST                int64
Average Wealth        float64
wealth_asset_index    float64
dtype: object

Since the cluster number (`DHSCLUST`) is an integer type here but `float64` in the GPS dataset, we'll convert it to float so that the key columns have matching dtypes for the merge.

In [13]:
# Cast the cluster key from int64 to float64 to match gps_data['DHSCLUST'].
# The assignment modifies survey_data in place; re-running the cell is harmless
# because casting float64 to float64 is a no-op.
# NOTE(review): the merge below is done by data.merged_wealth(), not on this
# variable directly. Whether this cast affects that merge depends on whether
# survey_df() returns the object's internal frame or a copy — confirm.
survey_data['DHSCLUST'] = survey_data['DHSCLUST'].astype('float64')

In [14]:
survey_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 492 entries, 0 to 491
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   DHSCLUST            492 non-null    float64
 1   Average Wealth      492 non-null    float64
 2   wealth_asset_index  492 non-null    float64
dtypes: float64(3)
memory usage: 11.7 KB


In [15]:
survey_data.isnull().sum()

DHSCLUST              0
Average Wealth        0
wealth_asset_index    0
dtype: int64

## Merging the GPS and survey datasets on cluster numbers to get the co-ordinates and wealth asset index of the clusters

In [16]:
# The merge itself is done inside the package helper. The result comes back with
# renamed columns (Cluster Number, Latitude, Longitude, Average Wealth, Asset Index).
wealthgps_df = data.merged_wealth()

In [17]:
wealthgps_df.head()

Unnamed: 0,Cluster Number,Latitude,Longitude,Average Wealth,Asset Index
0,1.0,-2.532818,29.684726,-0.531405,2.0
1,2.0,-1.833858,30.310689,-0.40983,2.5
2,3.0,-1.888155,29.478298,-0.478115,2.0
3,4.0,-2.366763,30.521692,-0.43596,2.0
4,5.0,-2.171266,30.018541,-0.44948,2.0
