# Expenditure Type vs Access Type

### This entry checks the survey for rows where the expenditure type does not correspond to the anticipated access type.

## General Code

In [2]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pysentani
import scipy as sp

In [3]:
# Load the cleaned, merged survey workbook; empty strings are treated as missing values.
survey = pd.read_excel(
    '../data-clean/sentani-merged-cleaned-2015-06-10.xlsx',
    na_values=[''],
)

# Report the table size: rows first, then columns.
print('number of entries =', survey.shape[0])
print('number of columns =', survey.shape[1])

number of entries = 1184
number of columns = 274


In [4]:
# Derive an access-type label for every row with the project helper and store
# it as a new column on the survey frame (this modifies `survey` in place).
access_labels = pysentani.access_type(survey)
survey['access_type'] = access_labels

# Show the survey columns related to 'app_now' (listed in the output below).
pysentani.similar_columns(survey, 'app_now')

app_now
app_now/lighting
app_now/TV
app_now/radio
app_now/fridge
app_now/fan
app_now/rice_cooker
app_now/other_cooking
app_now/welder
app_now/grinder
app_now/saw
app_now/other_tools
app_now/other


#### Numbers confirmed on page nine of report

## Raw access and expenditure data

In [5]:
# Preview the access label next to the three expenditure columns (first five rows).
expenditure_preview_columns = [
    'access_type',
    'PLN_expenditure',
    'community_microgrid_expenditure',
    'genset_expenditure',
]
survey.loc[:, expenditure_preview_columns].head()

Unnamed: 0,access_type,PLN_expenditure,community_microgrid_expenditure,genset_expenditure
0,no_access,,,50000.0
1,no_access,,,
2,no_access,,,
3,no_access,,,
4,no_access,,,20000.0


## No Access

### Expected to correspond to genset_expenditure

In [6]:
# Rows whose access type is 'no_access'.
no_access_mask = survey['access_type'].eq('no_access')

# Rows that report a community-microgrid or a PLN expenditure (either is non-null).
cp_exp_mask = (
    survey['community_microgrid_expenditure'].notnull()
    | survey['PLN_expenditure'].notnull()
)

# Short key: the last five characters of the SIM serial.
# NOTE: this adds a column to `survey` in place; later cells select 'sim_key' and rely on it.
survey['sim_key'] = survey['simserial'].str[-5:]

# Show the no-access rows that nevertheless report microgrid or PLN spending.
mismatch_columns = [
    'village_name',
    'access_type',
    'PLN_expenditure',
    'community_microgrid_expenditure',
    'genset_expenditure',
    'demand_point',
    'sim_key',
]
survey.loc[no_access_mask & cp_exp_mask, mismatch_columns]

Unnamed: 0,village_name,access_type,PLN_expenditure,community_microgrid_expenditure,genset_expenditure,demand_point,sim_key
137,Yokiwa,no_access,,240000,,school,5028f
287,Puai,no_access,,500000,,school,5028f
288,Puai,no_access,,100000,,household,5028f
298,Puai,no_access,,60000,,household,5028f
306,Yokiwa,no_access,,300000,,household,5028f
364,Yokiwa,no_access,,100000,,household,5028f
368,Puai,no_access,,100000,,household,5028f
399,Yokiwa,no_access,,100000,,household,5028f
463,Puai,no_access,,20000,,household,5028f
507,Yokiwa,no_access,,100000,,household,5028f


## Community Microgrid Access

### Expected to correspond to community_microgrid_expenditure

In [7]:
# Rows whose access type is 'community_microgrid'.
community_access_mask = survey['access_type'].eq('community_microgrid')

# Rows that report a genset or a PLN expenditure (either is non-null).
gp_exp_mask = (
    survey['genset_expenditure'].notnull()
    | survey['PLN_expenditure'].notnull()
)

# Show the community-microgrid rows that report genset or PLN spending.
# The 'sim_key' column must already exist on `survey` (it is added in an earlier cell).
mismatch_columns = [
    'village_name',
    'access_type',
    'PLN_expenditure',
    'community_microgrid_expenditure',
    'genset_expenditure',
    'demand_point',
    'sim_key',
]
survey.loc[community_access_mask & gp_exp_mask, mismatch_columns]

Unnamed: 0,village_name,access_type,PLN_expenditure,community_microgrid_expenditure,genset_expenditure,demand_point,sim_key
126,Kensio,community_microgrid,34000.0,,,household,5036f
152,Atamali,community_microgrid,,,150000.0,church,5028f


## PLN Grid Access

### Expected to correspond to PLN_expenditure and possibly genset_expenditure

(Genset expenditure alongside PLN grid access is plausible, so this is the only category with no clearly anomalous data.)

In [8]:
# Rows whose access type is 'PLN_grid'.
pln_access_mask = survey['access_type'].eq('PLN_grid')

# Rows that report a community-microgrid or a genset expenditure (either is non-null).
# Genset spending is included here even though the section heading treats it as
# possibly expected for PLN grid customers.
cg_exp_mask = (
    survey['community_microgrid_expenditure'].notnull()
    | survey['genset_expenditure'].notnull()
)

# Show the PLN-grid rows that report microgrid or genset spending.
# The 'sim_key' column must already exist on `survey` (it is added in an earlier cell).
mismatch_columns = [
    'village_name',
    'access_type',
    'PLN_expenditure',
    'community_microgrid_expenditure',
    'genset_expenditure',
    'demand_point',
    'sim_key',
]
survey.loc[pln_access_mask & cg_exp_mask, mismatch_columns]

Unnamed: 0,village_name,access_type,PLN_expenditure,community_microgrid_expenditure,genset_expenditure,demand_point,sim_key
120,Hobong,PLN_grid,250000,,500000,church,5036f
865,Khamayakha,PLN_grid,720000,,50000,household,1217f
1120,Khamayakha,PLN_grid,4800000,,45000,other,4027f


In [13]:
# Count, per row, how many of the three expenditure columns are filled in,
# and flag the rows that report two or more of them.
expenditure_columns = [
    'community_microgrid_expenditure',
    'genset_expenditure',
    'PLN_expenditure',
]
total_exp_mask = survey[expenditure_columns].notnull().sum(axis=1) >= 2

# Show every row that reports more than one kind of expenditure.
# The 'sim_key' column must already exist on `survey` (it is added in an earlier cell).
mismatch_columns = [
    'village_name',
    'access_type',
    'PLN_expenditure',
    'community_microgrid_expenditure',
    'genset_expenditure',
    'demand_point',
    'sim_key',
]
survey.loc[total_exp_mask, mismatch_columns]

Unnamed: 0,village_name,access_type,PLN_expenditure,community_microgrid_expenditure,genset_expenditure,demand_point,sim_key
76,Ayapo,PLN_microgrid,,19000.0,20000,household,5076f
77,Ayapo,PLN_microgrid,,50000.0,20000,household,5076f
85,Ayapo,PLN_microgrid,,30000.0,20000,household,5076f
86,Ayapo,PLN_microgrid,,200000.0,50000,household,5076f
87,Ayapo,PLN_microgrid,,20000.0,50000,household,5076f
120,Hobong,PLN_grid,250000.0,,500000,church,5036f
865,Khamayakha,PLN_grid,720000.0,,50000,household,1217f
1120,Khamayakha,PLN_grid,4800000.0,,45000,other,4027f
