In [None]:
import os

import pandas as pd
from keras.preprocessing import image
from imagenet_utils import preprocess_input, decode_predictions
from keras.layers.convolutional import Convolution2D, AveragePooling2D
from keras.optimizers import SGD
from keras.layers.core import Activation, Flatten, Dense
from keras.layers import Dropout
from keras.models import Model, Sequential

### Prepare DHS wealth index survey file

#### Input:
- `<country>_cluster_wealth.csv`
- this file should already have been created by running `process_dhs.py`

#### Output:
1. calculate the wealth category min and max to use as bucket boundaries
2. calculate per cluster mean wealth index
3. append cluster category to per cluster mean wealth index
4. write file to disk: `<country>_cluster_wealth_grouped.csv`

In [41]:
# Country key; it is interpolated verbatim into every data path and file name
# built from it in this notebook.
# NOTE(review): spelled 'burindi' rather than 'burundi' — confirm it matches
# the on-disk directory and file names before changing it.
country = "burindi"

In [42]:
# File name of the DHS cluster wealth CSV for the selected country
# (expected to have been produced by process_dhs.py).
dhs_wealth_file = '{}_cluster_wealth.csv'.format(country)

In [43]:
# Directory holding the DHS files for the selected country:
#   ~/workspace/dhs-landsat-poverty-prediction/data/<country>/dhs
home_dir = os.path.expanduser('~')
dhs_wealth_dir = os.path.join(
    home_dir,
    'workspace/dhs-landsat-poverty-prediction/data',
    country,
    'dhs',
)

In [44]:
# Load the DHS wealth survey CSV for the selected country.
# Later cells read the columns 'cluster_id', 'wealth_index' and 'wealth_category'.
dhs_df = pd.read_csv(os.path.join(dhs_wealth_dir, dhs_wealth_file))

#### 1. calculate the wealth category min and max to use as bucket boundaries

In [45]:
# Mean, min and max wealth index within each DHS wealth category; the min/max
# values are used to choose the bucket boundaries.
(
    dhs_df
    .groupby('wealth_category')[['wealth_index']]
    .agg(['mean', 'min', 'max'])
)

Unnamed: 0_level_0,wealth_index,wealth_index,wealth_index
Unnamed: 0_level_1,mean,min,max
wealth_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,-0.801418,-1.38394,-0.66107
2,-0.562124,-0.66106,-0.45802
3,-0.35243,-0.458,-0.22984
4,-0.022235,-0.22979,0.29348
5,1.539051,0.29412,4.32413


In [46]:
# Bin edges for pd.cut: the overall minimum wealth index followed by the
# maximum of each wealth category, taken from the per-category min/max table.
# The category-3 upper edge is negative (-0.22984); writing it as +0.22984
# keeps the list monotonic, so pd.cut would not complain but would assign
# values in (-0.22984, 0.22984] to category 3 instead of category 4.
wealth_index_buckets = [
    -1.38394,  # min of category 1 (lowest edge)
    -0.66107,  # max of category 1
    -0.45802,  # max of category 2
    -0.22984,  # max of category 3
    0.29348,   # max of category 4
    4.32413,   # max of category 5
]

#### 2. calculate per cluster mean wealth index

In [47]:
# Average the wealth index over all rows of each cluster; reset_index() turns
# 'cluster_id' back into a regular column.
dhs_df_mean = (
    dhs_df
    .groupby('cluster_id')[['wealth_index']]
    .mean()
    .reset_index()
)

In [48]:
# Preview the first five per-cluster means.
dhs_df_mean.head(n=5)

Unnamed: 0,cluster_id,wealth_index
0,1,-0.462795
1,2,-0.523744
2,3,0.390402
3,4,-0.670847
4,5,2.065443


#### 3. append cluster category to per cluster mean wealth index

In [37]:
# Assign each cluster's mean wealth index to a wealth category ('1'..'5')
# using the edges in wealth_index_buckets.
# pd.cut intervals are right-closed and left-open by default, so a value equal
# to the lowest edge would fall outside every bin and become NaN;
# include_lowest=True closes the first interval on the left to avoid that.
dhs_df_mean['wealth_category'] = pd.cut(
    dhs_df_mean['wealth_index'],
    bins=wealth_index_buckets,
    labels=['1', '2', '3', '4', '5'],
    include_lowest=True,
)

In [39]:
# Write the per-cluster means (with their wealth category) into the same
# directory as the input CSV, without the DataFrame index.
grouped_csv_path = os.path.join(dhs_wealth_dir, f'{country}_cluster_wealth_grouped.csv')
dhs_df_mean.to_csv(grouped_csv_path, index=False)

### Build a model

#### Input:


#### Output:

In [51]:
# Landsat TIF directory for the selected country:
#   ~/workspace/dhs-landsat-poverty-prediction/data/<country>/landsat_tif
home_dir = os.path.expanduser('~')
tif_dir = os.path.join(
    home_dir,
    'workspace/dhs-landsat-poverty-prediction/data',
    country,
    'landsat_tif',
)

In [52]:
# Landsat daylight directory for the selected country:
#   ~/workspace/dhs-landsat-poverty-prediction/data/<country>/landsat_daylight
home_dir = os.path.expanduser('~')
daylight_dir = os.path.join(
    home_dir,
    'workspace/dhs-landsat-poverty-prediction/data',
    country,
    'landsat_daylight',
)

In [53]:
# Small convolutional classifier head on a (12, 12, 512) input.
# Layers are passed to Sequential as a list; Sequential adds them in order,
# which is equivalent to calling model.add() for each one.
model = Sequential([
    # 6x6 convolution with stride 6 over the 12x12 input.
    Convolution2D(64, 6, 6,
                  activation='relu',
                  input_shape=(12, 12, 512),
                  subsample=(6, 6),
                  name='input'),
    Dropout(0.5),
    # 1x1 convolutions (the second one has no activation).
    Convolution2D(64, 1, 1, activation='relu', subsample=(1, 1), name='conv_7'),
    Dropout(0.5),
    Convolution2D(64, 1, 1, subsample=(1, 1), name='conv_8'),
    AveragePooling2D((2, 2), strides=(1, 1), name='add_pool'),
    Flatten(name='flatten'),
    # NOTE(review): 3 output classes, while the wealth categories prepared
    # earlier in this notebook have 5 labels — confirm the intended target.
    Dense(3),
    Activation("softmax"),
])

# Plain SGD with learning rate 0.01, categorical cross-entropy loss.
opt = SGD(lr=1e-2)
model.compile(optimizer=opt,
              loss="categorical_crossentropy",
              metrics=["accuracy"])

NameError: name 'Sequential' is not defined