# Prepare CNN Data

## Setup

In [1]:
# USE ACCOUNT: robmarty3@gmail.com
import ee
#ee.Authenticate()
ee.Initialize()

In [8]:
import numpy as np
import os, datetime
import pandas as pd
import itertools
import tensorflow as tf
import config as cf
import ee_utils as utils
import eeconvert
import time
from datetime import datetime

#import geetools
#from geetools import ui, cloud_mask
#cloud_mask_landsatSR = cloud_mask.landsatSR()
#cloud_mask_sentinel2 = cloud_mask.sentinel2()

## Parameters

In [9]:
#224
SURVEY_NAME = 'DHS'
SATELLITE = 'landsat_7' # 's2'; 'landsat'; 'landsat_7'
OUTCOME_VAR = "ntlharmon" # "ntlharmon" OR "viirs"
SKIP_IF_SCRAPED = True
CHUNK_SIZE = 1 # Number of observtaions to scrape in GEE at any given time
DROPBOX_DIR = cf.DROPBOX_DIRECTORY
GOOGLEDRIVE_DIR = cf.GOOGLEDRIVE_DIRECTORY

if SATELLITE == 's2':
    KERNEL_SIZE = 224
elif SATELLITE == 'landsat':
    KERNEL_SIZE = 224 #167
elif SATELLITE == 'landsat_7':
    KERNEL_SIZE = 224 #167

print(KERNEL_SIZE)

224


In [10]:
# Directory to store tfrecords
out_path = os.path.join(GOOGLEDRIVE_DIR, 
            'Data', 
            SURVEY_NAME, 
            'FinalData',
            'Individual Datasets',
            'cnn_' + SATELLITE,
            'tfrecords')

out_path_errors = os.path.join(GOOGLEDRIVE_DIR, 
            'Data', 
            SURVEY_NAME, 
            'FinalData',
            'Individual Datasets',
            'cnn_' + SATELLITE)

## Load Data

In [11]:
# Relies on VIIRs Values
if OUTCOME_VAR == 'viirs':
    survey_df = pd.read_csv(os.path.join(DROPBOX_DIR, 'Data', SURVEY_NAME, 'FinalData', 'Individual Datasets', 'data_for_cnn.csv'))
elif OUTCOME_VAR == 'ntlharmon':
    survey_df = pd.read_csv(os.path.join(DROPBOX_DIR, 'Data', SURVEY_NAME, 'FinalData', 'Individual Datasets', 'data_for_cnn_ntlharmon.csv'))
    
print(survey_df.shape)
print(survey_df.ntl_group.value_counts())

(128177, 8)
1    57833
0    41292
2    29052
Name: ntl_group, dtype: int64


In [12]:
# Remove observations that had issues
if SATELLITE == 'landsat_7':
    survey_df = survey_df[survey_df['uid'] != 'BO200800002050']
    survey_df = survey_df[survey_df['uid'] != 'CO201000000592']

if SATELLITE == 's2':
    #survey_df = survey_df[survey_df['GID_2'] != "IND.18.1_1"]
    survey_df = survey_df[survey_df['uid'] != 'CO201000003682']
    survey_df = survey_df[survey_df['uid'] != 'CO201000003683']
    survey_df = survey_df[survey_df['uid'] != 'CO201000004885']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180058']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180040']
    survey_df = survey_df[survey_df['uid'] != 'IA201400300032']
    survey_df = survey_df[survey_df['uid'] != 'IA201400010147']
    survey_df = survey_df[survey_df['uid'] != 'NM201300000001']
    survey_df = survey_df[survey_df['uid'] != 'CO201000002924']
    survey_df = survey_df[survey_df['uid'] != 'CO201000002524']
    survey_df = survey_df[survey_df['uid'] != 'CO201000001170']
    survey_df = survey_df[survey_df['uid'] != 'CO201000003554']
    survey_df = survey_df[survey_df['uid'] != 'GA201200000310']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180104'] 
    survey_df = survey_df[survey_df['uid'] != 'IA201400180012']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180011']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180048']
    survey_df = survey_df[survey_df['uid'] != 'IA201400300002']
    survey_df = survey_df[survey_df['uid'] != 'IA201400130192']
    survey_df = survey_df[survey_df['uid'] != 'IA201400140845']
    survey_df = survey_df[survey_df['uid'] != 'IA201400010076']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180133']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180123']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180047']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180138']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180052']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180055']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180116']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180030']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180086']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180050']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180027']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180072']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180080']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180079']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180064']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180081']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180062']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000335']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000330']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000322']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180062']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180140']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180100']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000342']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180081']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180028']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000340']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180091']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180112']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000331']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000336']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000334']
    survey_df = survey_df[survey_df['uid'] != 'IA201400180083']
    survey_df = survey_df[survey_df['uid'] != 'GY200900000293']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000328']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000327']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000333']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000332']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000329']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000339']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000326']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000323']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000321']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000338']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000324']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000325']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000337']
    survey_df = survey_df[survey_df['uid'] != 'UG201800000341']

In [13]:
# List of TF Records
tf_record_list = list(np.unique(survey_df.tfrecord_name))

len(tf_record_list)

1104

In [14]:
# If skip already scraped, remove existing tfrecords from tf_record_list
if SKIP_IF_SCRAPED:
    tf_records_exist = os.listdir(out_path)
    tf_record_list = [x for x in tf_record_list if x not in tf_records_exist]
    
print(len(tf_record_list))

799


In [None]:
## Blank error dataframe
errors_df = pd.DataFrame()

## Error file name
now = datetime.now()
current_time = now.strftime("%d_%m_%y_%H_%M_%S")
error_file_name = 'errors_' + current_time + '.csv'

if True:
    ### Loop through all tfrecords
    for tfr_i in tf_record_list:

        # Sometimes we get computational time out errors. If occurs, just skip and go to next.
        # We can then go back and rescrape missed ones.

        survey_df_yeari = survey_df[survey_df['tfrecord_name'] == tfr_i]
        year_i = survey_df_yeari['year'].iloc[0]

        ### Loop through chunks within tfrecord (can only pull so much data from GEE at a time)
        survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)

        print("Putting " + str(survey_df_yeari.shape[0]) + " observations into " + tfr_i)

        proto_examples_all = []
        for chunk_i in list(np.unique(survey_df_yeari.chunk_id)):
            
            try:
            
                time.sleep(3)
                print("Observation: " + str(len(proto_examples_all)) + "/" + str(survey_df_yeari.shape[0]))

                survey_df_yeari_chunki = survey_df_yeari[survey_df_yeari['chunk_id'] == chunk_i]

                proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
                proto_examples_all.extend(proto_examples_i)
                
            except:
                
                print("Error ---")
                print(survey_df_yeari_chunki['uid'])
                
                errors_df = errors_df.append(survey_df_yeari_chunki[['uid']], ignore_index = True)
                errors_df.to_csv(os.path.join(out_path_errors, error_file_name))
                                 
                time.sleep(15)
                pass

        ### Save data as tf record
        out_path_i = os.path.join(out_path, tfr_i)
        print(out_path_i)
        with tf.io.TFRecordWriter(out_path_i) as writer:
            for tf_example in proto_examples_all:
                writer.write(tf_example.SerializeToString())

        print("Success \o/")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Putting 190 observations into forcnn_train_AL_1_1_all.tfrecord
Observation: 0/190


2022-06-03 09:35:23.777033: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Observation: 1/190
Observation: 2/190
Observation: 3/190
Observation: 4/190
Observation: 5/190
Observation: 6/190
Observation: 7/190
Observation: 8/190
Observation: 9/190
Observation: 10/190
Observation: 11/190
Observation: 12/190
Observation: 13/190
Observation: 14/190
Observation: 15/190
Observation: 16/190
Observation: 17/190
Observation: 18/190
Observation: 19/190
Observation: 20/190
Observation: 21/190
Observation: 22/190
Observation: 23/190
Observation: 24/190
Observation: 25/190
Observation: 26/190
Observation: 27/190
Observation: 28/190
Observation: 29/190
Observation: 30/190
Observation: 31/190
Observation: 32/190
Observation: 33/190
Observation: 34/190
Observation: 35/190
Observation: 36/190
Observation: 37/190
Observation: 38/190
Observation: 39/190
Observation: 40/190
Observation: 41/190
Observation: 42/190
Observation: 43/190
Observation: 44/190
Observation: 45/190
Observation: 46/190
Observation: 47/190
Observation: 48/190
Observation: 49/190
Observation: 50/190
Observati

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/152
Observation: 1/152
Observation: 2/152
Observation: 3/152
Observation: 4/152
Observation: 5/152
Observation: 6/152
Observation: 7/152
Observation: 8/152
Observation: 9/152
Observation: 10/152
Observation: 11/152
Observation: 12/152
Observation: 13/152
Observation: 14/152
Observation: 15/152
Observation: 16/152
Observation: 17/152
Observation: 18/152
Observation: 19/152
Observation: 20/152
Observation: 21/152
Observation: 22/152
Observation: 23/152
Observation: 24/152
Observation: 25/152
Observation: 26/152
Observation: 27/152
Observation: 28/152
Observation: 29/152
Observation: 30/152
Observation: 31/152
Observation: 32/152
Observation: 33/152
Observation: 34/152
Observation: 35/152
Observation: 36/152
Observation: 37/152
Observation: 38/152
Observation: 39/152
Observation: 40/152
Observation: 41/152
Observation: 42/152
Observation: 43/152
Observation: 44/152
Observation: 45/152
Observation: 46/152
Observation: 47/152
Observation: 48/152
Observation: 49/152
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/70
Observation: 1/70
Observation: 2/70
Observation: 3/70
Observation: 4/70
Observation: 5/70
Observation: 6/70
Observation: 7/70
Observation: 8/70
Observation: 9/70
Observation: 10/70
Observation: 11/70
Observation: 12/70
Observation: 13/70
Observation: 14/70
Observation: 15/70
Observation: 16/70
Observation: 17/70
Observation: 18/70
Observation: 19/70
Observation: 20/70
Observation: 21/70
Observation: 22/70
Observation: 23/70
Observation: 24/70
Observation: 25/70
Observation: 26/70
Observation: 27/70
Observation: 28/70
Observation: 29/70
Observation: 30/70
Observation: 31/70
Observation: 32/70
Observation: 33/70
Observation: 34/70
Observation: 35/70
Observation: 36/70
Observation: 37/70
Observation: 38/70
Observation: 39/70
Observation: 40/70
Observation: 41/70
Observation: 42/70
Observation: 43/70
Observation: 44/70
Observation: 45/70
Observation: 46/70
Observation: 47/70
Observation: 48/70
Observation: 49/70
Observation: 50/70
Observation: 51/70
Observation: 52/70
Obs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/161
Observation: 1/161
Observation: 2/161
Observation: 3/161
Observation: 4/161
Observation: 5/161
Observation: 6/161
Observation: 7/161
Observation: 8/161
Observation: 9/161
Observation: 10/161
Observation: 11/161
Observation: 12/161
Observation: 13/161
Observation: 14/161
Observation: 15/161
Observation: 16/161
Observation: 17/161
Observation: 18/161
Observation: 19/161
Observation: 20/161
Observation: 21/161
Observation: 22/161
Observation: 23/161
Observation: 24/161
Observation: 25/161
Observation: 26/161
Observation: 27/161
Observation: 28/161
Observation: 29/161
Observation: 30/161
Observation: 31/161
Observation: 32/161
Observation: 33/161
Observation: 34/161
Observation: 35/161
Observation: 36/161
Observation: 37/161
Observation: 38/161
Observation: 39/161
Observation: 40/161
Observation: 41/161
Observation: 42/161
Observation: 43/161
Observation: 44/161
Observation: 45/161
Observation: 46/161
Observation: 47/161
Observation: 48/161
Observation: 49/161
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/68
Observation: 1/68
Observation: 2/68
Observation: 3/68
Observation: 4/68
Observation: 5/68
Observation: 6/68
Observation: 7/68
Observation: 8/68
Observation: 9/68
Observation: 10/68
Observation: 11/68
Observation: 12/68
Observation: 13/68
Observation: 14/68
Observation: 15/68
Observation: 16/68
Observation: 17/68
Observation: 18/68
Observation: 19/68
Observation: 20/68
Observation: 21/68
Observation: 22/68
Observation: 23/68
Observation: 24/68
Observation: 25/68
Observation: 26/68
Observation: 27/68
Observation: 28/68
Observation: 29/68
Observation: 30/68
Observation: 31/68
Observation: 32/68
Observation: 33/68
Observation: 34/68
Observation: 35/68
Observation: 36/68
Observation: 37/68
Observation: 38/68
Observation: 39/68
Observation: 40/68
Observation: 41/68
Observation: 42/68
Observation: 43/68
Observation: 44/68
Observation: 45/68
Observation: 46/68
Observation: 47/68
Observation: 48/68
Observation: 49/68
Observation: 50/68
Observation: 51/68
Observation: 52/68
Obs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/66
Observation: 1/66
Observation: 2/66
Observation: 3/66
Observation: 4/66
Observation: 5/66
Observation: 6/66
Observation: 7/66
Observation: 8/66
Observation: 9/66
Observation: 10/66
Observation: 11/66
Observation: 12/66
Observation: 13/66
Observation: 14/66
Observation: 15/66
Observation: 16/66
Observation: 17/66
Observation: 18/66
Observation: 19/66
Observation: 20/66
Observation: 21/66
Observation: 22/66
Observation: 23/66
Observation: 24/66
Observation: 25/66
Observation: 26/66
Observation: 27/66
Observation: 28/66
Observation: 29/66
Observation: 30/66
Observation: 31/66
Observation: 32/66
Observation: 33/66
Observation: 34/66
Observation: 35/66
Observation: 36/66
Observation: 37/66
Observation: 38/66
Observation: 39/66
Observation: 40/66
Observation: 41/66
Observation: 42/66
Observation: 43/66
Observation: 44/66
Observation: 45/66
Observation: 46/66
Observation: 47/66
Observation: 48/66
Observation: 49/66
Observation: 50/66
Observation: 51/66
Observation: 52/66
Obs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/52
Observation: 1/52
Observation: 2/52
Observation: 3/52
Observation: 4/52
Observation: 5/52
Observation: 6/52
Observation: 7/52
Observation: 8/52
Observation: 9/52
Observation: 10/52
Observation: 11/52
Observation: 12/52
Observation: 13/52
Observation: 14/52
Observation: 15/52
Observation: 16/52
Observation: 17/52
Observation: 18/52
Observation: 19/52
Observation: 20/52
Observation: 21/52
Observation: 22/52
Observation: 23/52
Observation: 24/52
Observation: 25/52
Observation: 26/52
Observation: 27/52
Observation: 28/52
Observation: 29/52
Observation: 30/52
Observation: 31/52
Observation: 32/52
Observation: 33/52
Observation: 34/52
Observation: 35/52
Observation: 36/52
Observation: 37/52
Observation: 38/52
Observation: 39/52
Observation: 40/52
Observation: 41/52
Observation: 42/52
Observation: 43/52
Observation: 44/52
Observation: 45/52
Observation: 46/52
Observation: 47/52
Observation: 48/52
Observation: 49/52
Observation: 50/52
Observation: 51/52
/Users/robmarty/Google

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/98
Observation: 1/98
Observation: 2/98
Observation: 3/98
Observation: 4/98
Observation: 5/98
Observation: 6/98
Observation: 7/98
Observation: 8/98
Observation: 9/98
Observation: 10/98
Observation: 11/98
Observation: 12/98
Observation: 13/98
Observation: 14/98
Observation: 15/98
Observation: 16/98
Observation: 17/98
Observation: 18/98
Observation: 19/98
Observation: 20/98
Observation: 21/98
Observation: 22/98
Observation: 23/98
Observation: 24/98
Observation: 25/98
Observation: 26/98
Observation: 27/98
Observation: 28/98
Observation: 29/98
Observation: 30/98
Observation: 31/98
Observation: 32/98
Observation: 33/98
Observation: 34/98
Observation: 35/98
Observation: 36/98
Observation: 37/98
Observation: 38/98
Observation: 39/98
Observation: 40/98
Observation: 41/98
Observation: 42/98
Observation: 43/98
Observation: 44/98
Observation: 45/98
Observation: 46/98
Observation: 47/98
Observation: 48/98
Observation: 49/98
Observation: 50/98
Observation: 51/98
Observation: 52/98
Obs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/43
Observation: 1/43
Observation: 2/43
Observation: 3/43
Observation: 4/43
Observation: 5/43
Observation: 6/43
Observation: 7/43
Observation: 8/43
Observation: 9/43
Observation: 10/43
Observation: 11/43
Observation: 12/43
Observation: 13/43
Observation: 14/43
Observation: 15/43
Observation: 16/43
Observation: 17/43
Observation: 18/43
Observation: 19/43
Observation: 20/43
Observation: 21/43
Observation: 22/43
Observation: 23/43
Observation: 24/43
Observation: 25/43
Observation: 26/43
Observation: 27/43
Observation: 28/43
Observation: 29/43
Observation: 30/43
Observation: 31/43
Observation: 32/43
Observation: 33/43
Observation: 34/43
Observation: 35/43
Observation: 36/43
Observation: 37/43
Observation: 38/43
Observation: 39/43
Observation: 40/43
Observation: 41/43
Observation: 42/43
/Users/robmarty/Google Drive/World Bank/IEs/Pakistan Poverty Estimation/Data/DHS/FinalData/Individual Datasets/cnn_landsat_7/tfrecords/forcnn_train_AM_4_1_all.tfrecord
Success \o/
Putting 37 ob

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/37
Observation: 1/37
Observation: 2/37
Observation: 3/37
Observation: 4/37
Observation: 5/37
Observation: 6/37
Observation: 7/37
Observation: 8/37
Observation: 9/37
Observation: 10/37
Observation: 11/37
Observation: 12/37
Observation: 13/37
Observation: 14/37
Observation: 15/37
Observation: 16/37
Observation: 17/37
Observation: 18/37
Observation: 19/37
Observation: 20/37
Observation: 21/37
Observation: 22/37
Observation: 23/37
Observation: 24/37
Observation: 25/37
Observation: 26/37
Observation: 27/37
Observation: 28/37
Observation: 29/37
Observation: 30/37
Observation: 31/37
Observation: 32/37
Observation: 33/37
Observation: 34/37
Observation: 35/37
Observation: 36/37
/Users/robmarty/Google Drive/World Bank/IEs/Pakistan Poverty Estimation/Data/DHS/FinalData/Individual Datasets/cnn_landsat_7/tfrecords/forcnn_train_AM_5_1_all.tfrecord
Success \o/
Putting 153 observations into forcnn_train_AO_1_1_all.tfrecord


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/153
Observation: 1/153
Observation: 2/153
Observation: 3/153
Observation: 4/153
Observation: 5/153
Observation: 6/153
Observation: 7/153
Observation: 8/153
Observation: 9/153
Observation: 10/153
Observation: 11/153
Observation: 12/153
Observation: 13/153
Observation: 14/153
Observation: 15/153
Observation: 16/153
Observation: 17/153
Observation: 18/153
Observation: 19/153
Observation: 20/153
Observation: 21/153
Observation: 22/153
Observation: 23/153
Observation: 24/153
Observation: 25/153
Observation: 26/153
Observation: 27/153
Observation: 28/153
Observation: 29/153
Observation: 30/153
Observation: 31/153
Observation: 32/153
Observation: 33/153
Observation: 34/153
Observation: 35/153
Observation: 36/153
Observation: 37/153
Observation: 38/153
Observation: 39/153
Observation: 40/153
Observation: 41/153
Observation: 42/153
Observation: 43/153
Observation: 44/153
Observation: 45/153
Observation: 46/153
Observation: 47/153
Observation: 48/153
Observation: 49/153
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/128
Observation: 1/128
Observation: 2/128
Observation: 3/128
Observation: 4/128
Observation: 5/128
Observation: 6/128
Observation: 7/128
Observation: 8/128
Observation: 9/128
Observation: 10/128
Observation: 11/128
Observation: 12/128
Observation: 13/128
Observation: 14/128
Observation: 15/128
Observation: 16/128
Observation: 17/128
Observation: 18/128
Observation: 19/128
Observation: 20/128
Observation: 21/128
Observation: 22/128
Observation: 23/128
Observation: 24/128
Observation: 25/128
Observation: 26/128
Observation: 27/128
Observation: 28/128
Observation: 29/128
Observation: 30/128
Observation: 31/128
Observation: 32/128
Observation: 33/128
Observation: 34/128
Observation: 35/128
Observation: 36/128
Observation: 37/128
Observation: 38/128
Observation: 39/128
Observation: 40/128
Observation: 41/128
Observation: 42/128
Observation: 43/128
Observation: 44/128
Observation: 45/128
Observation: 46/128
Observation: 47/128
Observation: 48/128
Observation: 49/128
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/125
Observation: 1/125
Observation: 2/125
Observation: 3/125
Observation: 4/125
Observation: 5/125
Observation: 6/125
Observation: 7/125
Observation: 8/125
Observation: 9/125
Observation: 10/125
Observation: 11/125
Observation: 12/125
Observation: 13/125
Observation: 14/125
Observation: 15/125
Observation: 16/125
Observation: 17/125
Observation: 18/125
Observation: 19/125
Observation: 20/125
Observation: 21/125
Observation: 22/125
Observation: 23/125
Observation: 24/125
Observation: 25/125
Observation: 26/125
Observation: 27/125
Observation: 28/125
Observation: 29/125
Observation: 30/125
Observation: 31/125
Observation: 32/125
Observation: 33/125
Observation: 34/125
Observation: 35/125
Observation: 36/125
Observation: 37/125
Observation: 38/125
Observation: 39/125
Observation: 40/125
Observation: 41/125
Observation: 42/125
Observation: 43/125
Observation: 44/125
Observation: 45/125
Observation: 46/125
Observation: 47/125
Observation: 48/125
Observation: 49/125
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/114
Observation: 1/114
Observation: 2/114
Observation: 3/114
Observation: 4/114
Observation: 5/114
Observation: 6/114
Observation: 7/114
Observation: 8/114
Observation: 9/114
Observation: 10/114
Observation: 11/114
Observation: 12/114
Observation: 13/114
Observation: 14/114
Observation: 15/114
Observation: 16/114
Observation: 17/114
Observation: 18/114
Observation: 19/114
Observation: 20/114
Observation: 21/114
Observation: 22/114
Observation: 23/114
Observation: 24/114
Observation: 25/114
Observation: 26/114
Observation: 27/114
Observation: 28/114
Observation: 29/114
Observation: 30/114
Observation: 31/114
Observation: 32/114
Observation: 33/114
Observation: 34/114
Observation: 35/114
Observation: 36/114
Observation: 37/114
Observation: 38/114
Observation: 39/114
Observation: 40/114
Observation: 41/114
Observation: 42/114
Observation: 43/114
Observation: 44/114
Observation: 45/114
Observation: 46/114
Observation: 47/114
Observation: 48/114
Observation: 49/114
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/88
Observation: 1/88
Observation: 2/88
Observation: 3/88
Observation: 4/88
Observation: 5/88
Observation: 6/88
Observation: 7/88
Observation: 8/88
Observation: 9/88
Observation: 10/88
Observation: 11/88
Observation: 12/88
Observation: 13/88
Observation: 14/88
Observation: 15/88
Observation: 16/88
Observation: 17/88
Observation: 18/88
Observation: 19/88
Observation: 20/88
Observation: 21/88
Observation: 22/88
Observation: 23/88
Observation: 24/88
Observation: 25/88
Observation: 26/88
Observation: 27/88
Observation: 28/88
Observation: 29/88
Observation: 30/88
Observation: 31/88
Observation: 32/88
Observation: 33/88
Observation: 34/88
Observation: 35/88
Observation: 36/88
Observation: 37/88
Observation: 38/88
Observation: 39/88
Observation: 40/88
Observation: 41/88
Observation: 42/88
Observation: 43/88
Observation: 44/88
Observation: 45/88
Observation: 46/88
Observation: 47/88
Observation: 48/88
Observation: 49/88
Observation: 50/88
Observation: 51/88
Observation: 52/88
Obs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/223
Observation: 1/223
Observation: 2/223
Observation: 3/223
Observation: 4/223
Observation: 5/223
Observation: 6/223
Observation: 7/223
Observation: 8/223
Observation: 9/223
Observation: 10/223
Observation: 11/223
Observation: 12/223
Observation: 13/223
Observation: 14/223
Observation: 15/223
Observation: 16/223
Observation: 17/223
Observation: 18/223
Observation: 19/223
Observation: 20/223
Observation: 21/223
Observation: 22/223
Observation: 23/223
Observation: 24/223
Observation: 25/223
Observation: 26/223
Observation: 27/223
Observation: 28/223
Observation: 29/223
Observation: 30/223
Observation: 31/223
Observation: 32/223
Observation: 33/223
Observation: 34/223
Observation: 35/223
Observation: 36/223
Observation: 37/223
Observation: 38/223
Observation: 39/223
Observation: 40/223
Observation: 41/223
Observation: 42/223
Observation: 43/223
Observation: 44/223
Observation: 45/223
Observation: 46/223
Observation: 47/223
Observation: 48/223
Observation: 49/223
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/250
Observation: 1/250
Observation: 2/250
Observation: 3/250
Observation: 4/250
Observation: 5/250
Observation: 6/250
Observation: 7/250
Observation: 8/250
Observation: 9/250
Observation: 10/250
Observation: 11/250
Observation: 12/250
Observation: 13/250
Observation: 14/250
Observation: 15/250
Observation: 16/250
Observation: 17/250
Observation: 18/250
Observation: 19/250
Observation: 20/250
Observation: 21/250
Observation: 22/250
Observation: 23/250
Observation: 24/250
Observation: 25/250
Observation: 26/250
Observation: 27/250
Observation: 28/250
Observation: 29/250
Observation: 30/250
Observation: 31/250
Observation: 32/250
Observation: 33/250
Observation: 34/250
Observation: 35/250
Observation: 36/250
Observation: 37/250
Observation: 38/250
Observation: 39/250
Observation: 40/250
Observation: 41/250
Observation: 42/250
Observation: 43/250
Observation: 44/250
Observation: 45/250
Observation: 46/250
Observation: 47/250
Observation: 48/250
Observation: 49/250
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/19
Observation: 1/19
Observation: 2/19
Observation: 3/19
Observation: 4/19
Observation: 5/19
Observation: 6/19
Observation: 7/19
Observation: 8/19
Observation: 9/19
Observation: 10/19
Observation: 11/19
Observation: 12/19
Observation: 13/19
Observation: 14/19
Observation: 15/19
Observation: 16/19
Observation: 17/19
Observation: 18/19
/Users/robmarty/Google Drive/World Bank/IEs/Pakistan Poverty Estimation/Data/DHS/FinalData/Individual Datasets/cnn_landsat_7/tfrecords/forcnn_train_BD_2_2_all.tfrecord
Success \o/
Putting 194 observations into forcnn_train_BD_3_1_all.tfrecord


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/194
Observation: 1/194
Observation: 2/194
Observation: 3/194
Observation: 4/194
Observation: 5/194
Observation: 6/194
Observation: 7/194
Observation: 8/194
Observation: 9/194
Observation: 10/194
Observation: 11/194
Observation: 12/194
Observation: 13/194
Observation: 14/194
Observation: 15/194
Observation: 16/194
Observation: 17/194
Observation: 18/194
Observation: 19/194
Observation: 20/194
Observation: 21/194
Observation: 22/194
Observation: 23/194
Observation: 24/194
Observation: 25/194
Observation: 26/194
Observation: 27/194
Observation: 28/194
Observation: 29/194
Observation: 30/194
Observation: 31/194
Observation: 32/194
Observation: 33/194
Observation: 34/194
Observation: 35/194
Observation: 36/194
Observation: 37/194
Observation: 38/194
Observation: 39/194
Observation: 40/194
Observation: 41/194
Observation: 42/194
Observation: 43/194
Observation: 44/194
Observation: 45/194
Observation: 46/194
Observation: 47/194
Observation: 48/194
Observation: 49/194
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/250
Observation: 1/250
Observation: 2/250
Observation: 3/250
Observation: 4/250
Observation: 5/250
Observation: 6/250
Observation: 7/250
Observation: 8/250
Observation: 9/250
Observation: 10/250
Observation: 11/250
Observation: 12/250
Observation: 13/250
Observation: 14/250
Observation: 15/250
Observation: 16/250
Observation: 17/250
Observation: 18/250
Observation: 19/250
Observation: 20/250
Observation: 21/250
Observation: 22/250
Observation: 23/250
Observation: 24/250
Observation: 25/250
Observation: 26/250
Observation: 27/250
Observation: 28/250
Observation: 29/250
Observation: 30/250
Observation: 31/250
Observation: 32/250
Observation: 33/250
Observation: 34/250
Observation: 35/250
Observation: 36/250
Observation: 37/250
Observation: 38/250
Observation: 39/250
Observation: 40/250
Observation: 41/250
Observation: 42/250
Observation: 43/250
Observation: 44/250
Observation: 45/250
Observation: 46/250
Observation: 47/250
Observation: 48/250
Observation: 49/250
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/37
Observation: 1/37
Observation: 2/37
Observation: 3/37
Observation: 4/37
Observation: 5/37
Observation: 6/37
Observation: 7/37
Observation: 8/37
Observation: 9/37
Observation: 10/37
Observation: 11/37
Observation: 12/37
Observation: 13/37
Observation: 14/37
Observation: 15/37
Observation: 16/37
Observation: 17/37
Observation: 18/37
Observation: 19/37
Observation: 20/37
Observation: 21/37
Observation: 22/37
Observation: 23/37
Observation: 24/37
Observation: 25/37
Observation: 26/37
Observation: 27/37
Observation: 28/37
Observation: 29/37
Observation: 30/37
Observation: 31/37
Observation: 32/37
Observation: 33/37
Observation: 34/37
Observation: 35/37
Observation: 36/37
/Users/robmarty/Google Drive/World Bank/IEs/Pakistan Poverty Estimation/Data/DHS/FinalData/Individual Datasets/cnn_landsat_7/tfrecords/forcnn_train_BD_4_2_all.tfrecord
Success \o/
Putting 250 observations into forcnn_train_BD_5_1_all.tfrecord


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/250
Observation: 1/250
Observation: 2/250
Observation: 3/250
Observation: 4/250
Observation: 5/250
Observation: 6/250
Observation: 7/250
Observation: 8/250
Observation: 9/250
Observation: 10/250
Observation: 11/250
Observation: 12/250
Observation: 13/250
Observation: 14/250
Observation: 15/250
Observation: 16/250
Observation: 17/250
Observation: 18/250
Observation: 19/250
Observation: 20/250
Observation: 21/250
Observation: 22/250
Observation: 23/250
Observation: 24/250
Observation: 25/250
Observation: 26/250
Observation: 27/250
Observation: 28/250
Observation: 29/250
Observation: 30/250
Observation: 31/250
Observation: 32/250
Observation: 33/250
Observation: 34/250
Observation: 35/250
Observation: 36/250
Observation: 37/250
Observation: 38/250
Observation: 39/250
Observation: 40/250
Observation: 41/250
Observation: 42/250
Observation: 43/250
Observation: 44/250
Observation: 45/250
Observation: 46/250
Observation: 47/250
Observation: 48/250
Observation: 49/250
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/228
Observation: 1/228
Observation: 2/228
Observation: 3/228
Observation: 4/228
Observation: 5/228
Observation: 6/228
Observation: 7/228
Observation: 8/228
Observation: 9/228
Observation: 10/228
Observation: 11/228
Observation: 12/228
Observation: 13/228
Observation: 14/228
Observation: 15/228
Observation: 16/228
Observation: 17/228
Observation: 18/228
Observation: 19/228
Observation: 20/228
Observation: 21/228
Observation: 22/228
Observation: 23/228
Observation: 24/228
Observation: 25/228
Observation: 26/228
Observation: 27/228
Observation: 28/228
Observation: 29/228
Observation: 30/228
Observation: 31/228
Observation: 32/228
Observation: 33/228
Observation: 34/228
Observation: 35/228
Observation: 36/228
Observation: 37/228
Observation: 38/228
Observation: 39/228
Observation: 40/228
Observation: 41/228
Observation: 42/228
Observation: 43/228
Observation: 44/228
Observation: 45/228
Observation: 46/228
Observation: 47/228
Observation: 48/228
Observation: 49/228
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/147
Observation: 1/147
Observation: 2/147
Observation: 3/147
Observation: 4/147
Observation: 5/147
Observation: 6/147
Observation: 7/147
Observation: 8/147
Observation: 9/147
Observation: 10/147
Observation: 11/147
Observation: 12/147
Observation: 13/147
Observation: 14/147
Observation: 15/147
Observation: 16/147
Observation: 17/147
Observation: 18/147
Observation: 19/147
Observation: 20/147
Observation: 21/147
Observation: 22/147
Observation: 23/147
Observation: 24/147
Observation: 25/147
Observation: 26/147
Observation: 27/147
Observation: 28/147
Observation: 29/147
Observation: 30/147
Observation: 31/147
Observation: 32/147
Observation: 33/147
Observation: 34/147
Observation: 35/147
Observation: 36/147
Observation: 37/147
Observation: 38/147
Observation: 39/147
Observation: 40/147
Observation: 41/147
Observation: 42/147
Observation: 43/147
Observation: 44/147
Observation: 45/147
Observation: 46/147
Observation: 47/147
Observation: 48/147
Observation: 49/147
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/230
Observation: 1/230
Observation: 2/230
Observation: 3/230
Observation: 4/230
Observation: 5/230
Observation: 6/230
Observation: 7/230
Observation: 8/230
Observation: 9/230
Observation: 10/230
Observation: 11/230
Observation: 12/230
Observation: 13/230
Observation: 14/230
Observation: 15/230
Observation: 16/230
Observation: 17/230
Observation: 18/230
Observation: 19/230
Observation: 20/230
Observation: 21/230
Observation: 22/230
Observation: 23/230
Observation: 24/230
Observation: 25/230
Observation: 26/230
Observation: 27/230
Observation: 28/230
Observation: 29/230
Observation: 30/230
Observation: 31/230
Observation: 32/230
Observation: 33/230
Observation: 34/230
Observation: 35/230
Observation: 36/230
Observation: 37/230
Observation: 38/230
Observation: 39/230
Observation: 40/230
Observation: 41/230
Observation: 42/230
Observation: 43/230
Observation: 44/230
Observation: 45/230
Observation: 46/230
Observation: 47/230
Observation: 48/230
Observation: 49/230
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/133
Observation: 1/133
Observation: 2/133
Observation: 3/133
Observation: 4/133
Observation: 5/133
Observation: 6/133
Observation: 7/133
Observation: 8/133
Observation: 9/133
Observation: 10/133
Observation: 11/133
Observation: 12/133
Observation: 13/133
Observation: 14/133
Observation: 15/133
Observation: 16/133
Observation: 17/133
Observation: 18/133
Observation: 19/133
Observation: 20/133
Observation: 21/133
Observation: 22/133
Observation: 23/133
Observation: 24/133
Observation: 25/133
Observation: 26/133
Observation: 27/133
Observation: 28/133
Observation: 29/133
Observation: 30/133
Observation: 31/133
Observation: 32/133
Observation: 33/133
Observation: 34/133
Observation: 35/133
Observation: 36/133
Observation: 37/133
Observation: 38/133
Observation: 39/133
Observation: 40/133
Observation: 41/133
Observation: 42/133
Observation: 43/133
Observation: 44/133
Observation: 45/133
Observation: 46/133
Observation: 47/133
Observation: 48/133
Observation: 49/133
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/156
Observation: 1/156
Observation: 2/156
Observation: 3/156
Observation: 4/156
Observation: 5/156
Observation: 6/156
Observation: 7/156
Observation: 8/156
Observation: 9/156
Observation: 10/156
Observation: 11/156
Observation: 12/156
Observation: 13/156
Observation: 14/156
Observation: 15/156
Observation: 16/156
Observation: 17/156
Observation: 18/156
Observation: 19/156
Observation: 20/156
Observation: 21/156
Observation: 22/156
Observation: 23/156
Observation: 24/156
Observation: 25/156
Observation: 26/156
Observation: 27/156
Observation: 28/156
Observation: 29/156
Observation: 30/156
Observation: 31/156
Observation: 32/156
Observation: 33/156
Observation: 34/156
Observation: 35/156
Observation: 36/156
Observation: 37/156
Observation: 38/156
Observation: 39/156
Observation: 40/156
Observation: 41/156
Observation: 42/156
Observation: 43/156
Observation: 44/156
Observation: 45/156
Observation: 46/156
Observation: 47/156
Observation: 48/156
Observation: 49/156
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/116
Observation: 1/116
Observation: 2/116
Observation: 3/116
Observation: 4/116
Observation: 5/116
Observation: 6/116
Observation: 7/116
Observation: 8/116
Observation: 9/116
Observation: 10/116
Observation: 11/116
Observation: 12/116
Observation: 13/116
Observation: 14/116
Observation: 15/116
Observation: 16/116
Observation: 17/116
Observation: 18/116
Observation: 19/116
Observation: 20/116
Observation: 21/116
Observation: 22/116
Observation: 23/116
Observation: 24/116
Observation: 25/116
Observation: 26/116
Observation: 27/116
Observation: 28/116
Observation: 29/116
Observation: 30/116
Observation: 31/116
Observation: 32/116
Observation: 33/116
Observation: 34/116
Observation: 35/116
Observation: 36/116
Observation: 37/116
Observation: 38/116
Observation: 39/116
Observation: 40/116
Observation: 41/116
Observation: 42/116
Observation: 43/116
Observation: 44/116
Observation: 45/116
Observation: 46/116
Observation: 47/116
Observation: 48/116
Observation: 49/116
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/150
Observation: 1/150
Observation: 2/150
Observation: 3/150
Observation: 4/150
Observation: 5/150
Observation: 6/150
Observation: 7/150
Observation: 8/150
Observation: 9/150
Observation: 10/150
Observation: 11/150
Observation: 12/150
Observation: 13/150
Observation: 14/150
Observation: 15/150
Observation: 16/150
Observation: 17/150
Observation: 18/150
Observation: 19/150
Observation: 20/150
Observation: 21/150
Observation: 22/150
Observation: 23/150
Observation: 24/150
Observation: 25/150
Observation: 26/150
Observation: 27/150
Observation: 28/150
Observation: 29/150
Observation: 30/150
Observation: 31/150
Observation: 32/150
Observation: 33/150
Observation: 34/150
Observation: 35/150
Observation: 36/150
Observation: 37/150
Observation: 38/150
Observation: 39/150
Observation: 40/150
Observation: 41/150
Observation: 42/150
Observation: 43/150
Observation: 44/150
Observation: 45/150
Observation: 46/150
Observation: 47/150
Observation: 48/150
Observation: 49/150
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/113
Observation: 1/113
Observation: 2/113
Observation: 3/113
Observation: 4/113
Observation: 5/113
Observation: 6/113
Observation: 7/113
Observation: 8/113
Observation: 9/113
Observation: 10/113
Observation: 11/113
Observation: 12/113
Observation: 13/113
Observation: 14/113
Observation: 15/113
Observation: 16/113
Observation: 17/113
Observation: 18/113
Observation: 19/113
Observation: 20/113
Observation: 21/113
Observation: 22/113
Observation: 23/113
Observation: 24/113
Observation: 25/113
Observation: 26/113
Observation: 27/113
Observation: 28/113
Observation: 29/113
Observation: 30/113
Observation: 31/113
Observation: 32/113
Observation: 33/113
Observation: 34/113
Observation: 35/113
Observation: 36/113
Observation: 37/113
Observation: 38/113
Observation: 39/113
Observation: 40/113
Observation: 41/113
Observation: 42/113
Observation: 43/113
Observation: 44/113
Observation: 45/113
Observation: 46/113
Observation: 47/113
Observation: 48/113
Observation: 49/113
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Putting 250 observations into forcnn_train_BJ_3_1_all.tfrecord
Observation: 0/250
Observation: 1/250
Observation: 2/250
Observation: 3/250
Observation: 4/250
Observation: 5/250
Observation: 6/250
Observation: 7/250
Observation: 8/250
Observation: 9/250
Observation: 10/250
Observation: 11/250
Observation: 12/250
Observation: 13/250
Observation: 14/250
Observation: 15/250
Observation: 16/250
Observation: 17/250
Observation: 18/250
Observation: 19/250
Observation: 20/250
Observation: 21/250
Observation: 22/250
Observation: 23/250
Observation: 24/250
Observation: 25/250
Observation: 26/250
Observation: 27/250
Observation: 28/250
Observation: 29/250
Observation: 30/250
Observation: 31/250
Observation: 32/250
Observation: 33/250
Observation: 34/250
Observation: 35/250
Observation: 36/250
Observation: 37/250
Observation: 38/250
Observation: 39/250
Observation: 40/250
Observation: 41/250
Observation: 42/250
Observation: 43/250
Observation: 44/250
Observation: 45/250
Observation: 46/250
Observa

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/3
Observation: 1/3
Observation: 2/3
/Users/robmarty/Google Drive/World Bank/IEs/Pakistan Poverty Estimation/Data/DHS/FinalData/Individual Datasets/cnn_landsat_7/tfrecords/forcnn_train_BJ_3_2_all.tfrecord
Success \o/
Putting 124 observations into forcnn_train_BJ_4_1_all.tfrecord


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/124
Observation: 1/124
Observation: 2/124
Observation: 3/124
Observation: 4/124
Observation: 5/124
Observation: 6/124
Observation: 7/124
Observation: 8/124
Observation: 9/124
Observation: 10/124
Observation: 11/124
Observation: 12/124
Observation: 13/124
Observation: 14/124
Observation: 15/124
Observation: 16/124
Observation: 17/124
Observation: 18/124
Observation: 19/124
Observation: 20/124
Observation: 21/124
Observation: 22/124
Observation: 23/124
Observation: 24/124
Observation: 25/124
Observation: 26/124
Observation: 27/124
Observation: 28/124
Observation: 29/124
Observation: 30/124
Observation: 31/124
Observation: 32/124
Observation: 33/124
Observation: 34/124
Observation: 35/124
Observation: 36/124
Observation: 37/124
Observation: 38/124
Observation: 39/124
Observation: 40/124
Observation: 41/124
Observation: 42/124
Observation: 43/124
Observation: 44/124
Observation: 45/124
Observation: 46/124
Observation: 47/124
Observation: 48/124
Observation: 49/124
Observatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)


Observation: 0/175
Observation: 1/175
Observation: 2/175
Observation: 3/175
Observation: 4/175
Observation: 5/175
Observation: 6/175
Observation: 7/175
Observation: 8/175
Observation: 9/175
Observation: 10/175
Observation: 11/175
Observation: 12/175
Observation: 13/175
Observation: 14/175
Observation: 15/175
Observation: 16/175
Observation: 17/175
Observation: 18/175
Observation: 19/175
Observation: 20/175
Observation: 21/175
Observation: 22/175
Observation: 23/175
Observation: 24/175
Observation: 25/175
Observation: 26/175
Observation: 27/175
Observation: 28/175
Observation: 29/175
Observation: 30/175
Observation: 31/175
Observation: 32/175
Observation: 33/175
Observation: 34/175
Observation: 35/175
Observation: 36/175
Observation: 37/175
Observation: 38/175
Observation: 39/175
Observation: 40/175
Observation: 41/175
Observation: 42/175
Observation: 43/175
Observation: 44/175
Observation: 45/175
Observation: 46/175
Observation: 47/175
Observation: 48/175
Observation: 49/175
Observatio

In [7]:
if False:
    ### Loop through all tfrecords
    for tfr_i in tf_record_list:

        # Sometimes we get computational time out errors. If occurs, just skip and go to next.
        # We can then go back and rescrape missed ones.
        try:

            survey_df_yeari = survey_df[survey_df['tfrecord_name'] == tfr_i]
            year_i = survey_df_yeari['year'].iloc[0]

            ### Loop through chunks within tfrecord (can only pull so much data from GEE at a time)
            survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)

            print("Putting " + str(survey_df_yeari.shape[0]) + " observations into " + tfr_i)

            proto_examples_all = []
            for chunk_i in list(np.unique(survey_df_yeari.chunk_id)):
                time.sleep(3)
                print("Observation: " + str(len(proto_examples_all)) + "/" + str(survey_df_yeari.shape[0]))

                survey_df_yeari_chunki = survey_df_yeari[survey_df_yeari['chunk_id'] == chunk_i]

                proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
                proto_examples_all.extend(proto_examples_i)

            ### Save data as tf record
            out_path_i = os.path.join(out_path, tfr_i)
            print(out_path_i)
            with tf.io.TFRecordWriter(out_path_i) as writer:
                for tf_example in proto_examples_all:
                    writer.write(tf_example.SerializeToString())

            print("Success \o/")

        except:
            print("Error ---")
            print(survey_df_yeari_chunki['uid'])
            time.sleep(15)
            pass

## Troubleshooting

In [None]:
survey_df = survey_df_yeari_chunki
satellite_name = SATELLITE
kernel_size = KERNEL_SIZE
year = year_i

In [11]:
import ee
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import pandas as pd
import itertools
import tensorflow as tf

cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

# tfrecord helper functions ----------------------------------------------------
# https://stackoverflow.com/questions/52324515/passing-multiple-inputs-to-keras-model-from-tf-dataset-api
# https://www.tensorflow.org/tutorials/load_data/tfrecord

def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    # If the value is an eager tensor BytesList won't unpack a string from an EagerTensor.
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() 
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def chunk_ids(total_length, chunk_size):
    n_numbers = np.ceil(total_length / chunk_size)
    n_numbers = int(n_numbers)
    
    chunk_ids = list(range(0,n_numbers)) * chunk_size
    chunk_ids.sort()
    chunk_ids = chunk_ids[:total_length]
    
    return chunk_ids

# Main Functions -----------------------------------------------------------------
def survey_to_fc(survey_df):
    '''
    Convert pandas dataframe of survey locations to a feature collection. 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    survey_fc_list = []
    
    n_rows = survey_df.shape[0]
    for i in range(0, n_rows):
        survey_df_i = survey_df.iloc[[i]]

        f_i = ee.Feature(ee.Geometry.Point([survey_df_i['longitude'].iloc[0], 
                                            survey_df_i['latitude'].iloc[0]]), 
                         {'uid': survey_df_i['uid'].iloc[0]})

        survey_fc_list.append(f_i)
        
    survey_fc = ee.FeatureCollection(survey_fc_list)
    
    return survey_fc

def normalized_diff(values1, values2):
    '''
    Normalized Difference Value

    Input:  values1, values2 (must be same dimensions)

    Output: np array
    '''

    return (values2 - values1)/(values2 + values1)

def ee_to_np_daytime(daytime_f, survey_df, n_rows, b_b, g_b, r_b): # nir_b, swir_b
    '''
    Transforms feature collection from neighborhood array to np array. Stacks bands
    so that they are: NTL, blue, green, red, NDVI, other single daytime bands

    Input:  
      f (features)
      n_rows (number of features)

    Output: np array
    '''
    
    example_proto_list = []

    for i in range(0, n_rows):
        survey_uid = survey_df['uid'].iloc[i]
        #folder_name = survey_df['tf_folder_name'].iloc[i]
        viirs_ntl_group = int(survey_df['ntl_group'].iloc[i])
        survey_year_i = int(survey_df['year'].iloc[i])
        uid_i = survey_df['uid'].iloc[i].encode()
        
        d_f_i = daytime_f[i]['properties']
        #n_f_i = ntl_f[i]['properties']

        # SAVE AS TFRECORD

        # Prep Files
        ### RGB
        brgb_l = [np.array(d_f_i[r_b]), np.array(d_f_i[g_b]), np.array(d_f_i[b_b])]
        brgb_np = np.stack(brgb_l, axis=-1)
        brgb_np = brgb_np.astype(np.uint16)
        brgb_np_tf = tf.io.encode_png(brgb_np, compression = 9)
        #brgb_np_tf = tf.io.serialize_tensor(brgb_np)
        
        ### NIR
        if False:
            bnir_np = d_f_i[nir_b]      
            bnir_np = np.expand_dims(bnir_np, axis=2) # original (224, 224), change to (224,224,1) -> so can stack
            bnir_np = bnir_np.astype(np.uint16)
            bnir_np_tf = tf.io.encode_png(bnir_np, compression = 9)
            #bndvi_np_tf = tf.io.serialize_tensor(bndvi_np)

        if True:
            # https://www.tensorflow.org/api_docs/python/tf/io/encode_png
            ### NDVI 
            bndvi_np = d_f_i['NDVI']      
            bndvi_np = np.expand_dims(bndvi_np, axis=2) # original (224, 224), change to (224,224,1) -> so can stack
            # Convert from -1 to 1 to 0 to 20000
            bndvi_np = bndvi_np + 1
            bndvi_np = bndvi_np * 10000
            bndvi_np = bndvi_np.astype(np.uint16)
            bndvi_np_tf = tf.io.encode_png(bndvi_np, compression = 9)
            #bndvi_np_tf = tf.io.serialize_tensor(bndvi_np)

            ### BU 
            bbu_np = d_f_i['BU']      
            bbu_np = np.expand_dims(bbu_np, axis=2) # original (224, 224), change to (224,224,1) -> so can stack
            # Convert from -1 to 1 to 0 to 20000
            bbu_np = bbu_np + 1
            bbu_np = bbu_np * 10000
            bbu_np = bbu_np.astype(np.uint16)
            bbu_np_tf = tf.io.encode_png(bbu_np, compression = 9)
            #bndvi_np_tf = tf.io.serialize_tensor(bndvi_np)

        ### NTL
        # Not uint16, so so serialize
        #bntl_np = np.array(n_f_i['avg_rad'])
        #bntl_np = np.expand_dims(bntl_np, axis=2)
        # Values to uint16
        #bntl_np = bntl_np + 2 # Can be negative
        #bntl_np = bntl_np * 100 # consider two decimal places before uint16 // could also to * 10 (second decimal may not matter)
        #bntl_np[bntl_np >= 65535] = 65535 # within range of uint16
        #bntl_np = bntl_np.astype(np.uint16)
        #bntl_np_tf = tf.io.encode_png(bntl_np, compression = 9)
        #bntl_np_tf = tf.io.serialize_tensor(bntl_np)

        ## Create dictionary
        feature = {
            'uid' : _bytes_feature(uid_i),
            'viirs_ntl_group' : _int64_feature(viirs_ntl_group),
            'year' : _int64_feature(survey_year_i),
            'b_rgb': _bytes_feature(brgb_np_tf),
            #'b_nir': _bytes_feature(bnir_np_tf)
            'b_ndvi': _bytes_feature(bndvi_np_tf),
            'b_bu': _bytes_feature(bbu_np_tf)
            }

        # Other MS Bands
        #b_other_list = []
        #for b_other_i in other_bs:
        #    bi_np = np.array(d_f_i[b_other_i])
        #    bi_np = np.expand_dims(bi_np, axis=2)
        #    #bi_np_tf = tf.io.serialize_tensor(bi_np)
        #    bi_np = bi_np.astype(np.uint16)
        #    bi_np_tf = tf.io.encode_png(bi_np, compression = 9)
        #    feature['b_' + b_other_i] = _bytes_feature(bi_np_tf)
  
        example_proto = tf.train.Example(features=tf.train.Features(feature=feature))

        example_proto_list.append(example_proto)

        #out_file_name = os.path.join(out_path, folder_name, survey_uid + '.tfrecord')
        #with tf.io.TFRecordWriter(out_file_name) as writer:
        #  writer.write(example_proto.SerializeToString())
        
    return example_proto_list

        #bndvi_np = np.expand_dims(bndvi_l, axis=2)
        #b_np = np.expand_dims(b_l, axis=2)
        #b_np = np.repeat(b_np, 3, -1)
        #np.save(os.path.join(out_path, band_i + "_" + survey_uid + '.npy'), b_np)
        #np.save(os.path.join(out_path, 'BRGB' + "_" + survey_uid + '.npy'), brgb_np)
        #bndvi_np = np.repeat(bndvi_np, 3, -1)
        #np.save(os.path.join(out_path, 'BNDVI' + "_" + survey_uid + '.npy'), bndvi_np)

        #for band_i in SINGLE_BANDS_ALL:
        #    
        #    b_l = np.array(f_i[band_i])
        #    b_np = np.expand_dims(b_l, axis=2)
        #    #b_np = np.repeat(b_np, 3, -1)
        #    np.save(os.path.join(out_path, band_i + "_" + survey_uid + '.npy'), b_np)

    
    #return "Done"

def prep_cnn_np(survey_df,
                satellite_name,
                kernel_size,
                year):
    '''
    Creates numpy arrays for CNN

    Input:  df - pandas dataframe
            lat_name - name of latitude variable in df
            lon_name - name of longitude variable in df
    Output: geopandas dataframe
    '''

    # Setup --------------------------------------------------------------------
    # Survey to FeatureCollection  
    survey_fc = survey_to_fc(survey_df)

    # Define kernel for neighborhood array
    list = ee.List.repeat(1, kernel_size)
    lists = ee.List.repeat(list, kernel_size)
    kernel = ee.Kernel.fixed(kernel_size, kernel_size, lists)
    
    # Define satellite
    if satellite_name == 's2':
        satellite = 's2'
    elif satellite_name == 'landsat':
        if year >= 2014:
            satellite = 'l8'
        else:
            satellite = 'l7'
            
    # Define scale
    if satellite in ['l7', 'l8']:
        SCALE = 30
    elif satellite in ['s2']: 
        SCALE = 10

    # Prep NTL -----------------------------------------------------------------
    
    # Year
    # VIIRS starts in 2012. At minimum, use 2013 to have year before and after
    #if False:
    #    if year <= 2013:
    #        year_use = 2013
    #    else:
    #        year_use = year

    #    year_plus = year_use + 1
    #    year_minus = year_use - 1

    #    year_minus_str = str(year_minus) + '-01-01'
    #    year_plus_str = str(year_plus) + '-12-31'

        # Reduce image collection
    #    ntl_image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
    #        .filterDate(year_minus_str, year_plus_str)\
    #        .median()

        # Select Bands  
    #    ntl_image = ntl_image.select(['avg_rad'])

        # Image to neighborhood array
    #    ntl_arrays = ntl_image.neighborhoodToArray(kernel)

        # Extract values from GEE    
    #    ntl_values_ee = ntl_arrays.sample(
    #      region = survey_fc, 
    #      scale = SCALE,
    #      tileScale = 10 #8
    #    )

    #    ntl_dict_ee = ntl_values_ee.getInfo()

        # Convert values to numpy array
        #n_rows = survey_df.shape[0]
    #    ntl_f = ntl_dict_ee['features']    
        
    # l7 ----------------------------------------------------------------
    if satellite == "l7":
        
        # Bands
        b_b = 'B1'
        g_b = 'B2' 
        r_b = 'B3' 
        nir_b = 'B4'
        swir_b = 'B5'
        #other_bs = ['B5', 'B6', 'B7']
        
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)
        
        # Year
        # landsat 8 starts in May 1999; if year is less than
        # 2000, use 2000 as year (to ensure have year before and after)
        if year < 2000:
            year_use = 2000
        else:
            year_use = year

        # Year
        year_use = year
        
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median() #\
            #.multiply(0.0001)
    
    # l8 ----------------------------------------------------------------
    if satellite == "l8":
                
        # Bands
        # FOR COLLECTION 2
        #b_b = 'SR_B2'
        #g_b = 'SR_B3' 
        #r_b = 'SR_B4' 
        #nir_b = 'SR_B5'
        #other_bs = ['SR_B6', 'SR_B7', 'ST_B10']
        
        # FOR COLLECTION 1
        b_b = 'B2'
        g_b = 'B3' 
        r_b = 'B4' 
        nir_b = 'B5'
        swir_b = 'B6'
        #other_bs = ['B6', 'B7', 'B10']
        
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)

        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
        #    .filterDate(year_minus_str, year_plus_str)\
        #    #.map(cloud_mask_landsatSR)\ #TODO cloud_mask_landsatSR doesn't work with landsat collection 2
        #    .median() #\
        #    #.multiply(0.0001)
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()
        
        #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
        #    .filterDate(year_minus_str, year_plus_str)\
        #    .median()
            
    # s2 ----------------------------------------------------------------
    if satellite == "s2":
        
        # Bands
        b_b = 'B2'
        g_b = 'B3' 
        r_b = 'B4' 
        nir_b = 'B8'
        swir_b = 'B11'
        #other_bs = ['B5', 'B6', 'B7', 'B8A', 'B11', 'B12', 'AOT']
     
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)
        
        # Year
        # sentinel starts in March 2017; juse use 2018
        year_use = 2019
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'

        # Number of bands changes in sentinel, so need to select here before aggregate
        # https://gis.stackexchange.com/questions/374010/gee-tile-error-expected-a-homogeneous-image-collection-but-an-image-with-incom
        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_sentinel2)\
            .select(BANDS)\
            .median() # \
            #.multiply(0.0001)

    # Select Bands
    image = image.select(BANDS)
    
    # Create Indices
    # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
    ndvi = image.normalizedDifference([nir_b, r_b]).rename('NDVI');
    ndbi = image.normalizedDifference([swir_b, nir_b]).rename('NDBI');
    image = image.addBands(ndvi)
    image = image.addBands(ndbi)
        
    bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
    image = image.addBands(bu)
        
    # Subset bands; don't need those used to create NDVI and NDBI
    image = image.select([b_b, g_b, r_b, 'NDVI', 'BU'])
        
    # Image to neighborhood array
    arrays = image.neighborhoodToArray(kernel)
    
    # New ---------
    #neighborhoodImage = myImageToBeSampled.neighborhoodToArray(kernel)
    #samples = arrays.sampleRegions(collection=survey_fc)
    
    # ee.batch.Export.table.toCloudStorage
    # ee.batch.Export.table.toDrive
    # Export.table.toDrive
    #mytask = ee.batch.Export.table.toDrive(
    # collection = samples,
    # fileFormat = 'TFRecord',
    # description = 'test123',
    # folder = 'gee_extracts',
    # selectors = [b_b, g_b, r_b, 'NDVI', 'BU'] + ['uid', 'ntl_group'])
    
    #return mytask

    # OLD ---------
    # Extract values from GEE   
    values_ee = arrays.sample(
      region = survey_fc, 
      scale = SCALE,
      tileScale = 12 # 8
    )
    
    dict_ee = values_ee.getInfo()
     
    # Convert values to numpy array
    n_rows = survey_df.shape[0]
    daytime_f = dict_ee['features']
    
    # Extract data
    out_ex_proto_list = ee_to_np_daytime(daytime_f, survey_df, n_rows, b_b, g_b, r_b)
    
    return out_ex_proto_list

# https://csaybar.github.io/blog/2019/05/30/eetf/
# https://stackoverflow.com/questions/63000565/extract-10000-images-from-google-earth-engine
# https://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/UNET_regression_demo.ipynb#scrollTo=-IlgXu-vcUEY

In [12]:
# Setup --------------------------------------------------------------------
# Survey to FeatureCollection  
survey_fc = survey_to_fc(survey_df)

# Define kernel for neighborhood array
list = ee.List.repeat(1, kernel_size)
lists = ee.List.repeat(list, kernel_size)
kernel = ee.Kernel.fixed(kernel_size, kernel_size, lists)

# Define satellite
if satellite_name == 's2':
    satellite = 's2'
elif satellite_name == 'landsat':
    if year >= 2014:
        satellite = 'l8'
    else:
        satellite = 'l7'

# Define scale
if satellite in ['l7', 'l8']:
    SCALE = 30
elif satellite in ['s2']: 
    SCALE = 10
    
# l7 ----------------------------------------------------------------
if satellite == "l7":

    # Bands
    b_b = 'B1'
    g_b = 'B2' 
    r_b = 'B3' 
    nir_b = 'B4'
    swir_b = 'B5'
    #other_bs = ['B5', 'B6', 'B7']

    #BANDS = single_bs.copy()
    BANDS = [b_b].copy()
    BANDS.append(g_b)
    BANDS.append(r_b)
    BANDS.append(nir_b)
    BANDS.append(swir_b)

    # Year
    # landsat 8 starts in May 1999; if year is less than
    # 2000, use 2000 as year (to ensure have year before and after)
    if year < 2000:
        year_use = 2000
    else:
        year_use = year

    # Year
    year_use = year

    year_plus = year_use + 1
    year_minus = year_use - 1

    year_minus_str = str(year_minus) + '-01-01'
    year_plus_str = str(year_plus) + '-12-31'

    image = ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')\
        .filterDate(year_minus_str, year_plus_str)\
        .map(cloud_mask_landsatSR)\
        .median() #\
        #.multiply(0.0001)

# l8 ----------------------------------------------------------------
if satellite == "l8":

    # Bands
    # FOR COLLECTION 2
    #b_b = 'SR_B2'
    #g_b = 'SR_B3' 
    #r_b = 'SR_B4' 
    #nir_b = 'SR_B5'
    #other_bs = ['SR_B6', 'SR_B7', 'ST_B10']

    # FOR COLLECTION 1
    b_b = 'B2'
    g_b = 'B3' 
    r_b = 'B4' 
    nir_b = 'B5'
    swir_b = 'B6'
    #other_bs = ['B6', 'B7', 'B10']

    #BANDS = single_bs.copy()
    BANDS = [b_b].copy()
    BANDS.append(g_b)
    BANDS.append(r_b)
    BANDS.append(nir_b)
    BANDS.append(swir_b)

    # Year
    # landsat 8 starts in April 2013; if year is less than
    # 2014, use 2014 as year (to ensure have year before and after)
    if year < 2014:
        year_use = 2014
    else:
        year_use = year

    year_plus = year_use + 1
    year_minus = year_use - 1

    year_minus_str = str(year_minus) + '-01-01'
    year_plus_str = str(year_plus) + '-12-31'

    #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
    #    .filterDate(year_minus_str, year_plus_str)\
    #    #.map(cloud_mask_landsatSR)\ #TODO cloud_mask_landsatSR doesn't work with landsat collection 2
    #    .median() #\
    #    #.multiply(0.0001)

    image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
        .filterDate(year_minus_str, year_plus_str)\
        .map(cloud_mask_landsatSR)\
        .median()

    #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
    #    .filterDate(year_minus_str, year_plus_str)\
    #    .median()

# s2 ----------------------------------------------------------------
if satellite == "s2":

    # Bands
    b_b = 'B2'
    g_b = 'B3' 
    r_b = 'B4' 
    nir_b = 'B8'
    swir_b = 'B11'
    #other_bs = ['B5', 'B6', 'B7', 'B8A', 'B11', 'B12', 'AOT']

    #BANDS = single_bs.copy()
    BANDS = [b_b].copy()
    BANDS.append(g_b)
    BANDS.append(r_b)
    BANDS.append(nir_b)
    BANDS.append(swir_b)

    # Year
    # sentinel starts in March 2017; juse use 2018
    year_use = 2019

    year_plus = year_use + 1
    year_minus = year_use - 1

    year_minus_str = str(year_minus) + '-01-01'
    year_plus_str = str(year_plus) + '-12-31'

    # Number of bands changes in sentinel, so need to select here before aggregate
    # https://gis.stackexchange.com/questions/374010/gee-tile-error-expected-a-homogeneous-image-collection-but-an-image-with-incom
    image = ee.ImageCollection('COPERNICUS/S2_SR')\
        .filterDate(year_minus_str, year_plus_str)\
        .map(cloud_mask_sentinel2)\
        .select(BANDS)\
        .median() # \
        #.multiply(0.0001)

# Select Bands
image = image.select(BANDS)

# Create Indices
# https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
ndvi = image.normalizedDifference([nir_b, r_b]).rename('NDVI');
ndbi = image.normalizedDifference([swir_b, nir_b]).rename('NDBI');
image = image.addBands(ndvi)
image = image.addBands(ndbi)

bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
image = image.addBands(bu)

# Subset bands; don't need those used to create NDVI and NDBI
image = image.select([b_b, g_b, r_b, 'NDVI', 'BU'])

# Image to neighborhood array
arrays = image.neighborhoodToArray(kernel)

# New ---------
#neighborhoodImage = myImageToBeSampled.neighborhoodToArray(kernel)
#samples = arrays.sampleRegions(collection=survey_fc)

# ee.batch.Export.table.toCloudStorage
# ee.batch.Export.table.toDrive
# Export.table.toDrive
#mytask = ee.batch.Export.table.toDrive(
# collection = samples,
# fileFormat = 'TFRecord',
# description = 'test123',
# folder = 'gee_extracts',
# selectors = [b_b, g_b, r_b, 'NDVI', 'BU'] + ['uid', 'ntl_group'])

#return mytask

# OLD ---------
# Extract values from GEE   
values_ee = arrays.sample(
  region = survey_fc, 
  scale = SCALE,
  tileScale = 12 # 8
)

dict_ee = values_ee.getInfo()

# Convert values to numpy array
n_rows = survey_df.shape[0]
daytime_f = dict_ee['features']

# Extract data
out_ex_proto_list = ee_to_np_daytime(daytime_f, survey_df, n_rows, b_b, g_b, r_b)


AttributeError: module 'tensorflow_core._api.v2.io' has no attribute 'encode_png'

In [None]:
def prep_cnn_np(survey_df,
                satellite_name,
                kernel_size,
                year):
    '''
    Creates numpy arrays for CNN

    Input:  df - pandas dataframe
            lat_name - name of latitude variable in df
            lon_name - name of longitude variable in df
    Output: geopandas dataframe
    '''

    # Setup --------------------------------------------------------------------
    # Survey to FeatureCollection  
    survey_fc = survey_to_fc(survey_df)

    # Define kernel for neighborhood array
    list = ee.List.repeat(1, kernel_size)
    lists = ee.List.repeat(list, kernel_size)
    kernel = ee.Kernel.fixed(kernel_size, kernel_size, lists)
    
    # Define satellite
    if satellite_name == 's2':
        satellite = 's2'
    elif satellite_name == 'landsat':
        if year >= 2014:
            satellite = 'l8'
        else:
            satellite = 'l7'
            
    # Define scale
    if satellite in ['l7', 'l8']:
        SCALE = 30
    elif satellite in ['s2']: 
        SCALE = 10

    # Prep NTL -----------------------------------------------------------------
    
    # Year
    # VIIRS starts in 2012. At minimum, use 2013 to have year before and after
    #if False:
    #    if year <= 2013:
    #        year_use = 2013
    #    else:
    #        year_use = year

    #    year_plus = year_use + 1
    #    year_minus = year_use - 1

    #    year_minus_str = str(year_minus) + '-01-01'
    #    year_plus_str = str(year_plus) + '-12-31'

        # Reduce image collection
    #    ntl_image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
    #        .filterDate(year_minus_str, year_plus_str)\
    #        .median()

        # Select Bands  
    #    ntl_image = ntl_image.select(['avg_rad'])

        # Image to neighborhood array
    #    ntl_arrays = ntl_image.neighborhoodToArray(kernel)

        # Extract values from GEE    
    #    ntl_values_ee = ntl_arrays.sample(
    #      region = survey_fc, 
    #      scale = SCALE,
    #      tileScale = 10 #8
    #    )

    #    ntl_dict_ee = ntl_values_ee.getInfo()

        # Convert values to numpy array
        #n_rows = survey_df.shape[0]
    #    ntl_f = ntl_dict_ee['features']    
        
    # l7 ----------------------------------------------------------------
    if satellite == "l7":
        
        # Bands
        b_b = 'B1'
        g_b = 'B2' 
        r_b = 'B3' 
        nir_b = 'B4'
        swir_b = 'B5'
        #other_bs = ['B5', 'B6', 'B7']
        
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)
        
        # Year
        # landsat 8 starts in May 1999; if year is less than
        # 2000, use 2000 as year (to ensure have year before and after)
        if year < 2000:
            year_use = 2000
        else:
            year_use = year

        # Year
        year_use = year
        
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median() #\
            #.multiply(0.0001)
    
    # l8 ----------------------------------------------------------------
    if satellite == "l8":
                
        # Bands
        # FOR COLLECTION 2
        #b_b = 'SR_B2'
        #g_b = 'SR_B3' 
        #r_b = 'SR_B4' 
        #nir_b = 'SR_B5'
        #other_bs = ['SR_B6', 'SR_B7', 'ST_B10']
        
        # FOR COLLECTION 1
        b_b = 'B2'
        g_b = 'B3' 
        r_b = 'B4' 
        nir_b = 'B5'
        swir_b = 'B6'
        #other_bs = ['B6', 'B7', 'B10']
        
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)

        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
        #    .filterDate(year_minus_str, year_plus_str)\
        #    #.map(cloud_mask_landsatSR)\ #TODO cloud_mask_landsatSR doesn't work with landsat collection 2
        #    .median() #\
        #    #.multiply(0.0001)
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()
        
        #image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')\
        #    .filterDate(year_minus_str, year_plus_str)\
        #    .median()
            
    # s2 ----------------------------------------------------------------
    if satellite == "s2":
        
        # Bands
        b_b = 'B2'
        g_b = 'B3' 
        r_b = 'B4' 
        nir_b = 'B8'
        swir_b = 'B11'
        #other_bs = ['B5', 'B6', 'B7', 'B8A', 'B11', 'B12', 'AOT']
     
        #BANDS = single_bs.copy()
        BANDS = [b_b].copy()
        BANDS.append(g_b)
        BANDS.append(r_b)
        BANDS.append(nir_b)
        BANDS.append(swir_b)
        
        # Year
        # sentinel starts in March 2017; juse use 2018
        year_use = 2019
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'

        # Number of bands changes in sentinel, so need to select here before aggregate
        # https://gis.stackexchange.com/questions/374010/gee-tile-error-expected-a-homogeneous-image-collection-but-an-image-with-incom
        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_sentinel2)\
            .select(BANDS)\
            .median() # \
            #.multiply(0.0001)

    # Select Bands
    image = image.select(BANDS)
    
    # Create Indices
    # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
    ndvi = image.normalizedDifference([nir_b, r_b]).rename('NDVI');
    ndbi = image.normalizedDifference([swir_b, nir_b]).rename('NDBI');
    image = image.addBands(ndvi)
    image = image.addBands(ndbi)
        
    bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
    image = image.addBands(bu)
        
    # Subset bands; don't need those used to create NDVI and NDBI
    image = image.select([b_b, g_b, r_b, 'NDVI', 'BU'])
        
    # Image to neighborhood array
    arrays = image.neighborhoodToArray(kernel)
    
    # New ---------
    #neighborhoodImage = myImageToBeSampled.neighborhoodToArray(kernel)
    #samples = arrays.sampleRegions(collection=survey_fc)
    
    # ee.batch.Export.table.toCloudStorage
    # ee.batch.Export.table.toDrive
    # Export.table.toDrive
    #mytask = ee.batch.Export.table.toDrive(
    # collection = samples,
    # fileFormat = 'TFRecord',
    # description = 'test123',
    # folder = 'gee_extracts',
    # selectors = [b_b, g_b, r_b, 'NDVI', 'BU'] + ['uid', 'ntl_group'])
    
    #return mytask

    # OLD ---------
    # Extract values from GEE   
    values_ee = arrays.sample(
      region = survey_fc, 
      scale = SCALE,
      tileScale = 12 # 8
    )
    
    dict_ee = values_ee.getInfo()
     
    # Convert values to numpy array
    n_rows = survey_df.shape[0]
    daytime_f = dict_ee['features']
    
    # Extract data
    out_ex_proto_list = ee_to_np_daytime(daytime_f, survey_df, n_rows, b_b, g_b, r_b)
    
    return out_ex_proto_list


In [13]:
bndvi_np_tf = tf.io.encode_png(bndvi_np, compression = 9)

AttributeError: module 'tensorflow' has no attribute 'encode_png'

In [14]:
tf.io.

NameError: name 'bndvi_np' is not defined

In [9]:
# DONT SKIP ERRORS
if True:
    ### Loop through all tfrecords
    for tfr_i in tf_record_list:

        # Sometimes we get computational time out errors. If occurs, just skip and go to next.
        # We can then go back and rescrape missed ones.

        survey_df_yeari = survey_df[survey_df['tfrecord_name'] == tfr_i]
        year_i = survey_df_yeari['year'].iloc[0]

        ### Loop through chunks within tfrecord (can only pull so much data from GEE at a time)
        survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)

        print("Putting " + str(survey_df_yeari.shape[0]) + " observations into " + tfr_i)

        proto_examples_all = []
        for chunk_i in list(np.unique(survey_df_yeari.chunk_id)):
            #time.sleep(6)
            
            ## Sometimes we hit a memory error; try until we don't hit that
            
            # TODO: could say: try 3 times?
            try_extract_data = True
            while try_extract_data:
                try:

                    print("Observation: " + str(len(proto_examples_all)) + "/" + str(survey_df_yeari.shape[0]))

                    survey_df_yeari_chunki = survey_df_yeari[survey_df_yeari['chunk_id'] == chunk_i]

                    proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
                    proto_examples_all.extend(proto_examples_i)
                    
                    try_extract_data = False
                    
                except:
                    print("Error!")
                    time.sleep(10)
                    pass
            
        ### Save data as tf record
        out_path_i = os.path.join(out_path, tfr_i)
        print(out_path_i)
        with tf.io.TFRecordWriter(out_path_i) as writer:
            for tf_example in proto_examples_all:
                writer.write(tf_example.SerializeToString())

        print("Success \o/")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Putting 66 observations into forcnn_AL_1_1_all.tfrecord
Observation: 0/66
Observation: 1/66
Observation: 2/66
Observation: 3/66
Error!


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 377, in _make_request
    httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-9-35e4ef9beeda>", line 32, in <module>
    proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
  File "/Users/robmarty/Documents/Github/Pakistan-Poverty-from-Sky/DataWork/02_get_process_ancillary_data/CNN Features Predict NTL/ee_utils.py", line 443, in prep_cnn_np
    dict_ee = values_ee.getInfo()
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/ee/collection.py", line 127, in getInfo
    return super(Collection, self).getInfo()
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/ee/computedobject.py", line 98, in getInfo


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 377, in _make_request
    httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-9-35e4ef9beeda>", line 32, in <module>
    proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
  File "/Users/robmarty/Documents/Github/Pakistan-Poverty-from-Sky/DataWork/02_get_process_ancillary_data/CNN Features Predict NTL/ee_utils.py", line 443, in prep_cnn_np
    dict_ee = values_ee.getInfo()
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/ee/collection.py", line 127, in getInfo
    return super(Collection, self).getInfo()
  File "/Users/robmarty/anaconda3/lib/python3.7/site-packages/ee/computedobject.py", line 98, in getInfo


TypeError: can only concatenate str (not "list") to str

In [None]:
# SKIP ERRORS

### Loop through all tfrecords
for tfr_i in tf_record_list:

    # Sometimes we get computational time out errors. If occurs, just skip and go to next.
    # We can then go back and rescrape missed ones.

    survey_df_yeari = survey_df[survey_df['tfrecord_name'] == tfr_i]
    year_i = survey_df_yeari['year'].iloc[0]

    ### Loop through chunks within tfrecord (can only pull so much data from GEE at a time)
    survey_df_yeari['chunk_id'] = utils.chunk_ids(survey_df_yeari.shape[0], CHUNK_SIZE)

    print("Putting " + str(survey_df_yeari.shape[0]) + " observations into " + tfr_i)

    proto_examples_all = []
    for chunk_i in list(np.unique(survey_df_yeari.chunk_id)):
        ## Sometimes we hit a memory error; try until we don't hit that

        # TODO: could say: try 3 times?
        try_extract_data = 1
        while try_extract_data < 4:
            try:

                print("Observation: " + str(len(proto_examples_all)) + "/" + str(survey_df_yeari.shape[0]))

                survey_df_yeari_chunki = survey_df_yeari[survey_df_yeari['chunk_id'] == chunk_i]

                proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki, SATELLITE, KERNEL_SIZE, year_i)
                proto_examples_all.extend(proto_examples_i)
                
                try_extract_data = 10

            except:
                try_extract_data = try_extract_data + 1
                print("Error!")
                print(try_extract_data)
                print(survey_df_yeari_chunki['uid'])
                time.sleep(5)
                pass

    ### Save data as tf record
    out_path_i = os.path.join(out_path, tfr_i)
    print(out_path_i)
    with tf.io.TFRecordWriter(out_path_i) as writer:
        for tf_example in proto_examples_all:
            writer.write(tf_example.SerializeToString())

    print("Success \o/")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Putting 36 observations into forcnn_UG_4_1_all.tfrecord
Observation: 0/36
Observation: 1/36
Observation: 2/36
Observation: 3/36
Observation: 4/36
Observation: 5/36
Error ---
2161    UG201800000328
Name: uid, dtype: object
Putting 246 observations into nocnn_IA_4_10_all.tfrecord
Observation: 0/246
Observation: 1/246
Observation: 2/246
Observation: 3/246
Observation: 4/246
Observation: 5/246
Observation: 6/246
Observation: 7/246
Observation: 8/246
Observation: 9/246
Observation: 10/246
Observation: 11/246
Observation: 12/246
Observation: 13/246
Observation: 14/246
Observation: 15/246
Observation: 16/246
Observation: 17/246
Observation: 18/246
Observation: 19/246
Observation: 20/246
Observation: 21/246
Observation: 22/246
Observation: 23/246
Observation: 24/246
Observation: 25/246
Observation: 26/246
Observation: 27/246
Observation: 28/246
Observation: 29/246
Observation: 30/246
Observation: 31/246
Observation: 32/246
Observation: 33/246
Observation: 34/246
Observation: 35/246
Observation

In [10]:
for row_i in range(0,10):
    print(row_i)
    proto_examples_i = utils.prep_cnn_np(survey_df_yeari_chunki.iloc[[row_i]], SATELLITE, KERNEL_SIZE, year_i)

0
1
2
3
4


IndexError: positional indexers are out-of-bounds

In [None]:
survey_df_yeari_chunki.iloc[[4]]