In [None]:
import json
import os
import pickle
import sys

import ee
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm


from shapely.geometry import box
from geojson import Point, Feature, FeatureCollection, dump
import geopandas

sys.path.append('../')
from scripts.get_s2_data_ee import get_history, get_history_polygon, get_pixel_vectors

%load_ext autoreload
%autoreload 2

In [None]:
# Sentinel-2 bands used in this notebook, mapped to a short description
# (band name, central wavelength). The insertion order is the order in which
# create_img_stack stacks the bands into image channels.
band_descriptions = dict([
    ('B1', 'Aerosols, 442nm'),
    ('B2', 'Blue, 492nm'),
    ('B3', 'Green, 559nm'),
    ('B4', 'Red, 665nm'),
    ('B5', 'Red Edge 1, 704nm'),
    ('B6', 'Red Edge 2, 739nm'),
    ('B7', 'Red Edge 3, 779nm'),
    ('B8', 'NIR, 833nm'),
    ('B8A', 'Red Edge 4, 864nm'),
    ('B9', 'Water Vapor, 943nm'),
    ('B11', 'SWIR 1, 1610nm'),
    ('B12', 'SWIR 2, 2186nm'),
])

## Load Sampling Locations

In [None]:
# Directory holding the site GeoJSON files, relative to this notebook.
data_dir = os.path.join('..', '..', 'mining', 'data')


def _load_features(file_name):
    """Return the 'features' list of a GeoJSON file stored in data_dir."""
    with open(os.path.join(data_dir, file_name), 'r') as geojson_file:
        return json.load(geojson_file)['features']


positive_sites = _load_features('MinesPos2018-2020Sentinel.geojson')
negative_sites = _load_features('MinesNeg2018-2020Sentinel.geojson')

In [None]:
def _polygon_sites_frame(sites, prefix):
    """Tabulate GeoJSON features as name / lon / lat / coords / polygons.

    lon, lat and coords come from element 0 of the squeezed coordinate array
    (presumably the first vertex of a single-ring polygon; verify against the
    GeoJSON files). Each feature is also wrapped in its own
    ee.FeatureCollection.
    """
    anchors = [np.squeeze(site['geometry']['coordinates'])[0] for site in sites]
    return pd.DataFrame({
        'name': [prefix + str(i) for i in range(len(sites))],
        'lon': [anchor[0] for anchor in anchors],
        'lat': [anchor[1] for anchor in anchors],
        'coords': anchors,
        'polygons': [ee.FeatureCollection([site]) for site in sites],
    })


# Positive (mining) sites.
mining_sites = _polygon_sites_frame(positive_sites, 'positive_')
display(mining_sites.head())

# Negative (non-mining) sites.
jungle_sites = _polygon_sites_frame(negative_sites, 'negative_')
display(jungle_sites.head())

In [None]:
# Load an additional, hand-picked set of negative sites (GeoJSON features).
with open(os.path.join(data_dir, 'MinesNeg_caleb_selection.geojson'), 'r') as f:
    caleb_negatives = json.load(f)['features']

In [None]:
# These features are indexed as flat coordinates (element 0 = lon, 1 = lat),
# unlike the polygon files above where element 0 is itself a coordinate pair.
river_points = [np.squeeze(site['geometry']['coordinates']) for site in caleb_negatives]
negative_river_sites = pd.DataFrame({
    'name': [f'river_negative_{i}' for i in range(len(caleb_negatives))],
    'lon': [point[0] for point in river_points],
    'lat': [point[1] for point in river_points],
    'coords': river_points,
})
display(negative_river_sites.head())

In [None]:
# Positive sites: plain Python lists of coordinates and names.
positive_coords = mining_sites['coords'].pipe(list)
positive_names = mining_sites['name'].pipe(list)
print(len(positive_coords), 'positive sites loaded')

# Negative sites: jungle polygons first, followed by the river selection.
negative_coords = list(jungle_sites['coords']) + list(negative_river_sites['coords'])
negative_names = list(jungle_sites['name']) + list(negative_river_sites['name'])
print(len(negative_coords), 'negative sites loaded')

## Download Data

In [None]:
# Settings shared by the positive and negative get_history downloads below.
# Enter rect width in degrees (0.035 max recommended) and site coordinates
rect_width = 0.002
num_months = 12  # forwarded to get_history as num_months
start_date = '2018-01-01'  # forwarded to get_history; also part of the output pickle names

In [None]:
# Fetch the patch history for every positive site via the project helper
# scripts.get_s2_data_ee.get_history (its internals are not visible here).
# Later cells index the result as history[date][site_name][band].
positive_history = get_history(positive_coords, 
                               positive_names, 
                               rect_width,
                               num_months = num_months,
                               start_date = start_date)

In [None]:
# Same request as for the positive sites, for the combined negative sites
# (jungle polygons plus the river selection).
negative_history = get_history(negative_coords, 
                               negative_names, 
                               rect_width,
                               num_months = num_months,
                               start_date = start_date)

In [None]:
# Inspect the top-level keys of the downloaded history (create_img_stack iterates them as dates).
list(negative_history.keys())

In [None]:
# Persist both raw histories so the download does not need to be repeated.
history_outputs = (
    (f'2d_mining_sites_{start_date}_{num_months}.pkl', positive_history),
    (f'2d_negative_sites_{start_date}_{num_months}.pkl', negative_history),
)
for pickle_name, site_history in history_outputs:
    with open(os.path.join(data_dir, pickle_name), 'wb') as f:
        pickle.dump(site_history, f)

## Process Data for Network

In [None]:
def create_img_stack(patch_history):
    """Flatten a nested patch history into a list of channels-last images.

    `patch_history` is indexed as patch_history[date][site][band]. A site
    contributes an image only when every band named in `band_descriptions`
    has a non-zero first dimension and the median over all of its band values
    is positive; any other site is skipped silently.

    Returns a list of arrays with the band axis moved to the last position,
    in date-then-site iteration order.
    """
    img_stack = []
    for sites in patch_history.values():
        for bands in sites.values():
            first_dims = [np.shape(bands[name])[0] for name in band_descriptions]
            if not np.all(first_dims):
                # At least one band came back empty for this site/date.
                continue
            cube = [bands[name] for name in band_descriptions]
            if np.median(cube) > 0:
                # (band, rows, cols) -> (rows, cols, band)
                img_stack.append(np.moveaxis(np.array(cube), 0, 2))
    return img_stack

def normalize(x):
    """Convert `x` to an ndarray and divide every value by the fixed constant 3000."""
    divisor = 3000
    return np.asarray(x) / divisor

In [None]:
# Turn the nested download histories into flat lists of (rows, cols, band) images.
# Sites rejected by create_img_stack's filters are not counted here.
positive_img = create_img_stack(positive_history)
print(len(positive_img), 'positive images extracted')

negative_img = create_img_stack(negative_history)
print(len(negative_img), 'negative images extracted')

In [None]:
# Cut every image into a 4 x 4 grid of 28 x 28 pixel tiles (all bands kept).
# NOTE(review): images smaller than 112 pixels on a side yield truncated or
# empty tiles because of how slicing works; confirm the downloaded patch size.
tile_origins = [(row * 28, col * 28) for row in range(4) for col in range(4)]

subsampled_positives = [
    img[top:top + 28, left:left + 28, :]
    for img in positive_img
    for top, left in tile_origins
]
subsampled_negatives = [
    img[top:top + 28, left:left + 28, :]
    for img in negative_img
    for top, left in tile_origins
]
    

In [None]:
# Replace the full-size image lists with the tiled versions.
# Note: this rebinds positive_img / negative_img, so the cell is not idempotent on its own.
positive_img = np.array(subsampled_positives)
negative_img = np.array(subsampled_negatives)

In [None]:
# Crop every tile to one common square size so the arrays can be concatenated.
# The size is the smallest height or width found in either class; looking only
# at the height of the positive tiles would leave smaller negatives or
# non-square tiles uncropped.
min_dimension = min(
    min(np.shape(img)[:2])
    for img in list(positive_img) + list(negative_img)
)
positive_img = [img[:min_dimension, :min_dimension, :] for img in positive_img]
negative_img = [img[:min_dimension, :min_dimension, :] for img in negative_img]

In [None]:
# Label positives 1 and negatives 0, scale the pixel values, then shuffle
# images and labels together with a fixed seed.
stacked_labels = np.concatenate((np.ones(len(positive_img)), np.zeros(len(negative_img))))
stacked_images = normalize(np.concatenate((positive_img, negative_img)))
x, y = shuffle(stacked_images, stacked_labels, random_state=42)

In [None]:
# Hold out 10% of the samples for evaluation (fixed seed, no stratification).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=42)

pct_negative_train = 100 * np.count_nonzero(y_train == 0.0) / len(y_train)
pct_negative_test = 100 * np.count_nonzero(y_test == 0.0) / len(y_test)
print("Num Train:\t\t", len(x_train))
print("Num Test:\t\t", len(x_test))
print(f"Percent Negative Train:\t {pct_negative_train:.1f}")
print(f"Percent Negative Test:\t {pct_negative_test:.1f}")

# One-hot encode the labels: column 0 = negative, column 1 = positive.
num_classes = 2
y_train, y_test = (keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test))

## Construct and Train Model

In [None]:
# Per-sample shape taken from the first training image; used as the model input shape below.
input_shape = np.shape(x_train[0])
print("Input Shape:", input_shape)

In [None]:
# Small CNN: two conv/pool stages followed by a two-layer dense head with
# light dropout and a softmax over the two classes.
feature_extractor = [
    layers.Conv2D(16, kernel_size=3, activation="relu"),
    layers.MaxPooling2D(pool_size=3),
    layers.Conv2D(32, kernel_size=3, activation="relu"),
    layers.MaxPooling2D(pool_size=3),
    layers.Flatten(),
]
classifier_head = [
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.1),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.1),
    layers.Dense(num_classes, activation="softmax"),
]
model = keras.Sequential([keras.Input(shape=input_shape), *feature_extractor, *classifier_head])
model.summary()

In [None]:
# Compile with Adam and report accuracy.
# NOTE(review): the output layer is a 2-way softmax trained on one-hot labels;
# "categorical_crossentropy" is the conventional loss for that setup. Confirm
# that binary_crossentropy is intentional.
model.compile(loss="binary_crossentropy", 
              optimizer="adam", 
              metrics=["accuracy"])

In [None]:
# Augmentation pipeline used for training: arbitrary rotations, flips, zoom
# and small shifts, with borders filled by reflection.
# NOTE(review): per the Keras documentation a list passed to
# width_shift_range / height_shift_range is a set of shift values to sample
# from, not a (min, max) range as it is for zoom_range. Confirm that
# [0.8, 1.2] is what was intended here.
datagen = ImageDataGenerator(
    rotation_range=360,
    #brightness_range=[0.8,1.2],
    width_shift_range=[0.8, 1.2],
    height_shift_range=[0.8, 1.2],
    #shear_range=10,
    zoom_range=[0.8, 1.2],
    vertical_flip=True,
    horizontal_flip=True,
    fill_mode='reflect'
)


# Preview one augmented batch as a 6 x 6 grid of RGB thumbnails.
plt.figure(figsize=(12,12), facecolor=(1,1,1))
# Use the builtin next(): the iterator's .next() method is not available in
# every Keras release, while the iterator protocol is.
images, labels = next(datagen.flow(x_train, y_train, batch_size=36))
for index, (image, label) in enumerate(zip(images, labels)):

    # Channels 3, 2, 1 are B4 (red), B3 (green), B2 (blue) in band_descriptions order.
    rgb = np.stack((image[:,:,3],
                    image[:,:,2], 
                    image[:,:,1]), axis=-1)
    plt.subplot(6,6,index+1)
    plt.imshow(np.clip(rgb, 0, 1))
    # One-hot column 1 is the positive (mine) class.
    if label[1] == 1:
        plt.title('Mine')
    else:
        plt.title('No Mine')
    plt.axis('off')
plt.suptitle('Data Augmentation Examples')
plt.tight_layout()
plt.show()
    

In [None]:
# Train on augmented batches; the held-out split is used (unaugmented) for validation.
batch_size = 32
epochs = 10
model.fit(datagen.flow(x_train, y_train, batch_size=batch_size), 
          epochs=epochs, 
          validation_data = (x_test, y_test),
          verbose = 1
         )

# Evaluate Network on Grid

In [None]:
# Load a previously saved model from disk. This rebinds `model`, so everything
# below uses the saved weights rather than the network trained above.
model = keras.models.load_model('../models/1-20-2021-filtered-both.h5')

In [None]:
def create_img_stack(patch_history):
    """Build a flat list of (rows, cols, band) images from a patch history.

    This re-declares the helper from the "Process Data for Network" section
    so the evaluation section can be run without it.

    `patch_history` is indexed as patch_history[date][site][band]. Sites where
    any band in `band_descriptions` has a zero-length first dimension, or whose
    overall median value is not positive, are skipped.
    """
    img_stack = []
    for sites in patch_history.values():
        for bands in sites.values():
            first_dims = [np.shape(bands[name])[0] for name in band_descriptions]
            if not np.all(first_dims):
                continue
            cube = [bands[name] for name in band_descriptions]
            if np.median(cube) > 0:
                # Move the band axis from the front to the end.
                img_stack.append(np.moveaxis(np.array(cube), 0, 2))
    return img_stack

def normalize(x):
    """Return `x` as an ndarray with every value divided by 3000."""
    divisor = 3000
    return np.asarray(x) / divisor

def create_sampling_grid(min_lon, max_lon, min_lat, max_lat, steps_lon, steps_lat):
    """Return a regular grid of [lon, lat] points covering a bounding box.

    Both bounds are included on each axis. Points are ordered row by row:
    all longitudes for the first latitude, then the next latitude, and so on.
    """
    lon_values = np.linspace(min_lon, max_lon, steps_lon)
    lat_values = np.linspace(min_lat, max_lat, steps_lat)
    return [[lon_value, lat_value] for lat_value in lat_values for lon_value in lon_values]

def plot_sampling_grid(coords, rect_width=0.025, output=False):
    """Plot one square per sampling point and optionally return the squares.

    Args:
        coords: sequence of points indexed as point[0] = lon, point[1] = lat.
        rect_width: side length of each square, in the same units as coords.
        output: when True, return the squares as a geojson FeatureCollection.

    Returns:
        The FeatureCollection when `output` is True, otherwise None.
    """
    half_width = rect_width / 2
    features = [
        Feature(geometry=box(coord[0] - half_width, coord[1] - half_width,
                             coord[0] + half_width, coord[1] + half_width))
        for coord in coords
    ]
    feature_collection = FeatureCollection(features)
    geopandas.GeoDataFrame.from_features(feature_collection).plot(figsize=(10, 8))

    if output:
        return feature_collection
    

def get_image_stack(coords, start_date='2020-05-01', rect_width=0.025, scale=100):
    """Download one month of patches for `coords` and return square images.

    Args:
        coords: list of points forwarded to get_history.
        start_date: date string forwarded to get_history.
        rect_width: patch width forwarded to get_history.
        scale: forwarded to get_history as `scale`.

    Returns:
        (history, img_stack): the raw get_history result and the list of
        images kept by create_img_stack, all cropped to one common square size.

    Raises:
        ValueError: if no site passes create_img_stack's filters.
    """
    names = ['sample_' + str(i) for i in range(len(coords))]
    history = get_history(coords,
                          names,
                          rect_width,
                          start_date=start_date,
                          num_months=1,
                          scale=scale)
    img_stack = create_img_stack(history)
    if not img_stack:
        # Previously this surfaced as an opaque IndexError on img_stack[0].
        raise ValueError(f"No usable images were returned for start_date={start_date!r}")
    print("Image shape before cropping:", img_stack[0].shape)
    # Crop to the smallest height/width over ALL images, not just the first
    # one, so the result can always be stacked into a single array.
    min_dim = min(min(img.shape[:2]) for img in img_stack)
    img_stack = [img[:min_dim, :min_dim, :] for img in img_stack]

    return history, img_stack

def predict_grid(model, history, img_stack, coords, date=None):
    """Run the model on an image stack and pair each prediction with its point.

    Args:
        model: object with a `predict` method whose output has the class
            scores in columns; column 1 is used as the prediction.
        history: get_history result, indexed as history[date][site][band].
        img_stack: images built from `history` by create_img_stack.
        coords: points (point[0] = lon, point[1] = lat) in the same order as
            the sites of `history[date]`.
        date: key of `history` to read. Defaults to the notebook-level
            `start_date`, which is what this function always used before.

    Returns:
        DataFrame with columns 'pred', 'lon', 'lat'.

    Raises:
        ValueError: if the number of sites kept by the B2 filter differs from
            the number of predictions.
    """
    if date is None:
        date = start_date

    preds = model.predict(normalize(img_stack))[:,1]

    # Keep the points whose B2 band has a positive median. A separate loop
    # name is used so the `coords` argument is no longer rebound mid-iteration.
    cloud_free_coords = [
        coord
        for site, coord in zip(history[date], coords)
        if np.median(history[date][site]['B2']) > 0
    ]

    # NOTE(review): create_img_stack filters on all bands, this filter only on
    # B2, so the two can disagree. Fail with an explicit message if they do.
    if len(cloud_free_coords) != len(preds):
        raise ValueError(
            f"{len(preds)} predictions but {len(cloud_free_coords)} sites passed the B2 filter"
        )

    preds_df = pd.DataFrame({
        'pred': preds,
        'lon': [coord[0] for coord in cloud_free_coords],
        'lat': [coord[1] for coord in cloud_free_coords]}
    )

    return preds_df
    
def write_data(data_frame, file_path, rect_width):
    """Save predictions as CSV and GeoJSON, then plot them as a map.

    Writes `file_path + '.csv'` and `file_path + '.geojson'`. The GeoJSON holds
    one square of side `rect_width` per row, centred on (lon, lat) and carrying
    the row's 'pred' value as a property. The plot colours each square by
    'pred' on a fixed 0-1 scale.
    """
    data_frame.to_csv(file_path + '.csv', index=False)

    half_width = rect_width / 2
    rows = zip(list(data_frame['lon']), list(data_frame['lat']), list(data_frame['pred']))
    features = [
        Feature(
            geometry=box(lon - half_width, lat - half_width, lon + half_width, lat + half_width),
            properties={'pred': pred},
        )
        for lon, lat, pred in rows
    ]
    feature_collection = FeatureCollection(features)

    with open(file_path + '.geojson', 'w') as f:
        dump(feature_collection, f)

    prediction_map = geopandas.GeoDataFrame.from_features(feature_collection)
    prediction_map.plot(column='pred', cmap='seismic', figsize=(10, 8), vmin=0, vmax=1)

    
def stretch_histogram(array, min_val=0.1, max_val=0.75, gamma=1.2):
    """Contrast-stretch `array` to [0, 1] and apply a gamma curve.

    Values are clipped to [min_val, max_val], rescaled linearly so that
    min_val maps to 0 and max_val maps to 1, and then raised to `gamma`.

    The exponent previously bound only to the denominator
    (`a / b ** gamma` is `a / (b ** gamma)`), so no gamma curve was applied
    and the result relied on a final clip to stay within [0, 1].
    """
    clipped = np.clip(array, min_val, max_val)
    rescaled = (clipped - min_val) / (max_val - min_val)
    return rescaled ** gamma

In [None]:
# Side length of each sampled patch, in degrees (rebinds the value used for training).
rect_width = 0.02

# Bounding box to sample. Uncomment one alternative to switch region.
#tambopata whole
min_lon, max_lon = -70.72, -69.8
min_lat, max_lat = -13.2, -12.57

# Caroni whole
#min_lon, max_lon = -63.09674922312161, -62.18700279001181
#min_lat, max_lat = 4.514923184841662, 6.511625537541098

#min_lon, max_lon = -70.64, -70.4
#min_lat, max_lat = -12.96, -13.07


# Number of grid points along each axis. With the Tambopata box this gives a
# spacing of roughly 0.048 x 0.045 degrees, which is larger than rect_width,
# so the patches do not tile the area contiguously.
steps_lon = 20
steps_lat = 15

sampling_coords = create_sampling_grid(min_lon, max_lon, min_lat, max_lat, steps_lon, steps_lat)
plot_sampling_grid(sampling_coords, rect_width)


In [None]:
# Download the grid, cache the image stack, predict, and write CSV/GeoJSON outputs.
# predict_grid reads the notebook-level start_date, so it must match the one used here.
start_date='2019-05-01'
scale = 20
history, img_stack = get_image_stack(sampling_coords, start_date=start_date, rect_width=rect_width, scale=scale)

# The bounding box, patch width and scale are encoded in the output file name.
file_path = f"../data/tambopata_mine_{min_lon},{max_lon}_{min_lat},{max_lat}_{rect_width}_{scale}"
with open(file_path + '.pkl', 'wb') as f:
    pickle.dump(img_stack, f)

preds_df = predict_grid(model, history, img_stack, sampling_coords)
write_data(preds_df, file_path, rect_width)



In [None]:
# Download and cache image stacks for the positive sites at the grid patch size.
# Note: this rebinds start_date, history and img_stack from the grid cell above.
start_date='2019-09-01'
scale = 20
history, img_stack = get_image_stack(positive_coords, start_date=start_date, rect_width=rect_width, scale=scale)
file_path = f"../data/positive_sites_{rect_width}_{scale}"
with open(file_path + '.pkl', 'wb') as f:
    pickle.dump(img_stack, f)


In [None]:
# Same as the previous cell, for the negative sites.
# Note: history and img_stack now refer to the negative sites.
start_date='2019-09-01'
scale = 20
history, img_stack = get_image_stack(negative_coords, start_date=start_date, rect_width=rect_width, scale=scale)

file_path = f"../data/negative_sites_{rect_width}_{scale}"
with open(file_path + '.pkl', 'wb') as f:
    pickle.dump(img_stack, f)

In [None]:
# Quick look at channel 3 (B4, red, in band_descriptions order) of the first image.
plt.imshow(img_stack[0][:,:,3])

In [None]:
# Mosaic of contrast-stretched RGB thumbnails, each titled with its prediction.
# NOTE(review): when the cells run top to bottom, img_stack holds the
# negative-site images while preds_df holds the grid predictions, so the
# pairing below may be mismatched. Confirm which stack was intended.
num_img = int(np.ceil(np.sqrt(len(img_stack))))

plt.figure(figsize=(36,36), dpi=150)
for position, (tile, score) in enumerate(zip(img_stack, preds_df['pred']), start=1):
    # Channels 3, 2, 1 = red, green, blue.
    rgb = normalize(tile[:, :, [3, 2, 1]])
    plt.subplot(num_img, num_img, position)
    plt.imshow(stretch_histogram(rgb))
    plt.title(f"{score:.0%}")
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# NOTE(review): `preds` is first assigned further down the notebook, so on a
# fresh top-to-bottom run this raises NameError. Confirm whether
# len(preds_df) was meant.
len(preds)

## Test the network on candidate site

In [None]:
# Download six months of patches for a single candidate site.
# rect_width and start_date are whatever the earlier cells last assigned.
num_months = 6
test_coords = [-62.42462292953776, 4.870750832241027]
# Alternative candidate locations:
#test_coords = [-62.39958492247051, 4.848350836417974]
#test_coords = [-61.73903184056069, 4.3381195947723405]
#test_coords = [-61.74202162706261, 4.417347798525773]
test_patch = get_history([test_coords], 
                           ['test_patch'],
                           rect_width,
                           start_date=start_date,
                           num_months=num_months)

test_data = create_img_stack(test_patch)
# Crop to a fixed 84 x 84 window (hard-coded size).
test_data = [img[:84, :84, :] for img in test_data]

In [None]:
# Classify every image of the candidate site and show the RGB thumbnails
# with their individual scores; the figure title reports the mean score.
num_img = int(np.ceil(np.sqrt(len(test_data))))
preds = []
plt.figure(figsize=(8,8), facecolor=(1,1,1))
for position, month in enumerate(test_data, start=1):
    scaled = normalize(month)
    score = model.predict(np.expand_dims(scaled, 0))[0][1]
    preds.append(score)

    plt.subplot(num_img, num_img, position)
    # Channels 3, 2, 1 = red, green, blue.
    plt.imshow(np.clip(scaled[:, :, [3, 2, 1]], 0, 1), vmin=0, vmax=1)
    plt.title(f"{100 * score:.0f}%")
    plt.axis('off')
plt.suptitle(f"Mean Classification: {100*np.mean(preds):.0f}%\nSite: {test_coords[1]:.3f}°, {test_coords[0]:.3f}°", size=16)
plt.tight_layout()
#plt.savefig(f'/Users/ckruse/Downloads/{test_coords[1]:32f}, {test_coords[0]:.3f}.png', bbox_inches='tight')
plt.show()

## Test network on river sampling sites

In [None]:
# Load the river sampling sites (GeoJSON features) from the shared data directory.
data_dir = os.path.join('..', '..', 'mining', 'data')

with open(os.path.join(data_dir, 'test_sampling_sites.geojson'), 'r') as f:
    test_sites = json.load(f)['features']

In [None]:
# The file's first feature carries the whole list of sampling points.
site_points = test_sites[0]['geometry']['coordinates']

# Note: this rebinds test_sites from the raw feature list to a DataFrame, so
# the cell can only be run once after the load cell above.
test_sites = pd.DataFrame({
    'name': ['test_' + str(i) for i in range(len(site_points))],
    'lon': [point[0] for point in site_points],
    # Element 1 is the latitude; this column previously repeated element 0 (the longitude).
    'lat': [point[1] for point in site_points],
    'coords': [point[:2] for point in site_points],
})
display(test_sites.head())

In [None]:
# For every test site, sample the site itself plus two points shifted
# diagonally by `offset` degrees in both lon and lat.
# NOTE(review): the point shifted by -offset is named '_right' and the one
# shifted by +offset '_left'. Confirm the naming is intended.
offset = 0.005
sampling_coords = []
sampling_names = []
for coord, name in zip(test_sites['coords'], test_sites['name']):
    sampling_coords.extend([
        coord,
        [coord[0] - offset, coord[1] - offset],
        [coord[0] + offset, coord[1] + offset],
    ])
    sampling_names.extend([name, name + '_right', name + '_left'])

sample_lons = [point[0] for point in sampling_coords]
sample_lats = [point[1] for point in sampling_coords]
plt.figure(figsize=(12,12))
plt.scatter(sample_lons, sample_lats, s=1)
plt.show()

In [None]:
# Export the sampling points as a lat/lon table.
# (A leading pd.DataFrame(...) expression whose result was discarded has been removed.)
# NOTE(review): hard-coded absolute path; consider writing under data_dir instead.
a = pd.DataFrame({
    'lat': [coord[1] for coord in sampling_coords],
    'lon': [coord[0] for coord in sampling_coords],
})
a.to_csv('/Users/ckruse/Downloads/river_sampling.csv')

In [None]:
# Download one month of small (0.0025 degree) patches for every sampling point.
# start_date is whatever the earlier cells last assigned.
river_sampling = get_history(sampling_coords, 
                         sampling_names,
                         0.0025,
                         start_date=start_date,
                         num_months=1)

In [None]:
# Check the pixel dimensions of one downloaded band.
river_sampling[start_date]['test_0']['B2'].shape

In [None]:
# Download one month of patches at the current rect_width for the original test sites.
river_patches = get_history(test_sites['coords'], 
                         test_sites['name'],
                         rect_width,
                         start_date=start_date,
                         num_months=1)

In [None]:
# Convert the test-site patches to (rows, cols, band) images.
river_data = create_img_stack(river_patches)

In [None]:
# Convert the small sampling patches to images and crop each to 28 x 28 pixels.
river_sampling_data = create_img_stack(river_sampling)
river_sampling_data = [img[:28, :28, :] for img in river_sampling_data]

In [None]:
# Crop to min_dimension, which is defined in the training-data section above (hidden dependency).
river_data = [img[:min_dimension, :min_dimension, :] for img in river_data]

In [None]:
# Shape check of the cropped sampling stack.
np.shape(river_sampling_data)

In [None]:
# Split each sampling image into a 2 x 2 grid of 28-pixel tiles and compute a
# coordinate for every tile.
# NOTE(review): river_sampling_data was already cropped to 28 x 28 above, so
# every tile with i == 1 or j == 1 is an empty slice. Confirm this is intended.
# NOTE(review): zip() pairs images with sampling_coords by position; if
# create_img_stack dropped any site the two lists are misaligned. Verify.
subsampled_river = []
subsampled_coords = []
for img, coords in zip(river_sampling_data, sampling_coords):
    for i in range(2):
        for j in range(2):
            # Offsets are multiples of rect_width relative to the site coordinate.
            subsampled_lon = coords[0] + rect_width * ((0.5 * j) - 1)
            subsampled_lat = coords[1] - rect_width * ((0.5 * i) - 1)
            subsampled_coords.append([subsampled_lon, subsampled_lat])
            subsampled_river.append(img[i*28:(i+1)*28, j*28:(j+1)*28, :])

In [None]:
# Shape check of the first tile.
np.shape(subsampled_river[0])

In [None]:
# List the sampling sites whose B2 band contains a negative value.
for site_name, site_bands in river_sampling[start_date].items():
    lowest_b2 = np.min(site_bands['B2'])
    if lowest_b2 < 0:
        print(site_name)

In [None]:
# Look up the list positions of two specific sampling sites.
print(np.where(np.array(sampling_names) == 'test_54_right'))
print(np.where(np.array(sampling_names) == 'test_55_right'))

In [None]:
# Total number of sampling names.
np.array(sampling_names).shape

In [None]:
# Inspect the coordinate at index 163 (one of the two indices excluded below).
sampling_coords[163]

In [None]:
# Preview the coordinate list with indices 163 and 166 removed (hard-coded indices).
np.concatenate((sampling_coords[:163], sampling_coords[164:166], sampling_coords[167:]))

In [None]:
# Predict on the cropped sampling images; column 1 of the model output is kept.
preds = model.predict(normalize(river_sampling_data))[:,1]

In [None]:
# Number of predictions; must equal the number of coordinates kept in the next cell.
len(preds)

In [None]:
# Pair each prediction with its coordinate. Indices 163 and 166 are removed
# by hand from the coordinate list.
# NOTE(review): these indices are hard-coded for one particular download;
# confirm they still match the sites that were filtered out.
kept_coords = np.concatenate((sampling_coords[:163], sampling_coords[164:166], sampling_coords[167:]))

predictions = pd.DataFrame(preds, columns=['pred'])
predictions['lat'] = [point[1] for point in kept_coords]
predictions['lon'] = [point[0] for point in kept_coords]
predictions.head()
predictions.to_csv('/Users/ckruse/Downloads/subsampled_mine_preds_2.csv')

In [None]:
# Build a kepler.gl map with the sampled predictions as a data layer.
# NOTE(review): this import sits mid-notebook rather than in the imports cell,
# and river_map is never displayed in this cell.
from keplergl import KeplerGl
river_map = KeplerGl(data={'samples': predictions})

In [None]:
# Classify up to num_samples sampling images one by one and show them in a
# square grid, each titled with its score.
num_samples = 20 ** 2
shown_sites = river_sampling_data[:num_samples]
num_img = int(np.ceil(np.sqrt(len(shown_sites))))
preds = []
plt.figure(figsize=(24,24), facecolor=(1,1,1))
for position, site in enumerate(shown_sites, start=1):
    scaled = normalize(site)
    score = model.predict(np.expand_dims(scaled, 0))[0][1]
    preds.append(score)

    plt.subplot(num_img, num_img, position)
    # Channels 3, 2, 1 = red, green, blue.
    plt.imshow(np.clip(scaled[:, :, [3, 2, 1]], 0, 1), vmin=0, vmax=1)
    plt.title(f"{100 * score:.0f}%")
    plt.axis('off')
plt.tight_layout()
#plt.savefig(f'/Users/ckruse/Downloads/{test_coords[1]:32f}, {test_coords[0]:.3f}.png', bbox_inches='tight')
plt.show()

# Tambopata Test

In [None]:
# Regular 30 x 30 grid of [lon, lat] points over the bounding box, with one
# generated name per point.
min_lon, max_lon = (-70.72, -69.8)
min_lat, max_lat = (-13.07, -12.73)

num_samples = 30

lons = np.linspace(min_lon, max_lon, num_samples)
lats = np.linspace(min_lat, max_lat, num_samples)
lon, lat = np.meshgrid(lons, lats)
peru_coords = [list(pair) for pair in zip(lon.ravel(), lat.ravel())]
peru_names = [f'tambopata_{i}' for i in range(len(peru_coords))]

In [None]:
# Download one month of small (0.0025 degree) patches for every grid point.
start_date = '2020-05-01'
tambopata = get_history(peru_coords, 
                        peru_names,
                        0.0025,
                        start_date=start_date,
                        num_months=1)

In [None]:
# Build the image stack and crop every image to 28 x 28 pixels.
tambopata_stack = create_img_stack(tambopata)
tambopata_stack = [img[:28, :28, :] for img in tambopata_stack]

In [None]:
# Predict on the grid images; column 1 of the model output is kept.
preds = model.predict(normalize(tambopata_stack))[:,1]

In [None]:
# Keep the grid points whose B2 band has a strictly positive minimum.
# NOTE(review): create_img_stack filters on band sizes and the overall median,
# not on min(B2), so this list can differ in length from `preds`. Verify.
# The loop variable `coords` stays defined after this cell and is read by a
# later cell.
cloud_free_coords = []
for site, coords in zip(tambopata[start_date], peru_coords):
    if np.min(tambopata[start_date][site]['B2']) > 0:
        cloud_free_coords.append(coords)

In [None]:
# Table of predictions with the coordinates of the sites that passed the filter.
peru_preds = pd.DataFrame({
    'pred': preds,
    'lat': [point[1] for point in cloud_free_coords],
    'lon': [point[0] for point in cloud_free_coords],
})

peru_preds.to_csv('/Users/ckruse/Downloads/tambopata_preds.csv')
peru_preds.head()

In [None]:
# Display the grid predictions produced by predict_grid earlier in the notebook.
preds_df

In [None]:
# Write an all-zero prediction grid, i.e. a template of the sampling squares
# without running the model.
rect_width = 0.025
min_lon, max_lon = -70.72, -69.8
min_lat, max_lat = -13.2, -12.57
num_samples = 20
sampling_grid = create_sampling_grid(min_lon, max_lon, min_lat, max_lat, num_samples, num_samples)
sampling_df = pd.DataFrame({
    'lon': [coord[0] for coord in sampling_grid],
    'lat': [coord[1] for coord in sampling_grid],
    'pred': [0 for _ in range(len(sampling_grid))]
})

# NOTE(review): hard-coded absolute output path.
write_data(sampling_df, f'/Users/ckruse/Downloads/grid_{min_lon},{max_lon}_{min_lat},{max_lat}_{rect_width}', rect_width)

In [None]:

# Predict over the grid defined in the previous cell.
# predict_grid takes (model, history, img_stack, coords); the earlier call
# passed bounding-box arguments to it and raised TypeError, so the grid is
# built and downloaded explicitly here.
# NOTE(review): get_image_stack's default scale is used; confirm it matches
# what the original run used.
sampling_grid = create_sampling_grid(min_lon, max_lon, min_lat, max_lat, num_samples, num_samples)
tambopata_history, tambopata_train_img = get_image_stack(sampling_grid,
                                                         start_date=start_date,
                                                         rect_width=rect_width)
tambopata_train_df = predict_grid(model, tambopata_history, tambopata_train_img, sampling_grid)
write_data(tambopata_train_df, f'/Users/ckruse/Downloads/tambopata_{min_lon},{max_lon}_{min_lat},{max_lat}_{rect_width}', rect_width)

In [None]:
# NOTE(review): this call does not match predict_grid(model, history,
# img_stack, coords) as defined in this notebook and raises TypeError. It
# appears to target an earlier version that took a bounding box; confirm and
# update.
peru_south_wide = predict_grid(-70.72, -68.88, -13.41, -13.07, 60, 30)
peru_south_wide.head()

In [None]:
# Merge all regional prediction tables into one CSV.
# NOTE(review): peru_east and peru_south are not defined anywhere in this
# notebook, and peru_preds_north is only defined further down; this cell fails
# on a fresh top-to-bottom run.
pd.concat([peru_preds, peru_preds_north, peru_east, peru_south, peru_south_wide]).to_csv(os.path.join(data_dir, 'tambopata_grid_full.csv'), index=False)

In [None]:
# Create a grid of coordinates:
# NOTE(review): the bounds are identical to the first Tambopata grid although
# the results are stored as "north"; confirm the intended bounding box.
min_lon, max_lon = (-70.72, -69.8)
min_lat, max_lat = (-13.07, -12.73)

num_samples = 30

lons = np.linspace(min_lon, max_lon, num_samples)
lats = np.linspace(min_lat, max_lat, num_samples)
lon, lat = np.meshgrid(lons, lats)
peru_coords = [[lon, lat] for lon, lat in zip(lon.flatten(), lat.flatten())]
# One name per grid point. This previously used len(coords), a loop variable
# left over from an earlier cell that holds a single [lon, lat] pair, so only
# two names were generated.
peru_names = ['tambopata_' + str(i) for i in range(len(peru_coords))]

In [None]:
# Download one month of small (0.0025 degree) patches for the grid above.
start_date = '2020-05-01'
tambopata_north = get_history(peru_coords, 
                        peru_names,
                        0.0025,
                        start_date=start_date,
                        num_months=1)

In [None]:
# Build the image stack and crop every image to 28 x 28 pixels.
tambopata_stack_north = create_img_stack(tambopata_north)
tambopata_stack_north = [img[:28, :28, :] for img in tambopata_stack_north]

In [None]:
# Shape check of the cropped stack.
np.shape(tambopata_stack_north)

In [None]:
# Predict on the grid images; column 1 of the model output is kept.
preds_north = model.predict(normalize(tambopata_stack_north))[:,1]

In [None]:
# Keep the grid points whose B2 band has a strictly positive minimum.
# NOTE(review): this filter differs from the one inside create_img_stack, so
# the list can differ in length from preds_north. Verify.
cloud_free_coords_north = []
for site, coords in zip(tambopata_north[start_date], peru_coords):
    if np.min(tambopata_north[start_date][site]['B2']) > 0:
        cloud_free_coords_north.append(coords)

In [None]:
# Table of predictions with the coordinates of the sites that passed the filter.
peru_preds_north = pd.DataFrame({
    'pred': preds_north,
    'lat': [point[1] for point in cloud_free_coords_north],
    'lon': [point[0] for point in cloud_free_coords_north],
})

peru_preds_north.to_csv('/Users/ckruse/Downloads/tambopata_preds_north.csv')
peru_preds_north.head()

In [None]:
# Merge the regional prediction tables into one CSV.
# NOTE(review): peru_east and peru_south are not defined anywhere in this
# notebook; this cell raises NameError on a fresh run.
pd.concat([peru_preds, peru_preds_north, peru_east, peru_south]).to_csv(os.path.join(data_dir, 'tambopata_grid.csv'), index=False)

## River Head

In [None]:
# Display the prediction table assigned in the Tambopata section above.
tambopata_train_df

In [None]:
# NOTE(review): river_head is not defined anywhere in this notebook; this cell
# raises NameError on a fresh run. Also a hard-coded absolute path.
river_head.to_csv('/Users/ckruse/Downloads/river_head.csv', index=False)

In [None]:
# NOTE(review): this call does not match predict_grid(model, history,
# img_stack, coords) as defined in this notebook and raises TypeError; it
# appears to target an earlier bounding-box version. Confirm and update.
tambopata_train_df, tambopata_train_img = predict_grid(-70.6, -70.3, -13.01, -12.95, 1, 1)

In [None]:
# write_data requires the patch width as its third argument; the call
# previously omitted it and raised TypeError. The notebook-level rect_width is
# used here.
write_data(tambopata_train_df, '/Users/ckruse/Downloads/test_bigger', rect_width)