In [1]:
# Third-party
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
import numpy as np
import pandas as pd

# Project-local
import tsclust
import tstrain

Using TensorFlow backend.


In [3]:
# Training data - no crop labels
# Reads the training table from CSV into `td`. The commented-out line below is the alternative
# "large" file; the active file is annotated as holding only the ndvi, green and blue bands.
# NOTE(review): the absolute /home/ec2-user path ties this cell to one machine.
#td = pd.read_csv('/home/ec2-user/training_data_large.csv')
td = pd.read_csv('/home/ec2-user/training_data_few_bands.csv') # <- only ndvi, green and blue bands

In [4]:
# Discard the 'Unnamed: 0' column (presumably a row index written out with the CSV - TODO confirm).
td = td.drop(columns=['Unnamed: 0'])

In [5]:
# Some cluster ids unexpectedly appear as the strings '0'..'4'; replace each with the matching integer.
for cluster_id in range(5):
    td.loc[td['label'] == str(cluster_id), 'label'] = cluster_id

In [6]:
# Sanity check: list the distinct label values after the string-to-int coercion.
td['label'].unique()

array(['veg', 'water', 4, 2, 0, 1, 3, 'urban'], dtype=object)

In [7]:
# Collapse the integer cluster ids into named crop classes:
#   clusters 2 and 4 -> "maize", clusters 0 and 1 -> "crop_2", cluster 3 -> "crop_3".
# The targets are strings and the keys are integers, so the order of assignment does not matter.
cluster_to_crop = {2: "maize", 4: "maize", 0: "crop_2", 1: "crop_2", 3: "crop_3"}
for cluster_id, crop_name in cluster_to_crop.items():
    td.loc[td['label'] == cluster_id, 'label'] = crop_name

In [8]:
# Sanity check: only named classes should remain once the clusters have been grouped.
td['label'].unique()

array(['veg', 'water', 'maize', 'crop_2', 'crop_3', 'urban'], dtype=object)

In [9]:
# Restrict the training rows to the first 6 dates in the time series (Nov 16, 2016 through May 25, 2017).
# unique() keeps order of first appearance, so "first" assumes td is in chronological order - TODO confirm.
dates = td['date'].unique()
datesub = dates[:6]
in_first_dates = td['date'].isin(datesub)
trainsub = td[in_first_dates]

In [13]:
# Fit a LSTM recurrent neural network. In this 'toy' example, a total of 25,000 samples are used to fit a model,
# including 10,000 from the clustered "cropped" class, and 5,000 from each of the "water", "urban" and
# "vegetation" classes. Y labels are numerically encoded, and converted to "one-hot" vectors.
# NOTE(review): this comment used to list the bands (features) as red, blue, green, and nir, but the CSV
# loaded at the top of the notebook is annotated as holding only ndvi, green and blue - confirm which applies.
# NOTE(review): the sample counts above are not checked by any code in this notebook - confirm they are current.

# Format training data into the 3D array of shape (n_samples, n_timesteps, n_features) required to fit a
# Keras LSTM model. n_features corresponds to the number of bands included in the training data.

class_codes, x, y = tstrain.format_training_data(trainsub)

In [15]:
# Split training and test data
# seed=0 is forwarded to the project helper, presumably to make the split repeatable - TODO confirm in tstrain.
x_train, x_test, y_train, y_test = tstrain.split_train_test(x, y, seed=0)

In [16]:
# Standardize features
# The helper returns mu and sd alongside the normalized train/test arrays; presumably these are the mean and
# standard deviation used for the scaling - TODO confirm in tstrain.standardize_features.
mu, sd, x_train_norm, x_test_norm = tstrain.standardize_features(x_train, x_test)

In [17]:
# Write the standardization statistics (mu, sd) returned by tstrain.standardize_features to disk,
# presumably so the same scaling can be re-applied to new data outside this notebook.
# numpy is imported as `np` in the notebook's import cell rather than mid-notebook.
np.save('/home/ec2-user/mu.npy', mu)
np.save('/home/ec2-user/sd.npy', sd)

In [19]:
# Train LSTM model
# Input dimensions are taken from the long-format training frame: one timestep per distinct date,
# one feature per distinct value of the 'feature' column.
n_timesteps = len(trainsub['date'].unique())
n_features = len(trainsub['feature'].unique())

# Architecture: four 32-unit ReLU LSTM layers that each return the full sequence, a fifth 32-unit
# LSTM left on the Keras default activation that returns only its final output, and a softmax
# layer with one unit per one-hot class column in y.
model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=True, input_shape=(n_timesteps, n_features)))
for stacked_layer in range(3):
    model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(LSTM(32))
model.add(Dense(units=y.shape[1], activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# Kept as the last expression so the returned History object is still shown as the cell output.
model.fit(x_train_norm, y_train, batch_size=32, epochs=50, verbose=2)

Epoch 1/50
 - 25s - loss: 0.5378 - categorical_accuracy: 0.7764
Epoch 2/50
 - 22s - loss: 0.3194 - categorical_accuracy: 0.8797
Epoch 3/50
 - 22s - loss: 0.2674 - categorical_accuracy: 0.8996
Epoch 4/50
 - 22s - loss: 0.2329 - categorical_accuracy: 0.9137
Epoch 5/50
 - 22s - loss: 0.2193 - categorical_accuracy: 0.9179
Epoch 6/50
 - 22s - loss: 0.1960 - categorical_accuracy: 0.9267
Epoch 7/50
 - 22s - loss: 0.1796 - categorical_accuracy: 0.9318
Epoch 8/50
 - 22s - loss: 0.1682 - categorical_accuracy: 0.9360
Epoch 9/50
 - 22s - loss: 0.1584 - categorical_accuracy: 0.9412
Epoch 10/50
 - 22s - loss: 0.1488 - categorical_accuracy: 0.9443
Epoch 11/50
 - 22s - loss: 0.1448 - categorical_accuracy: 0.9466
Epoch 12/50
 - 22s - loss: 0.1380 - categorical_accuracy: 0.9479
Epoch 13/50
 - 22s - loss: 0.1282 - categorical_accuracy: 0.9517
Epoch 14/50
 - 22s - loss: 0.1251 - categorical_accuracy: 0.9535
Epoch 15/50
 - 22s - loss: 0.1187 - categorical_accuracy: 0.9553
Epoch 16/50
 - 22s - loss: 0.1170 

<keras.callbacks.History at 0x7f6405f8a710>

In [20]:
# Model accuracy on the test split.
# evaluate() returns the loss followed by the compiled metric (categorical accuracy). The loss is bound
# to a real name instead of `_`: in IPython, `_` is the shell's "last output" variable, and assigning
# to it in user code stops the shell from updating it.
test_loss, accuracy = model.evaluate(x_test_norm, y_test, batch_size=32)
accuracy



0.9732999582811848

In [21]:
# Confusion matrix
# Project helper; called with the normalized test features, the one-hot test labels, the fitted model and
# the class codes from format_training_data. The displayed table has one row and column per class plus a
# 'recall' column. NOTE(review): which axis is truth vs. prediction is not visible here - confirm in tstrain.
tstrain.conf_mat(x_test_norm, y_test, model, class_codes)

Unnamed: 0,crop_2,crop_3,maize,urban,veg,water,recall
crop_2,798,7,31,6,11,0,0.935522
crop_3,11,59,1,0,0,0,0.830986
maize,51,5,978,0,18,0,0.929658
urban,11,0,1,1177,1,0,0.989076
veg,15,0,21,2,1964,0,0.981019
water,0,0,0,0,0,2023,1.0


In [22]:
# Write the model architecture (no weights) to disk as JSON.
model_json = model.to_json()
with open("/home/ec2-user/model_labeled.json", mode="w") as json_file:
    json_file.write(model_json)

In [23]:
# serialize weights to HDF5
# Only the weights go into this file; the architecture is written separately as JSON, so both files are
# needed to rebuild the model.
model.save_weights("/home/ec2-user/model_labeled.h5")
print("Saved model to disk")

Saved model to disk
