In [1]:
from google.colab import drive
import os

# Mount Google Drive in the Colab runtime, then make the project's src directory
# the working directory so the relative 'data/...' and 'models/...' paths used
# in later cells resolve.
drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive/ML-Climate-Predicting-Wildfires/src')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf

# tfds.disable_progress_bar()

import matplotlib.pyplot as plt
import numpy as np

In [3]:
import tensorflow as tf
# Spatial size that scale_resize_image resizes every frame to.
INPUT_WIDTH = 128
INPUT_HEIGHT = 128
# Channels per frame; used as the last axis of the model input shapes.
N_CHANNELS = 12
# NOTE(review): N_CLASSES is not referenced anywhere in the visible notebook.
N_CLASSES = 2

In [4]:
import uuid

def load_dir(data_dir):
  """Group the files in `data_dir` by sample, keyed by a freshly generated UUID.

  A file is considered part of a sample when its name contains an underscore;
  the text before the first underscore is the sample's on-disk identifier.
  Files whose name contains 'download_log' are skipped.

  Args:
    data_dir: Directory to scan. A trailing path separator is optional.

  Returns:
    dict mapping a random UUID string (one per on-disk identifier) to the list
    of file paths belonging to that sample, in the order os.listdir yielded
    them.
  """
  id_img_map = {}
  fire_sample_map = {}  # on-disk identifier -> generated UUID key
  # NOTE(review): os.listdir makes no ordering guarantee, but get_image_data
  # relies on the per-sample list order to represent time. Confirm the listing
  # order is stable on the storage backend in use.
  for file_name in os.listdir(data_dir):
    if '_' not in file_name or 'download_log' in file_name:
      continue
    prefix = file_name[:file_name.index('_')]
    if prefix not in fire_sample_map:
      fire_sample_map[prefix] = str(uuid.uuid4())
      id_img_map[fire_sample_map[prefix]] = []
    # os.path.join yields a valid path whether or not data_dir ends with a
    # separator (plain string concatenation did not).
    id_img_map[fire_sample_map[prefix]].append(os.path.join(data_dir, file_name))
  return id_img_map

In [5]:
# Index the sample files of each class; each map is {generated sample key: [file paths]}.
id_img_map_negative = load_dir('data/TrainingData/AllData/Fire-Negative-5mo/')
id_img_map_positive = load_dir('data/TrainingData/AllData/Fire-Positive-5mo/')

In [6]:
from sklearn.model_selection import train_test_split

# At most SIZE samples are taken from each class; any keys beyond that are dropped.
# NOTE(review): 2769 is hard-coded - confirm it matches the per-class sample count.
SIZE = 2769
all_ids_negative = [(sample_id, 0) for sample_id in list(id_img_map_negative)[:SIZE]]
all_ids_positive = [(sample_id, 1) for sample_id in list(id_img_map_positive)[:SIZE]]
all_ids = [*all_ids_negative, *all_ids_positive]

# Hold out 20% for test, then 25% of the remainder (20% of the total) for validation.
ids_train_val, ids_test = train_test_split(all_ids, test_size=0.2, random_state=42)
ids_train, ids_val = train_test_split(ids_train_val, test_size=0.25, random_state=42)

In [7]:
def batch(iterable, n=1):
    """Yield consecutive slices of `iterable`, each holding at most `n` items.

    `iterable` must support len() and slicing; the final slice may be shorter
    than `n`.
    """
    total = len(iterable)
    # Slicing clamps at the end of the sequence, so no explicit upper bound is needed.
    yield from (iterable[start:start + n] for start in range(0, total, n))

# Pre-chunk each split's (sample_id, label) pairs into lists of at most BATCH_SIZE entries.
BATCH_SIZE = 32
train_batch_ids = list(batch(ids_train, BATCH_SIZE))
val_batch_ids = list(batch(ids_val, BATCH_SIZE))
test_batch_ids = list(batch(ids_test, BATCH_SIZE))

In [8]:
def scale_resize_image(image):
    """Convert `image` to float32 and resize it to the configured input size.

    NOTE(review): tf.image.resize takes its size as (height, width), while the
    constants are passed here as (INPUT_WIDTH, INPUT_HEIGHT). This is harmless
    while both are 128, but the order should be confirmed before they diverge.
    """
    image = tf.image.convert_image_dtype(image, tf.float32) # integer inputs are rescaled to [0, 1]; floating-point inputs are only cast, not rescaled
    image = tf.image.resize(image, (INPUT_WIDTH, INPUT_HEIGHT)) # resize to the configured input size (128x128)
    return image

In [9]:
def get_image_data(id_img_map, sample_id):
  """Load, preprocess and stack all frames of one sample.

  Args:
    id_img_map: Mapping from sample key to that sample's list of .npy paths.
    sample_id: Key of the sample to load.

  Returns:
    A NumPy array with the preprocessed frames stacked along a new first axis.
  """
  # The stored list is walked back-to-front; per the original author's note
  # this undoes a reversed time order.
  frame_paths = reversed(id_img_map[sample_id])
  frames = [scale_resize_image(np.load(path)) for path in frame_paths]
  return np.array(frames)

In [10]:
from tqdm import tqdm

def get_batch_data(batch):
  """Load the inputs and labels for one batch of (sample_id, label) pairs.

  Samples labelled 1 are looked up in id_img_map_positive, all others in
  id_img_map_negative.

  Returns:
    (X, y): the stacked per-sample arrays and the matching label array.
  """
  features, labels = [], []
  for sample_id, label in batch:
    if label == 1:
      source_map = id_img_map_positive
    else:
      source_map = id_img_map_negative
    features.append(get_image_data(source_map, sample_id))
    labels.append(label)
  return np.array(features), np.array(labels)

# Materialise every training batch in memory as [X, y] pairs.
# The loop variable is intentionally not named `batch`: at module level that
# would rebind the batch() generator function and break any re-run of the cell
# that builds train_batch_ids / val_batch_ids / test_batch_ids.
# NOTE(review): no visible cell writes train_data to disk, although later cells
# read batches via load_batch('train', ...) - confirm where that save happens.
train_data = []
for batch_ids in tqdm(train_batch_ids):
  X, y = get_batch_data(batch_ids)
  train_data.append([X, y])


100%|██████████| 104/104 [02:46<00:00,  1.60s/it]


In [18]:
def save_dataset(dataset, name):
  """Write each batch of `dataset` to data/TrainingData/<name>/ as .npy files.

  Batch number i is stored as '<i>_X.npy' (inputs) and '<i>_y.npy' (labels),
  which is the layout load_batch reads back.

  Args:
    dataset: Iterable of (X, y) pairs of arrays.
    name: Sub-directory of data/TrainingData/ to write into; it is created if
      it does not exist yet.
  """
  out_dir = f'data/TrainingData/{name}'
  # Without this, the first open() fails with FileNotFoundError on a fresh checkout.
  os.makedirs(out_dir, exist_ok=True)
  for i, (X, y) in enumerate(dataset):
    with open(f'{out_dir}/{i}_X.npy', 'wb') as f:
      np.save(f, X)
    with open(f'{out_dir}/{i}_y.npy', 'wb') as f:
      np.save(f, y)

In [21]:
# Materialise every test batch in memory as [X, y] pairs.
# The loop variable is intentionally not named `batch`: at module level that
# would rebind the batch() generator function and break any re-run of the cell
# that builds the *_batch_ids lists.
test_data = []
for batch_ids in tqdm(test_batch_ids):
  X, y = get_batch_data(batch_ids)
  test_data.append([X, y])

100%|██████████| 35/35 [14:33<00:00, 24.97s/it]


In [22]:
# Persist the in-memory test batches under data/TrainingData/test/.
save_dataset(test_data, 'test')

In [30]:
def load_batch(dataset_name, batch_no):
  """Read one saved batch back from data/TrainingData/<dataset_name>/.

  Args:
    dataset_name: Sub-directory holding the batch files.
    batch_no: Index used as the file-name prefix.

  Returns:
    (X, y): the arrays stored in '<batch_no>_X.npy' and '<batch_no>_y.npy'.
  """
  features_path = f'data/TrainingData/{dataset_name}/{batch_no}_X.npy'
  labels_path = f'data/TrainingData/{dataset_name}/{batch_no}_y.npy'
  return np.load(features_path), np.load(labels_path)

## Timestamp-averaged CNN model

In [35]:
from keras.layers import Conv2D, BatchNormalization, \
    MaxPool2D, GlobalMaxPool2D

# https://medium.com/smileinnovation/training-neural-network-with-image-sequence-an-example-with-video-as-input-c3407f7a0b0f

def build_convnet(shape=(112, 112, 3)):
    """Build the small 2-D convolutional front-end.

    The network is one 3x3, 64-filter ReLU convolution ('same' padding),
    followed by batch normalisation and a global max-pool over the spatial
    axes.

    Args:
        shape: Input shape handed to the first layer as `input_shape`.

    Returns:
        An uncompiled keras.Sequential model that callers may extend with
        further layers.
    """
    bn_momentum = .9
    # Deeper stages (pairs of 128-, 256- and 512-filter convolutions separated
    # by max-pooling) were part of the referenced design and are currently
    # disabled.
    return keras.Sequential([
        Conv2D(64, (3, 3), input_shape=shape, padding='same', activation='relu'),
        BatchNormalization(momentum=bn_momentum),
        GlobalMaxPool2D(),  # reduces each feature map to a single value
    ])

In [39]:
from keras.layers import TimeDistributed, GRU, Dense, Dropout
def action_model(shape=(5, 112, 112, 3), nbout=3):
    """Build the classifier: the build_convnet front-end plus a dense head.

    `shape` is forwarded unchanged to build_convnet; no TimeDistributed or
    recurrent layer is applied in this variant.

    Args:
        shape: Input shape forwarded to build_convnet.
            NOTE(review): the default is a leftover 4-D sequence shape; the
            notebook calls this with a 3-D (width, height, channels) shape.
        nbout: Number of sigmoid output units.

    Returns:
        An uncompiled keras.Sequential model.
    """
    model = build_convnet(shape)

    # Decision head.
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(.5))
    # NOTE(review): a 2-unit ReLU layer right before the sigmoid output is a
    # very narrow bottleneck. It is kept so the architecture (and any saved
    # weights) stay unchanged.
    model.add(Dense(2, activation='relu'))
    model.add(Dense(nbout, activation='sigmoid'))
    return model

NBFRAME = 6
# INSHAPE=(NBFRAME,) + (INPUT_WIDTH, INPUT_HEIGHT) + (N_CHANNELS,) # (6, 128, 128, 12)
# No frame axis here: the training loop reduces each batch with np.mean(axis=1) before feeding it in.
INSHAPE= (INPUT_WIDTH, INPUT_HEIGHT) + (N_CHANNELS,) # (128, 128, 12)
model = action_model(INSHAPE, nbout=1)
optimizer = keras.optimizers.Adam(1e-4)

# Single sigmoid output trained with binary cross-entropy. The stat vectors
# printed during training are ordered: loss, accuracy, precision, recall, AUC.
# NOTE(review): layers come from `keras.layers` while the optimizer and metrics
# come from `tensorflow.keras`; confirm both resolve to the same Keras install.
model.compile(
    optimizer,
    'binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()])

In [51]:
# Training driver for the time-averaged model. An "epoch" here is a random
# subset of BATCHES_PER_EPOCH saved training batches, not a full pass.
EPOCHS = 200
BATCHES_PER_EPOCH = 20

# 104 matches the number of training batches shown by the progress bar when
# train_data was built.
# NOTE(review): the 'train' and 'val' batch files read below are not written by
# any visible cell - confirm they exist on disk before running.
all_batch_nos = list(range(104))
for e in range(EPOCHS):
  print('\nEPOCH:', e+1)
  train_stats = []
  # NOTE(review): np.random is not seeded in the visible cells, so the batch selection differs between runs.
  epoch_batch_nos = np.random.choice(all_batch_nos, BATCHES_PER_EPOCH, replace=False)
  for i in epoch_batch_nos:
    X,y = load_batch('train', i)
    # Average over axis 1 (the per-sample frame axis produced by get_image_data) so the model sees one image per sample.
    X = np.mean(X, axis=1)
    # print(X.shape)
    # print('Training:', model.predict_on_batch(X))
    batch_stats = model.train_on_batch(X, y)
    train_stats.append(batch_stats)
  # Unweighted mean of the values returned for each batch.
  print('Training:', np.mean(train_stats, axis=0))
  val_stats = []
  # Only the first 10 validation batches are evaluated per epoch.
  for i in range(10):
    X,y = load_batch('val', i)
    X = np.mean(X, axis=1)
    batch_stats = model.test_on_batch(X, y)
    val_stats.append(batch_stats)
  print('Validating:', np.mean(val_stats, axis=0))

print('\n\nDONE TRAINING.')
# Final per-batch evaluation over 35 batches of the 'val' split.
# NOTE(review): 35 equals the test-batch count shown earlier; confirm whether
# the 'test' split was intended here.
for i in range(35):
  X,y = load_batch('val', i)
  X = np.mean(X, axis=1)
  print('Validating:', model.test_on_batch(X, y, return_dict=True))


EPOCH: 1
Training: [0.6846197  0.56237981 0.56169325 0.66406167 0.57582922]
Validating: [0.66136235 0.609375   0.57340318 0.64406888 0.68474359]

EPOCH: 2
Training: [0.67665922 0.57608173 0.57104279 0.65832966 0.60476259]
Validating: [0.66171114 0.596875   0.56029454 0.64406888 0.69177483]

EPOCH: 3
Training: [0.67448851 0.5875     0.57760405 0.6647251  0.62103   ]
Validating: [0.65816583 0.61875    0.58809862 0.63193653 0.69385253]

EPOCH: 4
Training: [0.66803063 0.6        0.606524   0.63133826 0.63605278]
Validating: [0.65737942 0.609375   0.57528827 0.64448555 0.69703748]

EPOCH: 5
Training: [0.67782651 0.57884615 0.60085658 0.6639526  0.60473713]
Validating: [0.65395042 0.64375    0.62385415 0.60517154 0.70318837]

EPOCH: 6
Training: [0.66136689 0.625      0.65050423 0.66886397 0.66673636]
Validating: [0.6508723  0.6375     0.6121478  0.60517154 0.70763818]

EPOCH: 7
Training: [0.67174988 0.5890625  0.59390362 0.64187642 0.62635873]
Validating: [0.64670607 0.6625     0.66196582 0

KeyboardInterrupt: ignored

In [53]:
# Save the time-averaged CNN in its current state (training above was interrupted manually).
model.save('models/t_averaged_cnn')

INFO:tensorflow:Assets written to: models/t_averaged_cnn/assets


## Channel-histogram CNN-LSTM model

In [110]:
from keras.layers import Conv1D, BatchNormalization, \
    MaxPool1D, GlobalMaxPool1D

# https://medium.com/smileinnovation/training-neural-network-with-image-sequence-an-example-with-video-as-input-c3407f7a0b0f

def build_convnet(shape=(112, 112, 3)):
    """Build a Conv1D + batch-normalisation stack (1-D variant of the earlier build_convnet).

    This definition replaces the Conv2D build_convnet defined in an earlier
    cell once it is executed.

    NOTE(review): the action_model defined in the following cell does not call
    this function (its call is commented out there), and the default `shape`
    is carried over from the 2-D version - confirm whether this cell is still
    needed.

    Args:
        shape: Input shape handed to the Conv1D layer as `input_shape`.

    Returns:
        An uncompiled keras.Sequential model; no pooling/flattening is applied.
    """
    momentum = .9
    model = keras.Sequential()
    model.add(Conv1D(64, 3, input_shape=shape,
        padding='same', activation='relu'))
    # model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    # model.add(MaxPool2D())
    
    # model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    # model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    # model.add(BatchNormalization(momentum=momentum))
     
    # model.add(MaxPool2D())
    
    # model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    # model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    # model.add(BatchNormalization(momentum=momentum))
    
    # model.add(MaxPool2D())
    
    # model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    # model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    # model.add(BatchNormalization(momentum=momentum))
    
    # Global pooling is currently disabled, so the output keeps its step axis.
    # model.add(GlobalMaxPool1D())
    return model

In [121]:
from keras.layers import TimeDistributed, LSTM, Dense, Dropout
def action_model(shape=(5, 112, 112, 3), nbout=3):
    """Build the channel-histogram CNN-LSTM classifier.

    Each step along the first axis of `shape` goes through the same
    Conv1D -> dropout -> max-pool -> global-max-pool stack (wrapped in
    TimeDistributed). The resulting sequence of feature vectors is consumed by
    an LSTM, followed by a dense decision head.

    Args:
        shape: Per-sample input shape; its first axis is the sequence axis.
            NOTE(review): the default is a leftover from the image-sequence
            variant; the notebook calls this with (NBFRAME, 32, N_CHANNELS).
        nbout: Number of sigmoid output units.

    Returns:
        An uncompiled keras.Sequential model.
    """
    model = keras.Sequential()

    # Per-step feature extractor.
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=shape))
    model.add(TimeDistributed(Dropout(0.5)))
    model.add(TimeDistributed(MaxPool1D(pool_size=2)))
    model.add(TimeDistributed(GlobalMaxPool1D()))

    # Temporal aggregation over the sequence axis.
    model.add(LSTM(64))
    model.add(Dropout(0.5))

    # Decision head.
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(.5))
    # NOTE(review): a 2-unit ReLU layer right before the sigmoid output is a
    # very narrow bottleneck. It is kept so the architecture (and any saved
    # weights) stay unchanged.
    model.add(Dense(2, activation='relu'))
    model.add(Dense(nbout, activation='sigmoid'))
    return model

NBFRAME = 6
# Input is (frames, histogram bins, channels); 32 matches the `bins=32` used when the histograms are computed.
INSHAPE=(NBFRAME,) + (32,) + (N_CHANNELS,) # (6, 32, 12)
# INSHAPE= (32,) + (N_CHANNELS,) # (32, 12)
print(INSHAPE)
model = action_model(INSHAPE, nbout=1)
optimizer = keras.optimizers.Adam(1e-4)

# Single sigmoid output trained with binary cross-entropy. The stat vectors
# printed during training are ordered: loss, accuracy, precision, recall, AUC.
model.compile(
    optimizer,
    'binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()])

(6, 32, 12)


In [122]:
# Training driver for the channel-histogram model. An "epoch" here is a random
# subset of BATCHES_PER_EPOCH saved training batches, not a full pass.
EPOCHS = 200
BATCHES_PER_EPOCH = 20

# NOTE(review): 104 is hard-coded; it matches the number of training batches
# shown by the progress bar when train_data was built.
all_batch_nos = list(range(104))
for e in range(EPOCHS):
  print('\nEPOCH:', e+1)
  train_stats = []
  epoch_batch_nos = np.random.choice(all_batch_nos, BATCHES_PER_EPOCH, replace=False)
  # NOTE: despite its name, i_epoch is a batch number drawn for this epoch.
  for i_epoch in epoch_batch_nos:
    X,y = load_batch('train', i_epoch)
    histograms_batch = []
    for i_sample in range(X.shape[0]):
      histograms_sample = []
      for channel in range(N_CHANNELS):
        histograms_frame = []
        for frame in range(NBFRAME):
          # 32-bin histogram of one frame's single channel; values outside [0, 0.3] are not counted.
          # NOTE(review): counts are raw (not normalised) - confirm that is intended as model input.
          histogram, _ = np.histogram(
              X[i_sample,frame,:,:,channel], bins=32, range=(0, 0.3)
          )
          histograms_frame.append([histogram])
        # (NBFRAME, 32) -> transposed to (32, NBFRAME)
        histograms_frame = np.concatenate(histograms_frame, axis=0).T
        histograms_sample.append([histograms_frame])
      # (N_CHANNELS, 32, NBFRAME) -> transposed to (NBFRAME, 32, N_CHANNELS), i.e. INSHAPE
      histograms_sample = np.concatenate(histograms_sample, axis=0).T
      histograms_batch.append([histograms_sample])
    # (batch, NBFRAME, 32, N_CHANNELS)
    histograms_batch = np.concatenate(histograms_batch, axis=0)
    batch_stats = model.train_on_batch(histograms_batch, y)
    train_stats.append(batch_stats)
  # Unweighted mean of the values returned for each batch.
  print('Training:', np.mean(train_stats, axis=0))
# NOTE(review): the disabled validation code below still feeds np.mean(X, axis=1)
# images, which do not match this model's histogram input shape; it would need
# the same histogram conversion before being re-enabled.
#   val_stats = []
#   for i in range(10):
#     X,y = load_batch('val', i)
#     X = np.mean(X, axis=1)
#     batch_stats = model.test_on_batch(X, y)
#     val_stats.append(batch_stats)
#   print('Validating:', np.mean(val_stats, axis=0))

# print('\n\nDONE TRAINING.')
# for i in range(35):
#   X,y = load_batch('val', i)
#   X = np.mean(X, axis=1)
#   print('Validating:', model.test_on_batch(X, y, return_dict=True))


EPOCH: 1
Training: [0.70627557 0.5078125  0.49602589 0.58131348 0.51188537]

EPOCH: 2
Training: [0.70465723 0.515625   0.51169368 0.52893237 0.5026588 ]

EPOCH: 3
Training: [0.70598189 0.484375   0.48379602 0.52744944 0.49911648]

EPOCH: 4
Training: [0.69585466 0.5296875  0.54685862 0.57717874 0.53817243]

EPOCH: 5
Training: [0.70581698 0.478125   0.47209753 0.49129212 0.48212802]

EPOCH: 6
Training: [0.70303938 0.46875    0.49209905 0.46081402 0.46821645]

EPOCH: 7
Training: [0.69908733 0.51947115 0.51648903 0.47958186 0.53680509]

EPOCH: 8
Training: [0.69771644 0.509375   0.52086316 0.48016148 0.51702967]

EPOCH: 9
Training: [0.70382934 0.5109375  0.48467928 0.42680395 0.47667662]

EPOCH: 10
Training: [0.70497007 0.4953125  0.49354042 0.43048947 0.47699436]

EPOCH: 11
Training: [0.69582818 0.484375   0.52675763 0.42479821 0.5003361 ]

EPOCH: 12


KeyboardInterrupt: ignored

In [None]:
# Save the channel-histogram CNN-LSTM in its current state.
model.save('models/channel_hist_lstm')