In [None]:
import shutil
from glob import glob
from PIL import Image, ImageOps

In [None]:
# Mount Google Drive inside the Colab VM; later cells read the Kaggle
# credentials from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/MyDrive/kaggle"
!pip install kaggle
!mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

!kaggle datasets download -d fantineh/next-day-wildfire-spread

!unzip -q ./next-day-wildfire-spread.zip # Unzipping dataset
!rm ./next-day-wildfire-spread.zip       # Removing zip file

In [None]:
# Sort the extracted .tfrecord files into one folder per split
# (/content/dataset/train, /content/dataset/eval, /content/dataset/test).
# makedirs(exist_ok=True) keeps the cell re-runnable, and the absolute path
# matches the absolute destination used for the move below.
for split in ['train', 'eval', 'test']:
  os.makedirs('/content/dataset/{}'.format(split), exist_ok=True)

for split in ['train', 'test', 'eval']:
  # Files are matched by the "_<split>" fragment in their name.
  for src in sorted(glob('/content/*_{}*'.format(split))):
    filename = os.path.basename(src)
    if os.path.splitext(filename)[1] == '.tfrecord':
      dst = os.path.join('/content/dataset', split, filename)
      shutil.move(src, dst)

## Dataset Reading

In [None]:
import re
from typing import Dict, List, NamedTuple, Optional, Text, Tuple
import matplotlib.pyplot as plt
from matplotlib import colors

import tensorflow as tf
import shutil

In [None]:
# Readability aliases: both model inputs and targets are plain tensors.
InputImage = tf.Tensor
OutputImage = tf.Tensor
# Type of the statistics table: feature name -> statistics tuple.
DataStatsType = Dict[str, NamedTuple]


class DataStats(NamedTuple):
    """Per-feature statistics consumed by the clipping/scaling helpers."""
    min: float   # lower bound used when clipping
    max: float   # upper bound used when clipping
    mean: float  # mean used for normalization
    std: float   # standard deviation used for normalization


# Names of the channels stacked into the model input, in channel order.
INPUT_FEATURES: List[str] = [
    'elevation', 'th', 'vs', 'tmmn', 'tmmx', 'sph', 'pr', 'pdsi',
    'NDVI', 'population', 'erc', 'PrevFireMask'
]

# Names of the channels stacked into the target image.
OUTPUT_FEATURES: List[str] = ['FireMask']

# Statistics per feature, given positionally as DataStats(min, max, mean, std).
# The two fire masks span -1..1 with mean 0 / std 1.
DATA_STATS: DataStatsType = {
    'elevation': DataStats(0.0, 3141.0, 657.3003, 649.0147),
    'pdsi': DataStats(-6.12974870967865, 7.876040384292651, -0.0052714925, 2.6823447),
    'NDVI': DataStats(-9821.0, 9996.0, 5157.625, 2466.6677),
    'pr': DataStats(0.0, 44.53038024902344, 1.7398051, 4.482833),
    'sph': DataStats(0., 1., 0.0071658953, 0.0042835088),
    'th': DataStats(0., 360.0, 190.32976, 72.59854),
    'tmmn': DataStats(253.15, 298.94891357421875, 281.08768, 8.982386),
    'tmmx': DataStats(253.15, 315.09228515625, 295.17383, 9.815496),
    'vs': DataStats(0.0, 10.024310074806237, 3.8500874, 1.4109988),
    'erc': DataStats(0.0, 106.24891662597656, 37.326267, 20.846027),
    'population': DataStats(0., 2534.06298828125, 25.531384, 154.72331),
    'PrevFireMask': DataStats(-1., 1., 0., 1.),
    'FireMask': DataStats(-1., 1., 0., 1.)
}


In [None]:
def random_crop_input_and_output_images(
    input_tensor: tf.Tensor,
    output_tensor: tf.Tensor,
    sample_size: int,
    num_input_channels: int,
    num_output_channels: int,
) -> Tuple[tf.Tensor, tf.Tensor]:
    """Take the same random square patch from an input/output image pair.

    The two tensors are joined along axis 2 so that a single random crop is
    applied to both, then split back into their own channel groups.

    Args:
        input_tensor: Input image; concatenated with the output on axis 2.
        output_tensor: Target image with matching leading dimensions.
        sample_size: Side length of the square patch.
        num_input_channels: Number of leading channels belonging to the input.
        num_output_channels: Number of trailing channels belonging to the output.

    Returns:
        Tuple ``(input_patch, output_patch)``.
    """
    total_channels = num_input_channels + num_output_channels
    stacked = tf.concat([input_tensor, output_tensor], axis=2)
    patch = tf.image.random_crop(
        stacked, [sample_size, sample_size, total_channels])
    input_patch = patch[:, :, 0:num_input_channels]
    output_patch = patch[:, :, -num_output_channels:]
    return input_patch, output_patch


def center_crop_input_and_output_images(
    input_tensor: tf.Tensor,
    output_tensor: tf.Tensor,
    sample_size: int,
) -> Tuple[tf.Tensor, tf.Tensor]:
    """Crop the central region of an input/output image pair.

    The kept fraction is ``sample_size`` divided by the size of the input's
    first dimension, and the same fraction is applied to both tensors.

    Args:
        input_tensor: Input image.
        output_tensor: Target image.
        sample_size: Desired side length of the central region.

    Returns:
        Tuple ``(input_crop, output_crop)``.
    """
    fraction = sample_size / input_tensor.shape[0]
    return (
        tf.image.central_crop(input_tensor, fraction),
        tf.image.central_crop(output_tensor, fraction),
    )


def resize_data(
    input_tensor: tf.Tensor,
    output_tensor: tf.Tensor,
    sample_size: int,
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Resize an input/output image pair to ``sample_size`` x ``sample_size``.

  Args:
    input_tensor: Input image; resized with the default interpolation.
    output_tensor: Target image holding class ids; resized with
      nearest-neighbour sampling so ids are never blended into
      non-existent intermediate values.
    sample_size: Side length of the square result.

  Returns:
    Tuple ``(resized_input, resized_output)``.
  """
  target_size = (sample_size, sample_size)
  input_tensor = tf.image.resize(input_tensor, target_size)
  # Interpolating a categorical mask would invent fractional labels.
  output_tensor = tf.image.resize(output_tensor, target_size, method='nearest')
  return input_tensor, output_tensor

In [None]:
def _get_base_key(key: Text) -> Text:
    """Reduce ``key`` to its alphabetic characters and validate it.

    Args:
        key: Feature name, possibly containing non-letter characters.

    Returns:
        ``key`` with every non-alphabetic character removed.

    Raises:
        ValueError: If the reduced name has no entry in ``DATA_STATS``.
    """
    letters_only = ''.join(ch for ch in key if ch.isalpha())
    if letters_only not in DATA_STATS:
        raise ValueError(
            f'No data statistics available for the requested key: {key}.')
    return letters_only



def _clip_and_rescale(inputs: tf.Tensor, key: Text) -> tf.Tensor:
    """Clip a feature to its known range and min-max rescale it.

    ``PrevFireMask`` is shifted by +1 instead of being rescaled. Any
    non-finite value in the result is replaced with zero.

    Args:
        inputs: Feature values.
        key: Feature name; resolved through ``_get_base_key``, which raises
            ``ValueError`` for names without statistics.

    Returns:
        Tensor shaped like ``inputs`` holding the rescaled values.
    """
    base_key = _get_base_key(key)
    stats = DATA_STATS[base_key]
    lower, upper = stats.min, stats.max
    clipped = tf.clip_by_value(inputs, lower, upper)
    if base_key == 'PrevFireMask':
        scaled = tf.math.add(clipped, 1)
    else:
        scaled = (clipped - lower) / (upper - lower)
    is_valid = tf.math.is_finite(scaled)
    return tf.where(is_valid, scaled, tf.zeros_like(scaled))



def _clip_and_normalize(inputs: tf.Tensor, key: Text) -> tf.Tensor:
    """Clip a feature to its known range and standardize it.

    Continuous features become ``(x - mean) / std`` using the statistics in
    ``DATA_STATS``. ``PrevFireMask`` is categorical and is only shifted by +1
    (its statistics are mean 0 / std 1, so standardizing would be a no-op).
    Any non-finite value in the result is replaced with zero.

    The previous implementation returned ``scaled - mean / std``; because of
    operator precedence that subtracted the constant ``mean / std`` from
    min-max scaled data rather than standardizing anything.

    Args:
        inputs: Feature values.
        key: Feature name; resolved through ``_get_base_key``.

    Returns:
        Tensor shaped like ``inputs`` holding the normalized values.

    Raises:
        ValueError: If no statistics exist for ``key`` (raised by
            ``_get_base_key``).
    """
    base_key = _get_base_key(key)
    min_val, max_val, mean, std = DATA_STATS[base_key]
    inputs = tf.clip_by_value(inputs, min_val, max_val)
    if base_key == 'PrevFireMask':
        normalized_inputs = tf.math.add(inputs, 1)
    else:
        normalized_inputs = (inputs - mean) / std
    return tf.where(tf.math.is_finite(normalized_inputs),
                    normalized_inputs,
                    tf.zeros_like(normalized_inputs))


def _get_features_dict(
    sample_size: int,
    features: List[Text],
) -> Dict[Text, tf.io.FixedLenFeature]:
  """Build the parsing spec for a serialized example.

  Args:
    sample_size: Side length of the square stored for every feature.
    features: Feature names; duplicates are collapsed.

  Returns:
    Mapping from each distinct feature name to a float32
    ``FixedLenFeature`` of shape ``[sample_size, sample_size]``.
  """
  square_shape = [sample_size, sample_size]
  return {
      name: tf.io.FixedLenFeature(shape=square_shape, dtype=tf.float32)
      for name in set(features)
  }


def _parse_fn(
    example_proto: tf.train.Example, data_size: int, sample_size: int,
    num_in_channels: int, clip_and_normalize: bool,
    clip_and_rescale: bool, random_crop: bool, center_crop: bool,
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Decode one serialized example into an (input image, label map) pair.

  Args:
    example_proto: Serialized example to parse.
    data_size: Side length of the squares stored in the record.
    sample_size: Side length of the returned squares.
    num_in_channels: Number of input channels, used when random cropping.
    clip_and_normalize: Apply ``_clip_and_normalize`` to every input channel.
    clip_and_rescale: Apply ``_clip_and_rescale`` to every input channel;
      only consulted when ``clip_and_normalize`` is False.
    random_crop: Take a random ``sample_size`` patch.
    center_crop: Take the central ``sample_size`` region.

  Returns:
    ``(input_img, label)``: the input with channels on the last axis, and
    the first output channel shifted by +1 with its channel axis dropped.

  Raises:
    ValueError: If both ``random_crop`` and ``center_crop`` are True.
  """
  if random_crop and center_crop:
    raise ValueError('Cannot have both random_crop and center_crop be True')

  schema = _get_features_dict(data_size, INPUT_FEATURES + OUTPUT_FEATURES)
  parsed = tf.io.parse_single_example(example_proto, schema)

  # Pick the per-channel preprocessing; normalization wins over rescaling.
  if clip_and_normalize:
    preprocess = _clip_and_normalize
  elif clip_and_rescale:
    preprocess = _clip_and_rescale
  else:
    preprocess = lambda tensor, _name: tensor

  input_channels = [
      preprocess(parsed.get(name), name) for name in INPUT_FEATURES
  ]
  # Stack channel-first, then move channels to the last axis.
  input_img = tf.transpose(tf.stack(input_channels, axis=0), [1, 2, 0])

  label_channels = [parsed.get(name) for name in OUTPUT_FEATURES]
  assert label_channels, 'outputs_list should not be empty'
  labels_stacked = tf.stack(label_channels, axis=0)
  labels_stacked_shape = labels_stacked.get_shape().as_list()
  assert len(labels_stacked.shape) == 3, ('outputs_stacked should be rank 3'
                                          'but dimensions of outputs_stacked'
                                          f' are {labels_stacked_shape}')
  output_img = tf.transpose(labels_stacked, [1, 2, 0])
  # Shift the mask by one (the -1..1 range in DATA_STATS becomes 0..2).
  output_img = tf.math.add(output_img, 1)

  if random_crop:
    input_img, output_img = random_crop_input_and_output_images(
        input_img, output_img, sample_size, num_in_channels, 1)
  if center_crop:
    input_img, output_img = center_crop_input_and_output_images(
        input_img, output_img, sample_size)

  input_img, output_img = resize_data(input_img, output_img, sample_size)

  return input_img, output_img[..., 0]



def get_dataset(file_pattern: Text, data_size: int, sample_size: int,
                batch_size: int, num_in_channels: int, compression_type: Text,
                clip_and_normalize: bool, clip_and_rescale: bool,
                random_crop: bool, center_crop: bool) -> tf.data.Dataset:
    """Build a batched dataset of (input image, label map) pairs.

    Args:
        file_pattern: Glob pattern selecting the TFRecord files.
        data_size: Side length of the squares stored in the records.
        sample_size: Side length of the squares produced.
        batch_size: Examples per batch; a final partial batch is dropped.
        num_in_channels: Number of input channels.
        compression_type: Passed through to ``TFRecordDataset``.
        clip_and_normalize: Normalize the input channels.
        clip_and_rescale: Rescale the input channels.
        random_crop: Randomly crop each example.
        center_crop: Center-crop each example.

    Returns:
        The parsed, batched and prefetched dataset. Examples are shuffled
        (buffer of 1000) only when one of the crop flags is set.

    Raises:
        ValueError: If both ``clip_and_normalize`` and ``clip_and_rescale``
            are True.
    """
    if clip_and_normalize and clip_and_rescale:
        raise ValueError('Cannot have both normalize and rescale.')

    autotune = tf.data.experimental.AUTOTUNE

    def _open_records(path):
        return tf.data.TFRecordDataset(path, compression_type=compression_type)

    def _decode(serialized):
        return _parse_fn(
            serialized, data_size, sample_size, num_in_channels,
            clip_and_normalize, clip_and_rescale, random_crop, center_crop)

    records = (
        tf.data.Dataset.list_files(file_pattern)
        .interleave(_open_records, num_parallel_calls=autotune)
        .prefetch(buffer_size=autotune)
    )
    examples = records.map(_decode, num_parallel_calls=autotune)

    if random_crop or center_crop:
        examples = examples.shuffle(buffer_size=1000)

    batches = examples.batch(batch_size, drop_remainder=True)
    return batches.prefetch(buffer_size=autotune)

In [None]:
# Glob patterns selecting every file inside each split's folder.
train_pattern = '/content/dataset/train/*'
eval_pattern = '/content/dataset/eval/*'
test_pattern = '/content/dataset/test/*'

In [None]:
# Side of the square sampled from each tile; 64 equals the stored tile size,
# so no actual cropping takes place.
side_length = 64
num_obs = 100  # batch size
batch_size = num_obs

# Settings shared by all three splits; only random_crop differs between them.
common_dataset_kwargs = dict(
    data_size=64,
    sample_size=side_length,
    batch_size=num_obs,
    num_in_channels=12,
    compression_type=None,
    clip_and_normalize=True,
    clip_and_rescale=False,
    center_crop=False,
)

train_dataset = get_dataset(
    train_pattern, random_crop=True, **common_dataset_kwargs)

eval_dataset = get_dataset(
    eval_pattern, random_crop=False, **common_dataset_kwargs)

test_dataset = get_dataset(
    test_pattern, random_crop=True, **common_dataset_kwargs)

In [None]:
import pandas as pd
import tensorflow as tf

# One iterator per split.
data_iterator = train_dataset.as_numpy_iterator()
data_iterator2 = eval_dataset.as_numpy_iterator()
data_iterator3 = test_dataset.as_numpy_iterator()

# Pull a single batch from each split. The test batch must come from the
# test iterator; it was previously read from the training iterator.
data = next(data_iterator)
data2 = next(data_iterator2)
data3 = next(data_iterator3)

# Flatten every batch to one row per pixel with the 12 input channels as columns.
data_2d = data[0].reshape((-1, 12))
data_2d_2 = data2[0].reshape((-1, 12))
data_2d_2_2 = data3[0].reshape((-1, 12))

# Matching per-pixel labels as column vectors.
label_2d = data[1].reshape((-1, 1))
label_2d_2 = data2[1].reshape((-1, 1))
label_2d_2_2 = data3[1].reshape((-1, 1))


df = pd.DataFrame(data_2d, columns=[f'channel_{i+1}' for i in range(12)])
df_val = pd.DataFrame(data_2d_2, columns=[f'channel_{i+1}' for i in range(12)])
df_test = pd.DataFrame(data_2d_2_2, columns=[f'channel_{i+1}' for i in range(12)])
df['labels'] = label_2d
# Display the dataframe
df

Unnamed: 0,channel_1,channel_2,channel_3,channel_4,channel_5,channel_6,channel_7,channel_8,channel_9,channel_10,channel_11,channel_12,labels
0,-0.981566,-2.512052,-2.433997,-30.571190,-29.283895,-1.662671,-0.388104,0.500954,-1.283540,-0.072844,-1.510136,1.0,1.0
1,-0.979019,-2.511947,-2.434091,-30.570742,-29.283724,-1.662676,-0.388104,0.499585,-1.283540,-0.077428,-1.508976,1.0,1.0
2,-0.978383,-2.511850,-2.434182,-30.570312,-29.283546,-1.662682,-0.388104,0.498683,-1.261892,-0.084774,-1.507847,1.0,1.0
3,-0.980293,-2.511759,-2.434274,-30.569904,-29.283361,-1.662687,-0.388104,0.498033,-1.255686,-0.143192,-1.506755,1.0,1.0
4,-0.981885,-2.511671,-2.434372,-30.569521,-29.283171,-1.662693,-0.388104,0.497518,-1.258108,-0.163917,-1.505704,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
409595,-0.836708,-2.381022,-2.429994,-30.917007,-29.555618,-1.670013,-0.387295,0.600641,-1.464799,-0.164106,-1.498710,1.0,1.0
409596,-0.837026,-2.380044,-2.430504,-30.916901,-29.555265,-1.670014,-0.387310,0.600468,-1.471258,-0.163702,-1.498078,1.0,1.0
409597,-0.837345,-2.379048,-2.431017,-30.916811,-29.554907,-1.670014,-0.387326,0.600371,-1.472570,-0.050549,-1.497453,1.0,1.0
409598,-0.837026,-2.378037,-2.431527,-30.916742,-29.554546,-1.670015,-0.387341,0.600323,-1.472570,-0.155723,-1.496843,1.0,1.0


#### Balancing the dataset with SMOTE

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the per-pixel rows of the single training batch extracted above
# so the label classes are balanced; random_state fixes the synthetic samples.
smote = SMOTE(random_state=42)
# NOTE(review): label_2d is a column vector of shape (-1, 1); presumably the
# resampler flattens it (possibly with a warning) — confirm.
X_resampled, y_resampled = smote.fit_resample(data_2d, label_2d)

## Neural Network Model Training

In [None]:
def weighted_cross_entropy(logits, onehot_labels):
    """Class-weighted softmax cross-entropy for three classes.

    Each example's cross-entropy is multiplied by the weight of its TRUE
    class and the weighted losses are averaged.

    The previous version picked weights from the predicted class, scaled the
    logits rather than the loss, and called ``tf.losses.softmax_cross_entropy``,
    which is only available through the TF1 compatibility API.

    Args:
        logits: Unnormalized scores with the class axis last (size 3).
        onehot_labels: One-hot targets with the same shape as ``logits``.

    Returns:
        Scalar float32 tensor: the mean weighted cross-entropy.
    """
    class_weights = tf.constant([0.4, 0.1, 0.5])
    logits = tf.cast(logits, tf.float32)
    onehot_labels = tf.cast(onehot_labels, tf.float32)

    # The one-hot row selects exactly one class weight per example.
    example_weights = tf.reduce_sum(onehot_labels * class_weights, axis=-1)
    example_losses = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits)

    return tf.reduce_mean(example_weights * example_losses)

In [None]:
class WeightedCCE(tf.keras.losses.Loss):
    """Sparse categorical cross-entropy with optional per-class weights.

    The per-element loss is multiplied by the weight of its true class,
    summed, and divided by the batch size.

    Args:
        class_weight: Sequence of per-class weights indexed by class id.
            ``None`` or all ones disables weighting.
        from_logits: Whether ``y_pred`` holds logits rather than probabilities.
        name: Name given to this loss and to the wrapped loss.
    """

    def __init__(self, class_weight, from_logits=False, name='weighted_scce'):
        # Initialise the Keras base class so its attributes exist.
        super().__init__(reduction=tf.keras.losses.Reduction.NONE, name=name)
        if class_weight is None or all(v == 1. for v in class_weight):
            self.class_weight = None
        else:
            self.class_weight = tf.convert_to_tensor(class_weight,
                dtype=tf.float32)
        # Unreduced, so the weights can be applied element-wise.
        self.unreduced_scce = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=from_logits, name=name,
            reduction=tf.keras.losses.Reduction.NONE)

    def __call__(self, y_true, y_pred, sample_weight=None):
        """Return the class-weighted loss summed and divided by batch size.

        Args:
            y_true: Integer class ids.
            y_pred: Predicted scores with the class axis last.
            sample_weight: Optional weights forwarded to the wrapped loss.

        Returns:
            Scalar loss tensor.
        """
        loss = self.unreduced_scce(y_true, y_pred, sample_weight)
        if self.class_weight is not None:
            weight_mask = tf.gather(self.class_weight, tf.cast(y_true, tf.int32))
            # Align with the loss so a trailing singleton axis on y_true
            # cannot broadcast into a (batch, batch) product.
            weight_mask = tf.reshape(weight_mask, tf.shape(loss))
            loss = tf.math.multiply(loss, weight_mask)

        # Dynamic shape: the static batch dimension may be None in graph mode.
        batch_size = tf.cast(tf.shape(y_true)[0], loss.dtype)
        return tf.reduce_sum(loss) / batch_size

In [None]:
# Train on the SMOTE-balanced rows; validate and test on the untouched batches.
X = X_resampled.copy()
y = y_resampled.copy()
# Fix the one-hot width so all three splits agree even if a class is missing
# from one of the batches.
y = tf.keras.utils.to_categorical(y, num_classes=3)
label_2d_2_in = tf.keras.utils.to_categorical(label_2d_2, num_classes=3)
label_2d_2_2_in = tf.keras.utils.to_categorical(label_2d_2_2, num_classes=3)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    # Softmax: the classes are mutually exclusive and categorical
    # cross-entropy expects a probability distribution over them.
    tf.keras.layers.Dense(3, activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X, y, epochs=35, batch_size=32, validation_data=(df_val, label_2d_2_in))

# Evaluate the model on the test set; the second value is the compiled
# metric, i.e. accuracy.
test_loss, test_accuracy = model.evaluate(df_test, label_2d_2_2_in)
test_mae = test_accuracy  # legacy name, kept in case later cells use it
print(test_loss)



Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35

In [None]:
# Evaluate the model on the test set
import numpy as np
from sklearn.metrics import precision_score, recall_score, classification_report
y_pred = model.predict(data_2d_2_2)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = label_2d_2_2

target_names = ['unknown', 'no fire', 'too much fire']
# Passing labels explicitly keeps the three target names aligned with class
# ids 0/1/2 and stops the report from raising when a class is absent from
# both the true and predicted labels of this batch.
print(classification_report(y_true_classes, y_pred_classes,
                            labels=[0, 1, 2], target_names=target_names))

               precision    recall  f1-score   support

      unknown       0.28      0.41      0.33     13770
      no fire       0.97      0.78      0.87    391103
too much fire       0.03      0.54      0.07      4727

     accuracy                           0.77    409600
    macro avg       0.43      0.58      0.42    409600
 weighted avg       0.94      0.77      0.84    409600

