# Wildfire Detection DS420
### Authors: Aj Botticelli & Yusuke Satani
### Date: Spring 2023
### Goal: Using satellite image data from Kaggle (sourced from the Canadian government), predict wildfire occurrences with a model integrated with Spark for real-time parsing
Based on https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

https://vijayabhaskar96.medium.com/tutorial-image-classification-with-keras-flow-from-directory-and-generators-95f75ebe5720

https://towardsdatascience.com/how-to-train-your-neural-networks-in-parallel-with-keras-and-apache-spark-ea8a3f48cae6

https://towardsdatascience.com/deep-learning-with-apache-spark-part-2-2a2938a36d35

https://github.com/HDFGroup/hdf5-spark-connector

https://towardsdatascience.com/implementing-alexnet-cnn-architecture-using-tensorflow-2-0-and-keras-2113e090ad98

# Imports & Pathfinding

In [None]:
import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pathlib

from PIL import ImageFile

In [None]:
# Input directories for the train / validation / test splits of the dataset.
pathTrain, pathVal, pathTest = (
    "../input/wildfire-prediction-dataset/train",
    "../input/wildfire-prediction-dataset/valid",
    "../input/wildfire-prediction-dataset/test",
)

# Offline Model: Base CNN

## Preprocessing

In [None]:
# One generator per split; each multiplies pixel values by 1/255.
pixel_scale = 1./255
trainDatagen = ImageDataGenerator(rescale=pixel_scale)
testDatagen = ImageDataGenerator(rescale=pixel_scale)
validationDatagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Directory iterator over the training split.
# Target size and batch size were both chosen arbitrarily.
trainIter = trainDatagen.flow_from_directory(
    directory=pathTrain,
    batch_size=16,
    target_size=(256, 256),
)

In [None]:
# Directory iterator over the test split (same image size and batch size as training).
testIter = testDatagen.flow_from_directory(
    directory=pathTest,
    batch_size=16,
    target_size=(256, 256),
)

In [None]:
# Directory iterator over the validation split (same image size and batch size as training).
validIter = validationDatagen.flow_from_directory(
    directory=pathVal,
    batch_size=16,
    target_size=(256, 256),
)

In [None]:
# Tally how many training samples fall under each class index.
from collections import Counter

counter = Counter()
counter.update(trainIter.classes)
print(counter.items())

In [None]:
# Reset the training iterator, then pull one batch to sanity-check the
# array shapes and value ranges of images (X) and labels (y).
trainIter.reset()

# The built-in next() goes through the iterator protocol instead of relying on
# the iterator object exposing its own `.next()` method.
batchX, batchy = next(trainIter)
print(f"Batch shape of X = {batchX.shape}, Xmin = {batchX.min()}, Xmax = {batchX.max()}")
print(f"Batch shape of Y = {batchy.shape}, ymin = {batchy.min()}, ymax = {batchy.max()}")

In [None]:
# Show the class_indices mapping reported by the training iterator.
class_index_map = trainIter.class_indices
print(class_index_map)

## Modeling
- First using a simple self-made CNN

- Then using a CNN following the AlexNet structure

In [None]:
# Hyper-parameters and dataset sizes for the base CNN.

# Dimensions of our images (same as the target_size given to the iterators).
img_width, img_height = 256, 256
input_shape = (img_width, img_height, 3)  # each image has 3 channels

# Epochs
epochsAmt = 10

# Number of images read every iteration. This must equal the batch_size=16
# passed to flow_from_directory for the 256x256 iterators; the previous value
# of 64 made the step counts below cover only about a quarter of each split.
batch_size = 16

# Total number of images for training, testing and validation
train_num = 30250
test_num = 6300
valid_num = 6300

# Total number of steps in each epoch (integer division drops the final
# partial batch)
train_steps = train_num // batch_size
test_steps = test_num // batch_size
valid_steps = valid_num // batch_size

# Total number of classes
num_classes = 2


In [None]:
# Base CNN: two 3x3 convolutions, one max-pool, dropout, then a small dense
# head ending in a 2-unit softmax.
cnn_layers = [
    layers.Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    layers.Conv2D(16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(2, activation='softmax'),
]

model = keras.Sequential(cnn_layers, name='Wildfire-CNN')
model.summary()

In [None]:
# Adam optimizer with its settings spelled out, then compile the model for
# categorical cross-entropy while tracking accuracy.
adam_settings = {'learning_rate': 0.001, 'beta_1': 0.9, 'beta_2': 0.999}
opt = tf.keras.optimizers.Adam(**adam_settings)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Tell Pillow to load truncated image files instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Train the base CNN.
# - No batch_size is passed: trainIter / validIter already yield batches.
# - Step counts are taken from the iterators themselves (len() is the number
#   of batches in one pass), so every epoch covers the full training split and
#   validation runs over the validation split rather than a count derived
#   from the test split.
history = model.fit(
    trainIter,
    epochs=epochsAmt,
    validation_data=validIter,
    steps_per_epoch=len(trainIter),
    validation_steps=len(validIter),
)

In [None]:
# Evaluate the trained model on the test iterator and keep the returned values.
result = model.evaluate(x=testIter)

In [None]:
# Plot training vs. validation accuracy recorded in the fit history.
for metric_key, legend_name in (('accuracy', 'train'), ('val_accuracy', 'valid')):
    plt.plot(history.history[metric_key], label=legend_name)

plt.xlabel('Epoch')
plt.ylabel('Accuracy Scores')
plt.legend()

plt.show()

In [None]:
# Reset the test iterator, take one batch and predict on it.
testIter.reset()
# Built-in next() uses the iterator protocol rather than the iterator's own
# `.next()` method.
images, labels = next(testIter)
y_pred = model.predict(images)


In [None]:
# Display the predictions computed for the test batch (notebook cell output).
y_pred

In [None]:
# Index of the largest predicted score for the sample at position 1.
np.argmax(y_pred[1])

In [None]:
# Display the labels that came with the same test batch (notebook cell output).
labels

In [None]:
# Index of the largest entry in the label vector of the sample at position 1.
np.argmax(labels[1])

In [None]:
# Show every image of one test batch with its true and predicted class index.
testIter.reset()
# Built-in next() uses the iterator protocol rather than the iterator's own
# `.next()` method.
images, labels = next(testIter)
y_pred = model.predict(images)

for idx, img in enumerate(images):
    plt.figure(figsize=(2, 2))
    # Class index = position of the largest value in the score / label vector.
    pred_label = str(y_pred[idx].argmax())
    true_label = str(labels[idx].argmax())
    plt.title(f"Sample: {idx}\n[True]: {true_label} \n[Pred]: {pred_label}")
    plt.imshow(img)
    plt.show()


Old Code

# Earlier version of the sample-visualisation code, kept under "Old Code".
# NOTE(review): predictions are computed over the whole test iterator *before*
# it is reset, while `images` is only the first batch drawn after the reset;
# y_pred[idx] and images[idx] are presumably not aligned if the iterator
# shuffles - confirm before reusing.
y_pred = model.predict(testIter)
testIter.reset() 
images, labels =  testIter.next()

for idx, img in enumerate(images):
    plt.figure(figsize = (2, 2))
    pred_label=str(y_pred[idx].argmax())
    # NOTE(review): the "true" label is taken from y_pred (argmax over the
    # entire prediction array), not from `labels`.
    true_label=str(y_pred.argmax())
    # Only samples whose two label strings differ are plotted.
    if(pred_label!= true_label):
        plt.title('Sample: '+str(idx) + "\n[True]: "+true_label + " \n[Pred]: "+ pred_label)
        plt.imshow(img)
        plt.show()


# Offline Model: AlexNet CNN

## Preprocessing

In [None]:
# Generators for the AlexNet pipeline.
# Pixel values are scaled by 1/255, as in the base-CNN section. 227 is the
# image side length used for target_size, not the pixel maximum, so the
# previous 1/227 factor did not map inputs into [0, 1].
trainDatagen = ImageDataGenerator(rescale=1./255)
testDatagen = ImageDataGenerator(rescale=1./255)
validationDatagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Directory iterator over the training split, resized to 227x227.
trainIter = trainDatagen.flow_from_directory(
    directory=pathTrain,
    batch_size=32,  # Tune this parameter
    target_size=(227, 227),
)

In [None]:
# Directory iterator over the test split, resized to 227x227.
testIter = testDatagen.flow_from_directory(
    directory=pathTest,
    batch_size=32,
    target_size=(227, 227),
)

In [None]:
# Directory iterator over the validation split, resized to 227x227.
validIter = validationDatagen.flow_from_directory(
    directory=pathVal,
    batch_size=32,
    target_size=(227, 227),
)

In [None]:
# Tally how many training samples fall under each class index.
from collections import Counter

counter = Counter()
counter.update(trainIter.classes)
print(counter.items())

In [None]:
# Reset the training iterator, then pull one batch to sanity-check the
# array shapes and value ranges of images (X) and labels (y).
trainIter.reset()

# The built-in next() goes through the iterator protocol instead of relying on
# the iterator object exposing its own `.next()` method.
batchX, batchy = next(trainIter)
print(f"Batch shape of X = {batchX.shape}, Xmin = {batchX.min()}, Xmax = {batchX.max()}")
print(f"Batch shape of Y = {batchy.shape}, ymin = {batchy.min()}, ymax = {batchy.max()}")

In [None]:
# Show the class_indices mapping reported by the training iterator.
class_index_map = trainIter.class_indices
print(class_index_map)

### AlexNet Model

In [None]:
# Hyper-parameters and dataset sizes for the AlexNet run.

# Square input images with 3 channels each.
img_width = img_height = 227
input_shape = (img_width, img_height, 3)

# Number of training epochs
epochsAmt = 10

# Images read per iteration (tune this parameter)
batch_size = 32

# Image counts for the training, test and validation splits
train_num, test_num, valid_num = 30250, 6300, 6300

# Whole batches per epoch (floor division)
train_steps, test_steps = train_num // batch_size, test_num // batch_size

# Number of target classes
num_classes = 2

In [None]:
# AlexNet-style network: five convolutions interleaved with three max-pools,
# followed by two 4096-unit dense layers with dropout and a 2-unit softmax.
alexnet_layers = [
    layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(2, activation='softmax'),
]

model = keras.Sequential(alexnet_layers, name='Wildfire-AlexNet')
model.summary()

In [None]:
# Compile the AlexNet model with plain SGD.
# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Tell Pillow to load truncated image files instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True  # About 40 Minutes Per epoch

# Train the AlexNet model.
# - No batch_size is passed: trainIter / validIter already yield batches.
# - Step counts are taken from the iterators themselves (len() is the number
#   of batches in one pass), so validation length follows the validation split
#   rather than a count derived from the test split.
history = model.fit(
    trainIter,
    epochs=epochsAmt,
    validation_data=validIter,
    steps_per_epoch=len(trainIter),
    validation_steps=len(validIter),
)

In [None]:
# Evaluate the trained model on the test iterator and keep the returned values.
result = model.evaluate(x=testIter)

In [None]:
# Plot training vs. validation accuracy recorded in the fit history.
accuracy_curves = {'train': 'accuracy', 'valid': 'val_accuracy'}
for legend_name, metric_key in accuracy_curves.items():
    plt.plot(history.history[metric_key], label=legend_name)

plt.xlabel('Epoch')
plt.ylabel('Accuracy Scores')
plt.legend()

plt.show()

In [None]:
# Show every image of one test batch with its true and predicted class index.
testIter.reset()
# Built-in next() uses the iterator protocol rather than the iterator's own
# `.next()` method.
images, labels = next(testIter)
y_pred = model.predict(images)

for idx, img in enumerate(images):
    plt.figure(figsize=(2, 2))
    # Class index = position of the largest value in the score / label vector.
    pred_label = str(y_pred[idx].argmax())
    true_label = str(labels[idx].argmax())
    plt.title(f"Sample: {idx}\n[True]: {true_label} \n[Pred]: {pred_label}")
    plt.imshow(img)
    plt.show()
