## Fire Image Classification

- Download this dataset -> https://github.com/cair/Fire-Detection-Image-Dataset

- Create a dataframe like this for each folder -> Assign label 1 for fire images and label 0 for normal images

<img src="sample_df.png" width="600" height="600">

- Merge dataframes -> then save the total df as a csv file

- Train a CNN + MLP model while using `.fit_generator` way (Do remember to Resize all images)

## To make the model better:

- Upsample minority class (fire images) -> https://elitedatascience.com/imbalanced-classes?_ga=2.44533796.1624997989.1593199508-1623274989.1547664151

- Do Data Augmentation for each image 

- Hyper-parameters tuning

In [1]:
%cd images

/Users/alannanoguchi/dev/DS/DS2.2/DS-2.2-Deep-Learning/Final_Project/images


In [34]:
import glob
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from PIL import Image
from keras.preprocessing.image import img_to_array
import numpy as np
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers.core import Dense, Activation
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator

In [35]:
files = glob.glob("Fire_images/*.*")
ls_fire = []
for filepath in files:
    ls_fire.append(['Fire_images', os.path.basename(filepath), '1'])

In [36]:
files = glob.glob("Normal_images/*.*")
ls_normal = []
for filepath in files:
    ls_normal.append(['Normal_images', os.path.basename(filepath), '0'])

In [37]:
df_fire = pd.DataFrame(ls_fire, columns=['Folder', 'filename', 'label'])

In [38]:
df_normal = pd.DataFrame(ls_normal, columns=['Folder', 'filename', 'label'])

In [39]:
df_fire.head()

Unnamed: 0,Folder,filename,label
0,Fire_images,dsc_01001.jpg,1
1,Fire_images,burning-charcoal-briquettes.jpg,1
2,Fire_images,Chimney-Fire3.jpg,1
3,Fire_images,canada-wildfire.jpg,1
4,Fire_images,Living_Room_Camp_Fire.jpg,1


In [40]:
df_normal.head()

Unnamed: 0,Folder,filename,label
0,Normal_images,1526t.jpg,0
1,Normal_images,theoffice3.jpg,0
2,Normal_images,23e90181fcef550cffae7c4ff77e566d.jpg,0
3,Normal_images,tumblr_nhcrcizCaH1ty53xvo1_1280.jpg,0
4,Normal_images,day7_8.jpg,0


In [41]:
df = pd.concat([df_fire, df_normal]).reset_index().drop('index', axis=1) # combining the dataframes
df

Unnamed: 0,Folder,filename,label
0,Fire_images,dsc_01001.jpg,1
1,Fire_images,burning-charcoal-briquettes.jpg,1
2,Fire_images,Chimney-Fire3.jpg,1
3,Fire_images,canada-wildfire.jpg,1
4,Fire_images,Living_Room_Camp_Fire.jpg,1
...,...,...,...
639,Normal_images,norway.jpg,0
640,Normal_images,AAEAAQAAAAAAAAJqAAAAJDAwNDUwNDAwLWUwYTAtNDlmNy...,0
641,Normal_images,open-offices.jpg,0
642,Normal_images,norway-old-bergen-museum.jpg,0


In [42]:
# split into train and test
df_train, df_test = train_test_split(df, test_size=0.2, random_state=0)

In [43]:
img = Image.open(os.path.join(df['Folder'][640], df['filename'][640]))
print(img.size)
image_red = img.resize((1024, 1024))   # resizes all images to 1024x1024
image = img_to_array(image_red)     # takes image object and converts to numpy nums

(558, 320)


In [44]:
# Check that image is an array
image

array([[[117., 122., 125.],
        [117., 122., 125.],
        [118., 123., 126.],
        ...,
        [ 79.,  80.,  72.],
        [ 80.,  81.,  73.],
        [ 80.,  81.,  73.]],

       [[117., 122., 125.],
        [117., 122., 125.],
        [118., 123., 126.],
        ...,
        [ 79.,  80.,  72.],
        [ 80.,  81.,  73.],
        [ 80.,  81.,  73.]],

       [[117., 122., 125.],
        [117., 122., 125.],
        [118., 123., 126.],
        ...,
        [ 79.,  80.,  72.],
        [ 80.,  81.,  73.],
        [ 80.,  81.,  73.]],

       ...,

       [[ 61.,  60.,  66.],
        [ 61.,  60.,  66.],
        [ 51.,  50.,  56.],
        ...,
        [ 15.,  15.,  15.],
        [ 14.,  14.,  14.],
        [ 14.,  14.,  14.]],

       [[ 61.,  60.,  66.],
        [ 61.,  60.,  66.],
        [ 51.,  50.,  56.],
        ...,
        [ 15.,  15.,  15.],
        [ 14.,  14.,  14.],
        [ 14.,  14.,  14.]],

       [[ 61.,  60.,  66.],
        [ 61.,  60.,  66.],
        [ 51.,  

In [45]:
# check image is resized
image_red.size

(1024, 1024)

In [46]:
# check image shape
image.shape

(1024, 1024, 3)

## Merge the two DataFrames (fire and normal)

In [49]:
def data_gen(df, batch_size):
    
    while True:
        x_batch = np.zeros((batch_size, 1024, 1024, 3))
        y_batch = np.zeros((batch_size, 1))


        # Run through each chunk
        for chunk_index in range(len(df)//batch_size):
            start_chunk_index = chunk_index*batch_size
            end_chunk_index = (chunk_index+1)*batch_size

#             print(f'Chunk index: {chunk_index}' )

            i = 0
            for folder, filename, label in zip(df['Folder'].values[start_chunk_index:end_chunk_index], df['filename'].values[start_chunk_index:end_chunk_index], df['label'].values[start_chunk_index:end_chunk_index]):
#                 print(f'Looking at folder: {folder}, filename:{filename}, label:{label}')
                img = Image.open(os.path.join(folder, filename))
                image_red = img.resize((1024, 1024))
                try:
                    x_batch[i] = img_to_array(image_red)
                except:
                    print(f'this file is currupted: {os.path.join(folder, filename)}')
                    os.remove(os.path.join(folder, filename))

                y_batch[i] = label
                i += 1


            yield x_batch, np_utils.to_categorical(y_batch, 2)     # 2 for the number of classes we have
        

In [30]:
df['filename']

0                                          dsc_01001.jpg
1                        burning-charcoal-briquettes.jpg
2                                      Chimney-Fire3.jpg
3                                    canada-wildfire.jpg
4                              Living_Room_Camp_Fire.jpg
                             ...                        
639                                           norway.jpg
640    AAEAAQAAAAAAAAJqAAAAJDAwNDUwNDAwLWUwYTAtNDlmNy...
641                                     open-offices.jpg
642                         norway-old-bergen-museum.jpg
643    romantic-bedrooms-bedrooms-and-red-color-schem...
Name: filename, Length: 644, dtype: object

 # Define the model

In [22]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3),   
                 activation='relu',
                 input_shape=(1024, 1024, 3)))

model.add(Conv2D(32, kernel_size=(3,3), activation='relu'))  
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))

# compile the model, this is necessary before fitting
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics = ['accuracy'])

# Train the model while using `.fit_generator`

In [24]:
batch_size = 50
model.fit_generator(generator=data_gen(df_train, batch_size=batch_size), steps_per_epoch=len(df_train) // batch_size, epochs=2, verbose=1)

Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7f8c7748c4d0>

## Accuracy is 73.5 with a loss of 4.26 ( not that great )

In [25]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 1022, 1022, 32)    896       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 1020, 1020, 32)    9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 510, 510, 32)      0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 510, 510, 32)      0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 8323200)           0         
_________________________________________________________________
dense_9 (Dense)              (None, 64)                532684864 
_________________________________________________________________
dropout_10 (Dropout)         (None, 64)               

https://www.pyimagesearch.com/2018/12/24/how-to-use-keras-fit-and-fit_generator-a-hands-on-tutorial/

# To improve performance, use data augmentation

- Use the training make some transformations on the images
    - For example: image rotation, rescaling, horizontal/vertical flip, zooming, etc.

In [47]:
train_datagen = ImageDataGenerator( rotation_range = 30,     # rotation_range randomly rotates images
                            width_shift_range = 0.2,   # translates images vertically
                            height_shift_range = 0.2,  # translates images horizontally
                            horizontal_flip = True,)   # randomly flips half of the images horizontally

train_datagen.fit(df_train)

ValueError: could not convert string to float: 'Normal_images'