# _Submitted By:_
# Archita Singla
# 102003575
# 3CO23

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        fullPath = os.path.join(dirname, filename)
        print(fullPath)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
import seaborn as sns

In [None]:
from tensorflow import keras
from keras import Input, Sequential, Model
from keras.regularizers import l2
from keras.layers import Dense, Flatten, InputLayer, Reshape, BatchNormalization, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.utils import plot_model, to_categorical

%matplotlib inline

In [None]:
# Maps each class folder name to the integer label id used throughout the notebook.
categories = {'cloudy': 0, 'desert': 1, 'green_area': 2, 'water': 3}

In [None]:
def load_data(imgFolder, imgSize = (128, 128), scale = False):
  """Load every labelled image found beneath ``imgFolder``.

  The class of an image is the name of the directory that directly contains
  it; that name is looked up in the module-level ``categories`` mapping.

  Args:
    imgFolder: Root directory that is walked recursively.
    imgSize: Target size handed to ``cv2.resize`` for every image.
    scale: When true, pixel values are divided by 255.

  Returns:
    A tuple ``(imgPath, images, labels)`` where ``imgPath`` lists the files
    that were actually loaded, ``images`` is the stacked pixel array and
    ``labels`` holds the matching integer class ids. All three are aligned
    index by index.
  """
  candidates = []
  for dirName, _, fileNames in os.walk(imgFolder):
    for fileName in fileNames:
      candidates.append(os.path.join(dirName, fileName))

  print("There are {} images in {}".format(len(candidates), imgFolder))

  imgPath = []
  images = []
  labels = []
  skipped = 0

  for path in tqdm(candidates):
    # Use os.path rather than splitting on '/' so the parent-folder lookup
    # does not depend on the platform's path separator.
    className = os.path.basename(os.path.dirname(path))
    if className not in categories:
      # File sits outside the known class folders (e.g. a stray metadata file).
      skipped += 1
      continue

    img = cv2.imread(path)
    if img is None:
      # cv2.imread signals an unreadable / non-image file by returning None
      # instead of raising; resizing None would crash the whole load.
      skipped += 1
      continue

    images.append(cv2.resize(img, imgSize))
    labels.append(categories[className])
    imgPath.append(path)

  if skipped:
    print("Skipped {} files that were unreadable or outside the known class folders".format(skipped))

  images = np.array(images)
  # NOTE: int64 is kept so callers see the same dtype as before, although it
  # uses eight times the memory of the uint8 data OpenCV produces.
  images = images.astype(np.int64)

  if scale:
    images = images / 255

  return imgPath, images, np.asarray(labels)

In [None]:
# Load every image under the Kaggle dataset folder, resized to 128x128.
# `scale` is left at its default (False), so pixel values are not divided by 255.
imgSize = (128, 128)
imgFolder = os.path.join('/', 'kaggle', 'input', 'satellite-image-classification')
imgPath, images, labels = load_data(imgFolder, imgSize = imgSize)

# Shape of the stacked image array (shown as the cell output).
images.shape

In [None]:
# Preview a random 7x7 grid of loaded images, each titled with its class name.
# Reverse mapping (label id -> folder name), built once instead of once per tile.
idToCategory = {classId: name for name, classId in categories.items()}

plt.figure(figsize = (10, 10))
randInds = np.random.choice(len(imgPath), 49)
for i in range(49):
  plt.subplot(7, 7, i+1)
  plt.xticks([])
  plt.yticks([])
  plt.grid(False)
  imgInd = randInds[i]

  # The images were read with cv2.imread, which stores channels as BGR, while
  # imshow interprets the last axis as RGB. Reverse the channel axis for
  # display only; the stored array is left untouched. (A colormap would be
  # ignored for colour data, so none is passed.)
  plt.imshow(np.squeeze(images[imgInd])[..., ::-1])

  label = idToCategory[int(labels[imgInd])]

  plt.title(label)

In [None]:
# Count how many samples carry each integer label id (class balance check).
labels_df = pd.DataFrame(labels)
labels_df.value_counts()

In [None]:
def build_model():
    """Assemble the (uncompiled) CNN classifier.

    Layout: one unpadded 32-filter convolution, then five stages of two
    'same'-padded convolutions with 64, 128, 256, 128 and 64 filters. Every
    stage, including the first, ends with 2x2 max-pooling, batch
    normalisation and 40% dropout. The result is flattened into a softmax
    layer with one unit per entry of ``categories``.

    The input shape is taken from the module-level ``images`` array.
    """
    def conv(filters, **extra):
        # Every convolution in the network is 3x3 with ReLU activation.
        return Conv2D(filters = filters, kernel_size = (3, 3), activation = 'relu', **extra)

    def stage_tail():
        # Downsample, normalise and regularise at the end of each stage.
        return [MaxPooling2D(2, 2), BatchNormalization(), Dropout(0.4)]

    stack = [conv(32, input_shape = images.shape[1:])]
    stack.extend(stage_tail())

    for width in (64, 128, 256, 128, 64):
        stack.append(conv(width, padding = 'same'))
        stack.append(conv(width, padding = 'same'))
        stack.extend(stage_tail())

    stack.append(Flatten())
    stack.append(Dense(units = len(categories), activation = 'softmax'))

    return Sequential(stack)

In [None]:
# Build the network and push a single image through it as a smoke test.
model = build_model()
model.predict(images[[0]])
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle

In [None]:
# Re-index the integer label ids with LabelEncoder, then expand them into
# one-hot rows for the softmax output layer.
labEnc = LabelEncoder()
labels = labEnc.fit_transform(labels)
labels = to_categorical(labels)

In [None]:
# Inspect a slice of the one-hot encoded label rows.
labels[4567:4600]

In [None]:
# The network ends in a softmax over len(categories) mutually exclusive classes
# and the targets are one-hot rows, so the matching loss is categorical
# cross-entropy. Binary cross-entropy would score each output unit as an
# independent yes/no problem and, depending on the Keras version, can also make
# the 'accuracy' metric resolve to per-unit binary accuracy.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# images.shape
# Shape of the one-hot label matrix (shown as the cell output).
labels.shape

In [None]:
from sklearn.model_selection import train_test_split as tts

# First split: 70% of the samples for training, 30% held out.
xTrain, xTest, yTrain, yTest = tts(images, labels, test_size = 0.30, random_state = 42)

# Second split: halve the held-out 30% into test and validation sets (15% each).
xTest, xVal, yTest, yVal = tts(xTest, yTest, test_size = 0.5, random_state = 42)

In [None]:
# Print the feature/label array shapes of each split, separated by blank lines.
splitSummary = [
    ('Train', xTrain, yTrain),
    ('Test', xTest, yTest),
    ('Validation', xVal, yVal),
]
for position, (title, features, targets) in enumerate(splitSummary):
    if position:
        print()
    print(title + ':')
    print(features.shape, targets.shape)

In [None]:
# Train for 8 epochs with mini-batches of 37 samples. Passing validation_data
# makes the returned history carry the val_loss / val_accuracy series that the
# plotting cells read.
history = model.fit(xTrain, yTrain,
                   batch_size = 37,
                   epochs = 8,
                   verbose = 1,
                   validation_data = (xVal, yVal))

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# The second curve comes from the validation split passed to fit(), not from
# the held-out test set, so it is labelled accordingly.
plt.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
# The second curve comes from the validation split passed to fit(), not from
# the held-out test set, so it is labelled accordingly.
plt.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

In [None]:
def make_predictions(img):
    """Return the predicted category name for a single image.

    Args:
        img: Pixel array for one image. It is reshaped to a batch of one
            using the input shape of the module-level ``model``.

    Returns:
        The key of ``categories`` whose id has the highest predicted
        probability.

    Raises:
        ValueError: If the predicted class id has no entry in ``categories``.
    """
    # Take the per-sample shape from the model instead of hard-coding
    # (128, 128, 3), so the helper keeps working if the image size changes.
    batch = np.asarray(img).reshape((1,) + tuple(model.input_shape[1:]))

    probabilities = model.predict(batch)
    # A plain Python int avoids relying on the truthiness of a one-element
    # array when comparing against the category ids.
    classId = int(np.argmax(probabilities[0]))

    for name, value in categories.items():
        if value == classId:
            return name

    raise ValueError("predicted class id {} is not in categories".format(classId))

In [None]:
# Try the prediction helper on the first test image.
make_predictions(xTest[0])

In [None]:
# Softmax outputs of the model for every image in the test split.
yPred = model.predict(xTest)

In [None]:
# Shape of the prediction array (shown as the cell output).
yPred.shape

In [None]:
# Collapse each row (predicted probabilities / one-hot target) to its class id.
yPredd = list(np.argmax(yPred, axis = 1))
yTestt = list(np.argmax(yTest, axis = 1))

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Print scikit-learn's classification report for true vs. predicted class ids.
print(classification_report(yTestt, yPredd))

In [None]:
# Category names, in the dict's insertion order (shown as the cell output).
categories.keys()

In [None]:
from sklearn.metrics import confusion_matrix, precision_score
from mlxtend.plotting import plot_confusion_matrix

# Confusion matrix of true vs. predicted class ids on the test split, drawn
# with mlxtend and labelled with the category names.
cm = confusion_matrix(yTestt, yPredd)
# NOTE(review): class_names receives a dict view here; wrap it in list(...) if
# the plotting call ever rejects it.
plot_confusion_matrix(conf_mat = cm,
           figsize = (8, 7),
           class_names = categories.keys(),
           show_normed = True)

In [None]:
# Show a random 7x7 grid of test images, each captioned with the class the
# model predicts for it.
plt.figure(figsize = (10, 10))
randInds = np.random.choice(xTest.shape[0], 49)

for i in range(49):
    plt.subplot(7, 7, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

    imgInd = randInds[i]

    # The pixel data originates from cv2.imread (BGR channel order) while
    # imshow expects RGB, so reverse the channel axis for display only. The
    # model below still receives the untouched array it was trained on.
    plt.imshow(np.squeeze(xTest[imgInd])[..., ::-1])

    label = make_predictions(xTest[imgInd])
    plt.xlabel(label)
    

In [None]:
def evaluate_model(model):
    """Print and plot test-set metrics for ``model``.

    Predicts on the module-level ``xTest`` and compares against the one-hot
    ``yTest``. Emits, once each: the classification report, the confusion
    matrix normalised by the total sample count (printed and drawn as a
    heatmap), and the per-class precision.

    Args:
        model: Object exposing ``predict`` that returns one score row per
            sample.
    """
    # Fix the class order explicitly so the matrix always has one row/column
    # per category, even when a class is absent from the predictions.
    classIds = sorted(categories.values())
    idToName = {classId: name for name, classId in categories.items()}
    categList = [idToName[classId] for classId in classIds]

    yPred = model.predict(xTest)
    yPredd = np.argmax(yPred, axis = 1)
    yTestt = np.argmax(yTest, axis = 1)

    # Everything below runs once on the complete label lists. It used to sit
    # inside the per-sample loop, which re-printed and re-plotted the metrics
    # for every test image on partial data.
    print(classification_report(yTestt, yPredd))

    counts = confusion_matrix(yTestt, yPredd, labels = classIds)
    confMat = counts / np.sum(counts)
    precision = precision_score(yTestt, yPredd, labels = classIds, average = None)

    print('Confusion Matrix:\n')
    print(confMat)

    hm = sns.heatmap(confMat, annot = True, fmt = '.2%', lw = 0.1, cmap = 'Blues', cbar = False,
                     xticklabels = categList, yticklabels = categList)

    hm.set_title('Confusion Matrix\n')
    hm.set_xlabel('\nPredicted Classes')
    hm.set_ylabel('True Classes')

    plt.show()

    # precision_score yields per-class precision, so the heading says so
    # (it was previously labelled as accuracy).
    print('Precision For Each Class:\n')
    for name, score in zip(categList, precision):
        print(f"{name}: {format(score, '.2f')}")

In [None]:
# Run the full evaluation report for the trained model.
evaluate_model(model)

In [None]:
# Save the trained model to 'satelliteImageClassifier.h5' (relative path).
model.save('satelliteImageClassifier.h5')