Each example is a 150 x 150 x 3 RGB digitized image 

For more information regarding this dataset see https://zenodo.org/record/7711810#.ZAm3k-zMKEA

In [None]:
conda install tensorflow

In [2]:
import os
import numpy as np
import zipfile
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

ModuleNotFoundError: No module named 'tensorflow'

### Explore Images

In [None]:
# Root folder of the dataset; each sub-folder is treated as one image class.
base_dir = "./EuroSAT_RGB"

# os.listdir() returns entries in arbitrary order and may include stray files
# (e.g. .DS_Store), so sort the names and only descend into directories.
for class_name in sorted(os.listdir(base_dir)):
    class_dir = os.path.join(base_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    print(f"there are {len(os.listdir(class_dir))} images in {class_name}")

In [None]:
from tensorflow.keras.preprocessing.image import img_to_array, load_img

print("Printing random sample images")

# Only sub-directories of base_dir are classes; sorting keeps the grid order
# stable between runs (os.listdir() order is arbitrary).
class_names = sorted(
    name for name in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, name))
)

# Size the grid from the number of classes instead of assuming exactly 8.
ncols = 4
nrows = max(1, -(-len(class_names) // ncols))  # ceiling division

fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

shape_reported = False
for i, class_name in enumerate(class_names):
    img_dir = os.path.join(base_dir, class_name)
    image_files = os.listdir(img_dir)
    if not image_files:
        # Nothing to show for an empty class folder; leave its grid cell blank.
        continue

    # Draw the index from this folder's actual file count so that classes
    # with fewer images than expected cannot raise an IndexError.
    index = np.random.randint(0, len(image_files))
    sample_image = load_img(os.path.join(img_dir, image_files[index]))

    if not shape_reported:
        # Convert one image into its numpy array representation to report its shape
        sample_array = img_to_array(sample_image)
        print(f"Each image has shape: {sample_array.shape}")
        shape_reported = True

    sp = plt.subplot(nrows, ncols, i + 1)
    sp.title.set_text(class_name)
    sp.axis('Off')  # Don't show axes (or gridlines)

    plt.imshow(sample_image)

### Split images for validation
The call below creates a new `eurosat_images` folder containing a training set of 500 random images per class and a validation set of 125 images per class (an 80/20 split).

In [None]:
import splitfolders

# Split the images under base_dir into the "eurosat_images" output folder
# using an 80% / 20% / 0% ratio; the fixed seed makes the split reproducible.
splitfolders.ratio(
    base_dir,
    output="eurosat_images",
    seed=1337,
    ratio=(0.8, 0.2, 0),
)

In [None]:
# Locations of the training and validation subsets inside the split output folder.
train_dir, validation_dir = "eurosat_images/train", "eurosat_images/val"

#### Data preprocessing
Training and validation image data generators
Now that the data are split into training and validation sets I will create generators to feed the labelled images to the network.

#### Normalization and Augmentation
We can also normalize (rescale) the images during this step as well as expand the training set through augmentation to prevent overfitting on the original training set.

In [None]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR, *, target_size=(150, 150),
                         train_batch_size=50, validation_batch_size=25):
    """Build the training and validation image generators.

    Args:
        TRAINING_DIR: Directory with one sub-folder of images per class, used
            for training.
        VALIDATION_DIR: Directory with one sub-folder of images per class, used
            for validation.
        target_size: (height, width) every image is resized to when loaded.
        train_batch_size: Number of images per training batch.
        validation_batch_size: Number of images per validation batch.

    Returns:
        A ``(train_generator, validation_generator)`` tuple. Both yield images
        rescaled by 1/255 with one-hot ("categorical") labels. Only the
        training generator applies augmentation, and the validation generator
        is not shuffled so its batches follow the order of its ``classes``
        attribute.
    """
    # Training images are normalized and randomly augmented
    train_datagen = ImageDataGenerator(
        rescale=1/255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )

    train_generator = train_datagen.flow_from_directory(
        TRAINING_DIR,
        batch_size=train_batch_size,
        class_mode='categorical',
        target_size=target_size,
    )

    # Validation images are only normalized: no augmentation
    validation_datagen = ImageDataGenerator(rescale=1/255)

    # shuffle=False keeps predictions aligned with validation_generator.classes
    validation_generator = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        batch_size=validation_batch_size,
        class_mode='categorical',
        target_size=target_size,
        shuffle=False,
    )

    return train_generator, validation_generator

In [None]:
# Create the generators that feed labelled image batches to the network.
train_generator, validation_generator = train_val_generators(
    TRAINING_DIR=train_dir,
    VALIDATION_DIR=validation_dir,
)

### Building and compiling the model
Now we will build the CNN architecture using the sequential API and compile it with the Adam optimizer and a very small learning rate.

- use 3x3 filters for convolving.

- use 2x2 for pooling

The model initially suffered from underfitting, with high bias resulting in poor training accuracy. To combat this I deepened the network with more convolutional and dense layers, increased the number of neurons in dense layers and trained the model for more epochs.

Dropout of 0.1 is used to prevent overfitting.

In [None]:
def create_model(num_classes=8, input_shape=(150, 150, 3), learning_rate=0.0001):
    """Build and compile the CNN image classifier.

    The network is three Conv2D/MaxPooling2D stages (32, 64 and 128 filters,
    3x3 kernels, 2x2 pooling), followed by flatten, dropout (0.1), two dense
    layers (256 and 512 units) and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer (one per class).
        input_shape: Shape of a single input image as (height, width, channels).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``tf.keras`` Sequential model using categorical
        cross-entropy loss and reporting accuracy.
    """
    model = tf.keras.models.Sequential([
        # Explicit input layer (preferred over passing input_shape= to the
        # first Conv2D in recent Keras versions)
        tf.keras.Input(shape=input_shape),
        # first convolution
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # second convolution
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # third convolution
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # flattening layer
        tf.keras.layers.Flatten(),
        # dropout to prevent overfitting
        tf.keras.layers.Dropout(0.1),
        # first dense layer with 256 neurons
        tf.keras.layers.Dense(256, activation='relu'),
        # second dense layer with 512 neurons
        tf.keras.layers.Dense(512, activation='relu'),
        # output layer: one neuron per class, softmax for multiclass classification
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

### Training and testing the model accuracy
First, extend the Keras `Callback` class to create a callback that stops training once validation accuracy exceeds 90%.

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy exceeds a threshold.

    Args:
        threshold: Validation accuracy (as a fraction, e.g. 0.9 for 90%) that
            must be exceeded before training is stopped.
    """

    def __init__(self, threshold=0.9):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check ``val_accuracy`` at the end of an epoch and stop if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict for the epoch; may be None or lack
                ``val_accuracy``, in which case nothing happens.
        """
        # logs defaults to None rather than a shared mutable dict
        val_accuracy = (logs or {}).get("val_accuracy")
        if val_accuracy is not None and val_accuracy > self.threshold:
            print(f"\n{self.threshold:.0%} validation accuracy reached, stopping training.")
            self.model.stop_training = True

In [None]:
# Create the early-stopping callback and a freshly compiled network.
callbacks = myCallback()
model = create_model()

# Fit on the training generator for up to 100 epochs, evaluating on the
# validation generator after each epoch; the callback may end training early.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=100,
    callbacks=[callbacks],
)

### Plot model training history

In [None]:
# Per-epoch metrics recorded by model.fit()
training_log = history.history
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']

epochs = range(len(acc))

# One figure per metric: training curve in blue, validation curve in red.
curves = (
    (acc, val_acc,
     'Training accuracy', 'Validation accuracy', 'Training and validation accuracy'),
    (loss, val_loss,
     'Training loss', 'Validation loss', 'Training and validation loss'),
)
for train_curve, val_curve, train_label, val_label, figure_title in curves:
    plt.plot(epochs, train_curve, 'b', label=train_label)
    plt.plot(epochs, val_curve, 'r', label=val_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()

Validation accuracy and loss that track the training accuracy and loss closely are a good sign that the model is not overfitting.

### Confusion Matrix
Let's determine whether the model is good at predicting land use, and which classes it is better or worse at predicting.

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Predicted class = index of the highest softmax probability per image
Y_pred = model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)
# True integer class ids, in the generator's (unshuffled) file order
y_test = validation_generator.classes

# Take the label order from the generator's own name -> index mapping.
# os.listdir() order is arbitrary and need not match the integer ids above.
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
# NOTE(review): the first 3 characters of each folder name are dropped, as in
# the original code (presumably a prefix on the folder names) - confirm.
labels = [name[3:] for name in class_names]

# Pin the matrix to one row/column per class, even if a class never occurs
# in y_test or y_pred, so it always matches display_labels.
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(class_names)))

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

print('Confusion Matrix')
plt.rcParams["figure.figsize"] = (25,5.5)
disp.plot(cmap=plt.cm.Blues,xticks_rotation=45)
plt.show()