# Satellite Image Classification

![](https://cff2.earth.com/uploads/2019/08/09193739/satellite-67718_1280-960x640.jpg)

[Image Source](https://www.earth.com/earthpedia-articles/how-many-satellites-in-space-do-we-know-about/)

The "Satellite Image Classification" dataset on Kaggle is a collection of satellite images labeled with classes such as "Cloudy", "Desert", "Green_Area", and "Water". The data is made up of satellite images of size 256x256, and can be used for computer vision tasks such as image classification.

# Satellite Image Classification Model

---

# GPU Being Used:

In [15]:
!nvidia-smi

Mon Jun 26 15:32:02 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.98                 Driver Version: 535.98       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060      WDDM  | 00000000:0B:00.0  On |                  N/A |
| 42%   53C    P5              21W / 170W |   6348MiB / 12288MiB |      7%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Data Pre-processing:

In [16]:
import pandas as pd
import os

# Create an empty dataframe
data = pd.DataFrame(columns=['image_path', 'label'])

# Define the labels/classes
labels = {'data/cloudy' : 'Cloudy',
          'data/desert' : 'Desert',
          'data/green_area' : 'Green_Area',
          'data/water' : 'Water',
           }

In [17]:
# Loop over the train, test, and val folders and extract the image path and label
for folder in labels:
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        label = labels[folder]
        data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)

  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_index=True)
  data = data.append({'image_path': image_path, 'label': label}, ignore_inde

In [18]:
# Save the data to a CSV file
data.to_csv('image_dataset.csv', index=False)

In [19]:
# pip install --upgrade "protobuf<=3.20.1"

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [21]:
# Load the dataset
df = pd.read_csv("image_dataset.csv")

In [22]:
df

Unnamed: 0,image_path,label
0,data/cloudy\train_10021.jpg,Cloudy
1,data/cloudy\train_10043.jpg,Cloudy
2,data/cloudy\train_10070.jpg,Cloudy
3,data/cloudy\train_10081.jpg,Cloudy
4,data/cloudy\train_10096.jpg,Cloudy
...,...,...
5626,data/water\SeaLake_995.jpg,Water
5627,data/water\SeaLake_996.jpg,Water
5628,data/water\SeaLake_997.jpg,Water
5629,data/water\SeaLake_998.jpg,Water


In [23]:
# Split the dataset into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [24]:
# Pre-process the data
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   rotation_range=45,
                                   vertical_flip=True,
                                   fill_mode='nearest')


test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,
                                                    x_col="image_path",
                                                    y_col="label",
                                                    target_size=(255, 255),
                                                    batch_size=32,
                                                    class_mode="categorical")

test_generator = test_datagen.flow_from_dataframe(dataframe=test_df,
                                                  x_col="image_path",
                                                  y_col="label",
                                                  target_size=(255, 255),
                                                  batch_size=32,
                                                  class_mode="categorical")


Found 4504 validated image filenames belonging to 4 classes.
Found 1127 validated image filenames belonging to 4 classes.


# Deep Learning Model

In [25]:
def build_model(activation, optimizer):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(255, 255, 3), activation=activation))
    model.add(Conv2D(32, (3, 3), activation=activation, input_shape=(64, 64, 3)))
    model.add(MaxPooling2D(2, 2))
    model.add(Conv2D(64, (3, 3), activation=activation))
    model.add(MaxPooling2D(2, 2))
    model.add(Conv2D(128, (3, 3), activation=activation))
    model.add(MaxPooling2D(2, 2))
    model.add(Flatten())
    model.add(Dense(128, activation=activation))
    model.add(Dropout(0.5))
    model.add(Dense(4, activation='softmax'))

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [26]:
from time import time
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
print(tf.config.list_physical_devices())

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [27]:
from time import time
import tensorflow as tf
import json
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model

optimizers = {
    'adam': tf.keras.optimizers.Adam,
    'sgd': tf.keras.optimizers.SGD,
    'adamax': tf.keras.optimizers.Adamax
}

for activation in ['relu', 'elu', 'softmax']:
    for optimizer_name in ['adam', 'sgd', 'adamax']:
        for learning_rate in [0.001, 0.01, 0.5]:

            start = time()
            folder = f"{activation}_{optimizer_name}_{learning_rate}"
            try:
                os.mkdir(folder)
            except FileExistsError:
                pass

            optimizer = optimizers[optimizer_name](learning_rate = learning_rate)

            model = build_model(activation, optimizer)
            history = model.fit_generator(train_generator, epochs=5, validation_data=test_generator)

            num_samples = test_df.shape[0]
            score = model.evaluate(test_generator,
                           steps=num_samples//32+1,
                           )
            print(f"{score = }")

            plot_model(model, to_file=f'{folder}/cnn_model.png', show_shapes=True, show_layer_names=True)

            # save it as a h5 file
            model.save(f'{folder}/Model.h5')

            with open(f"{folder}/history.json", "w") as f:
                json.dump(history.history, f)
            end = time()
            print(f"{end-start = }")
            with open(f"{folder}/time.txt", "w") as f:
                f.write(str(end-start))

  history = model.fit_generator(train_generator, epochs=5, validation_data=test_generator)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
score = [0.30814942717552185, 0.8589174747467041]
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
end-start = 173.66261076927185
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
score = [1.2424670457839966, 0.4480922818183899]
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
end-start = 164.75992274284363
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
score = [1.4145185947418213, 0.20141971111297607]
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
end-start = 165.55054306983948
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
score = [0.6020866632461548, 0.7107365131378174]
You must install pydot (`pip install pydot`) and install graph