In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models

# Constants
IMAGE_SIZE = (128, 128)  # Resize images to 128x128
BATCH_SIZE = 32

# Load labels from the CSV file
def load_labels_from_csv(csv_file):
    """Read the label CSV and hand back its two columns of interest.

    Args:
        csv_file: Path or file-like object accepted by ``pd.read_csv``. The
            table must contain ``image_name`` and ``tags`` columns.

    Returns:
        A ``(filenames, classes)`` tuple holding the ``.values`` of the
        ``image_name`` column and of the ``tags`` column, in row order.
    """
    table = pd.read_csv(csv_file)
    return table['image_name'].values, table['tags'].values

# Load images based on filenames from CSV
def load_images_from_folder(folder, filenames):
    """Load the listed ``.jpg`` files from ``folder`` and resize them.

    Names that do not end in ``.jpg`` and files that ``cv2.imread`` cannot
    read are left out of the result, so the returned array can be shorter
    than ``filenames``. Because callers pair the images with labels by
    position, a ``RuntimeWarning`` is issued whenever anything was skipped.

    Args:
        folder: Directory that contains the image files.
        filenames: Iterable of file names (with extension) relative to
            ``folder``.

    Returns:
        ``np.ndarray`` built from the successfully loaded images, each
        resized with ``cv2.resize(img, IMAGE_SIZE)``. Empty when nothing
        could be loaded.
    """
    import warnings  # local import: only needed for the skip report below

    images = []
    skipped = []
    for filename in filenames:
        if not filename.endswith('.jpg'):
            skipped.append(filename)
            continue
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # imread signals an unreadable/missing file by returning None
            skipped.append(filename)
            continue
        images.append(cv2.resize(img, IMAGE_SIZE))

    if skipped:
        warnings.warn(
            f"{len(skipped)} file(s) were skipped (first: {skipped[0]!r}); "
            "the returned array no longer lines up one-to-one with `filenames`.",
            RuntimeWarning,
            stacklevel=2,
        )

    return np.array(images)




In [3]:
from sklearn.preprocessing import LabelEncoder

# Load data
filenames, y = load_labels_from_csv('/Users/angel/OneDrive/Desktop/CS 4100/Satellite_Image_Classifier/data/train_v2.csv')
# The CSV stores bare image names; append the extension used by the files on disk
filenames = [name + '.jpg' for name in filenames]
image_folder = '/Users/angel/OneDrive/Desktop/CS 4100/train-jpg/train-jpg/'

X = load_images_from_folder(image_folder, filenames)
print(np.shape(filenames))

# The loader skips files it cannot read, which would shift every later image
# against its label. Stop here rather than train on misaligned data.
if len(X) != len(y):
    raise ValueError(
        f"Loaded {len(X)} images but have {len(y)} labels; "
        "images and labels are no longer aligned."
    )

# Normalize the images
X = X.astype('float32') / 255.0

# Each distinct tag string is mapped to one integer class id
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


print('X: ', len(X), '\nY: ', len(y))

(40479,)
X:  40479 
Y:  40479


In [4]:
# Number of distinct encoded labels, i.e. how many classes the data contains
num_classes = np.unique(y_encoded).size
print(num_classes)

449


In [12]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# One softmax unit per class known to the fitted encoder
n_outputs = len(label_encoder.classes_)

model = models.Sequential([
    # Take the input shape from the data itself so it always matches what the
    # loader produced, and declare it with an explicit Input layer instead of
    # passing `input_shape` to the first Conv2D.
    layers.Input(shape=X_train.shape[1:]),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(n_outputs, activation='softmax'),
])

# Labels are integer class ids, hence the sparse variant of the loss
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# validation_split holds out part of the training set for per-epoch validation
model.fit(X_train, y_train, epochs=7, batch_size=BATCH_SIZE, validation_split=0.2)


# Class with the highest predicted probability for each test image
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)

# accuracy_score expects (y_true, y_pred)
accuracy = accuracy_score(y_test, predicted_classes)

print(f"Test accuracy: {accuracy}")


Epoch 1/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 82ms/step - accuracy: 0.3629 - loss: 3.1122 - val_accuracy: 0.4368 - val_loss: 2.5792
Epoch 2/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 82ms/step - accuracy: 0.4771 - loss: 2.3520 - val_accuracy: 0.4908 - val_loss: 2.3555
Epoch 3/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 82ms/step - accuracy: 0.5006 - loss: 2.1978 - val_accuracy: 0.5032 - val_loss: 2.2343
Epoch 4/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 86ms/step - accuracy: 0.5192 - loss: 2.0638 - val_accuracy: 0.5103 - val_loss: 2.2325
Epoch 5/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 84ms/step - accuracy: 0.5329 - loss: 1.9747 - val_accuracy: 0.5235 - val_loss: 2.1595
Epoch 6/7
[1m810/810[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 81ms/step - accuracy: 0.5536 - loss: 1.8554 - val_accuracy: 0.5293 - val_loss: 2.1142
Epoch 7/7
[1m810/810

In [14]:
# Decode and show predicted vs. actual tag strings for the first 5 test images
for i in range(5):
    predicted_label = label_encoder.inverse_transform([predicted_classes[i]])
    actual_label = label_encoder.inverse_transform([y_test[i]])
    print(f"Predicted class for test image {i}: {predicted_label} (Actual class: {actual_label})")

Predicted class for test image 0: ['clear primary'] (Actual class: ['clear primary road'])
Predicted class for test image 1: ['clear primary water'] (Actual class: ['clear primary water'])
Predicted class for test image 2: ['agriculture clear habitation primary road'] (Actual class: ['clear conventional_mine habitation primary road water'])
Predicted class for test image 3: ['cloudy'] (Actual class: ['cloudy'])
Predicted class for test image 4: ['agriculture clear habitation primary road'] (Actual class: ['agriculture clear cultivation cultivation habitation primary road water'])
