In [28]:
import zipfile
import os

# Specify the path to your train data zip file
zip_file_path = 'D:/train_dataset.zip'  # Change this to your actual zip file path
extract_to_path = 'D:/picss'
dir='D:/picss/train_dataset'


with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to_path)


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split


In [31]:
# Specify the full path to your labels CSV file on the D drive
table_path = 'D:/train.csv'  # Change this to your actual CSV file path


labels_df = pd.read_csv(table_path)

# One-hot encode the labels
labels_df = pd.get_dummies(labels_df, columns=['Class'])

In [32]:
from PIL import Image
import numpy as np

def load_and_preprocess_image(image_path, target_size=(224, 224)):
    image = Image.open(image_path).convert('RGB')
    image = image.resize(target_size)
    image_array = np.array(image) / 255.0 
    return image_array

In [34]:
class MarsLandDataset:
    def __init__(self, image_dir, labels_df):
        self.image_dir = image_dir
        self.labels_df = labels_df
        self.image_names = labels_df['File Name'].tolist()
        self.labels = labels_df.drop(columns=['File Name']).values

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        image_name = self.image_names[idx]
        label = self.labels[idx]
        image_path = os.path.join(self.image_dir, image_name)
        image = load_and_preprocess_image(image_path)
        return image, label
    

In [35]:

image_dir = dir
class_labels = labels_df.filter(like='Class').values.argmax(axis=1)
train_df, val_df= train_test_split(labels_df,test_size=0.2,stratify=class_labels)
train_dataset = MarsLandDataset(image_dir, train_df)
val_dataset=MarsLandDataset(image_dir,val_df)

In [36]:
import tensorflow as tf
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.applications import MobileNetV2
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [37]:
image, label = train_dataset[0]
print(f"Image shape: {image.shape}, Label: {label}")

Image shape: (224, 224, 3), Label: [False False False False  True False False False]


In [38]:
def create_batches(dataset,batch_size=32):
    dataset_size=len(dataset)
    indices=np.arange(dataset_size)
    np.random.shuffle(indices)
    
    for start_idx in range(0,dataset_size,batch_size):
        end_idx=min(start_idx + batch_size,dataset_size)
        batch_indices=indices[start_idx:end_idx]
        batch_images=[]
        batch_labels=[]
        for i in batch_indices:
            image,label=dataset[i]
            batch_images.append(image)
            batch_labels.append(label)
        yield np.array(batch_images), np.array(batch_labels)

In [39]:
batch_size = 32
for batch_images, batch_labels in create_batches(train_dataset, batch_size=batch_size):
    print(f"Batch images shape: {batch_images.shape}, Batch labels shape: {batch_labels.shape}")

Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32, 8)
Batch images shape: (32, 224, 224, 3), Batch labels shape: (32

In [40]:


base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(labels_df.shape[1] - 1, activation='softmax')(x)


model = Model(inputs=base_model.input, outputs=predictions)


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


loss_fn = tf.keras.losses.CategoricalCrossentropy()


optimizer = tf.keras.optimizers.Adam()

@tf.function
def train_step(batch_images, batch_labels):
    with tf.GradientTape() as tape:
        predictions = model(batch_images, training=True)
        loss = loss_fn(batch_labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
def update_learning_rate(optimizer, factor=0.2, min_lr=0.00001):
    old_lr = optimizer.learning_rate.numpy()
    new_lr = max(old_lr * factor, min_lr)
    optimizer.learning_rate.assign(new_lr)
    print(f"Learning rate updated from {old_lr} to {new_lr}")

batch_size = 32
num_epochs = 10

best_val_loss = float('inf')
early_stop_counter = 0
patience = 3
weights_path = 'model.weights.h5'

for epoch in range(num_epochs):
    total_loss = 0
    num_batches = 0

    for batch_images, batch_labels in create_batches(train_dataset, batch_size):
        loss = train_step(batch_images, batch_labels)
        total_loss += loss
        num_batches += 1

    avg_loss = total_loss / num_batches
    print(f'Epoch {epoch+1}/{num_epochs} completed - Average Loss: {avg_loss:.4f}')

  
    val_loss = 0
    val_steps = 0
    val_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for batch_images, batch_labels in create_batches(val_dataset, batch_size=batch_size):
        predictions = model(batch_images, training=False)
        val_loss += loss_fn(batch_labels, predictions)
        val_accuracy.update_state(batch_labels, predictions)
        val_steps += 1

    val_loss /= val_steps
    val_acc = val_accuracy.result().numpy()
    print(f'Validation loss: {val_loss:.4f} - Validation accuracy: {val_acc:.4f}')

      


    if val_loss < best_val_loss:
       best_val_loss = val_loss
       early_stop_counter = 0
       model.save_weights(weights_path)  
       print("Model weights saved.")
    else:
       early_stop_counter += 1
       if early_stop_counter >= patience:
         print("Early stopping triggered.")
         break


    if val_loss > best_val_loss:
        update_learning_rate(optimizer)


if os.path.exists(weights_path):
    model.load_weights(weights_path)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    print("Model weights loaded and model compiled.")
else:
    print("Model weights file not found. Ensure the weights are saved before attempting to load.")

Epoch 1/10 completed - Average Loss: 0.6284
Validation loss: 0.2643 - Validation accuracy: 0.9114
Model weights saved.
Epoch 2/10 completed - Average Loss: 0.2580
Validation loss: 0.2276 - Validation accuracy: 0.9170
Model weights saved.
Epoch 3/10 completed - Average Loss: 0.2003
Validation loss: 0.2183 - Validation accuracy: 0.9210
Model weights saved.
Epoch 4/10 completed - Average Loss: 0.1418
Validation loss: 0.2287 - Validation accuracy: 0.9275
Learning rate updated from 0.0010000000474974513 to 0.00020000000949949026
Epoch 5/10 completed - Average Loss: 0.0974
Validation loss: 0.1977 - Validation accuracy: 0.9347
Model weights saved.
Epoch 6/10 completed - Average Loss: 0.0867
Validation loss: 0.1950 - Validation accuracy: 0.9363
Model weights saved.
Epoch 7/10 completed - Average Loss: 0.0885
Validation loss: 0.1923 - Validation accuracy: 0.9388
Model weights saved.
Epoch 8/10 completed - Average Loss: 0.0749
Validation loss: 0.1957 - Validation accuracy: 0.9371
Learning rate u

In [47]:

def extract_zip(file_path, extract_to='test_data'):
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)


zip_file_path = 'D:/test_dataset.zip'  # Replace with the path to your test dataset zip file
extracted_folder = 'D:/picss/test_data/test_dataset'  # Destination folder
extract_zip(zip_file_path, extracted_folder)

def load_test_dataset(image_dir, image_size=(224, 224)):
    image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]
    print(f"Found {len(image_files)} files in the directory.")
    images = []
    valid_files = []
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        image_array = load_and_preprocess_image(image_path, target_size=image_size)
        if image_array is not None:
            images.append(image_array)
            valid_files.append(image_file)
    return np.array(images), valid_files


test_images, test_image_files = load_test_dataset(extracted_folder)

if len(test_images) == 0:
    raise ValueError("No valid test images found. Please check the test dataset.")


test_predictions = model.predict(test_images)


if np.any(np.isnan(test_predictions)) or np.any(np.isinf(test_predictions)):
    raise ValueError("Predictions contain NaN or infinity values.")


predicted_classes = tf.argmax(test_predictions, axis=1).numpy()


class_names = list(labels_df.filter(like='Class').columns)
num_classes = len(class_names)

for i in range(min(2000, len(test_image_files))):
    image_file = test_image_files[i]
    predicted_label = class_names[predicted_classes[i]]
    print(f"Image File: {image_file}, Predicted Label: {predicted_label}")

Found 2000 files in the directory.
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 311ms/step
Image File: 1.jpg, Predicted Label: Class_slope streak
Image File: 10.jpg, Predicted Label: Class_crater
Image File: 100.jpg, Predicted Label: Class_other
Image File: 1000.jpg, Predicted Label: Class_other
Image File: 1001.jpg, Predicted Label: Class_other
Image File: 1002.jpg, Predicted Label: Class_other
Image File: 1003.jpg, Predicted Label: Class_other
Image File: 1004.jpg, Predicted Label: Class_other
Image File: 1005.jpg, Predicted Label: Class_other
Image File: 1006.jpg, Predicted Label: Class_slope streak
Image File: 1007.jpg, Predicted Label: Class_slope streak
Image File: 1008.jpg, Predicted Label: Class_other
Image File: 1009.jpg, Predicted Label: Class_crater
Image File: 101.jpg, Predicted Label: Class_other
Image File: 1010.jpg, Predicted Label: Class_crater
Image File: 1011.jpg, Predicted Label: Class_other
Image File: 1012.jpg, Predicted Label: Class_other
Image