In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!ls /content/drive/MyDrive/dataset_split/train/class1
!ls /content/drive/MyDrive/dataset_split/validation/class1


ls: cannot access '/content/drive/MyDrive/dataset_split/train/class1': No such file or directory
ls: cannot access '/content/drive/MyDrive/dataset_split/validation/class1': No such file or directory


In [3]:
import os
import shutil
import random
from tqdm import tqdm

def split_data(source_dir, train_dir, val_dir, split_ratio=0.8):
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(val_dir):
        os.makedirs(val_dir)

    for class_name in tqdm(os.listdir(source_dir), desc="Processing classes"):
        class_path = os.path.join(source_dir, class_name)
        if os.path.isdir(class_path):
            train_class_path = os.path.join(train_dir, class_name)
            val_class_path = os.path.join(val_dir, class_name)
            os.makedirs(train_class_path, exist_ok=True)
            os.makedirs(val_class_path, exist_ok=True)

            files = [f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
            random.shuffle(files)
            split_index = int(len(files) * split_ratio)

            train_files = files[:split_index]
            val_files = files[split_index:]

            for file in train_files:
                shutil.copy2(os.path.join(class_path, file), os.path.join(train_class_path, file))
            for file in val_files:
                shutil.copy2(os.path.join(class_path, file), os.path.join(val_class_path, file))

            # Logging
            print(f"Class '{class_name}': {len(train_files)} training files, {len(val_files)} validation files")

# Paths
source_dir = '/content/drive/MyDrive/dataset/train'
train_dir = '/content/drive/MyDrive/dataset_split/train'
val_dir = '/content/drive/MyDrive/dataset_split/validation'

# Split
split_data(source_dir, train_dir, val_dir, split_ratio=0.8)





Processing classes:  25%|██▌       | 1/4 [00:06<00:20,  6.94s/it]

Class 'class3': 24 training files, 6 validation files


Processing classes:  50%|█████     | 2/4 [00:10<00:09,  4.79s/it]

Class 'class2': 9 training files, 3 validation files


Processing classes:  75%|███████▌  | 3/4 [00:20<00:07,  7.15s/it]

Class 'class4': 36 training files, 10 validation files


Processing classes: 100%|██████████| 4/4 [00:24<00:00,  6.06s/it]

Class 'class1': 12 training files, 4 validation files





In [4]:
# Verify the directories in train and validation
!ls /content/drive/MyDrive/dataset_split/train
!ls /content/drive/MyDrive/dataset_split/validation

# List a few files from one class directory in train and validation
!ls /content/drive/MyDrive/dataset_split/train/class1 | head -n 10
!ls /content/drive/MyDrive/dataset_split/validation/class1 | head -n 10


class1	class2	class3	class4
class1	class2	class3	class4
20200919.15-00-00.jpg
20200919.16-00-00.jpg
20200919.17-00-00.jpg
20200919.23-00-00.jpg
20200920.14-00-00.jpg
20200920.15-00-00.jpg
20200920.17-00-00.jpg
20200920.18-00-00.jpg
20200920.19-00-00.jpg
20200920.21-00-00.jpg
20200919.14-00-00.jpg
20200919.22-00-00.jpg
20200920.16-00-00.jpg
20200920.20-00-00.jpg


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/drive/MyDrive/dataset_split/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    '/content/drive/MyDrive/dataset_split/validation',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)


Found 81 images belonging to 4 classes.
Found 23 images belonging to 4 classes.


In [6]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [7]:



train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/drive/MyDrive/dataset_split/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True  # Shuffle the training images
)

val_generator = val_datagen.flow_from_directory(
    '/content/drive/MyDrive/dataset_split/validation',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Do not Shuffle the validation images
)


Found 81 images belonging to 4 classes.
Found 23 images belonging to 4 classes.


In [8]:
base_model = MobileNetV2(weights='imagenet', include_top=False)
# Load the pre-trained ResNet50 model without the top (fully connected) layers
#base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
#x = Dropout(0.5)(x)  # Dropout to prevent overfitting
predictions = Dense(4, activation='softmax')(x)  # Assuming 4 classes

# Combine base model and custom layers into a new model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])


  base_model = MobileNetV2(weights='imagenet', include_top=False)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [9]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the path in Google Drive where you want to save the model
model_save_path = '/content/drive/MyDrive/sky_classification_model_best_acc.keras'

# Create a ModelCheckpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    filepath=model_save_path,     # Saves to your Google Drive
    monitor='val_loss',           # Or 'val_accuracy', depending on what you prefer
    save_best_only=True,          # Only save if the metric improves
    mode='min',                   # Use 'min' if you're monitoring val_loss, 'max' if val_accuracy
    verbose=1                     # Print a message when saving
)

# Fit the model with the checkpoint callback
history = model.fit(
    train_generator,
    epochs=30,
    validation_data=val_generator,
    callbacks=[checkpoint_callback]
)

print(f"Best model will be saved (if improved) at: {model_save_path}")




Epoch 1/30


  self._warn_if_super_not_called()


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 984ms/step - accuracy: 0.4201 - loss: 1.2423
Epoch 1: val_loss improved from inf to 1.09610, saving model to /content/drive/MyDrive/sky_classification_model_best_acc.keras
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4s/step - accuracy: 0.4231 - loss: 1.2373 - val_accuracy: 0.5652 - val_loss: 1.0961
Epoch 2/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 901ms/step - accuracy: 0.5621 - loss: 0.9699
Epoch 2: val_loss improved from 1.09610 to 0.92505, saving model to /content/drive/MyDrive/sky_classification_model_best_acc.keras
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.5697 - loss: 0.9644 - val_accuracy: 0.5652 - val_loss: 0.9251
Epoch 3/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5711 - loss: 0.9718   
Epoch 3: val_loss improved from 0.92505 to 0.83102, saving model to /content/drive/MyDrive/sky_classifica

In [11]:
# Evaluate the model
loss, accuracy = model.evaluate(val_generator)
print(f'Validation Accuracy: {accuracy*100:.2f}%')

# Save the model
#model.save('sky_classification_model.h5')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.9130 - loss: 0.3349
Validation Accuracy: 91.30%


In [None]:
# Save the model in the recommended format
# Define the path in Google Drive where you want to save the model
#model_save_path = '/content/drive/MyDrive/sky_classification_model_100percentval_acc.keras'

# Save the Keras model in TensorFlow’s native format
#model.save(model_save_path, save_format='keras')

#print(f"Model saved to: {model_save_path}")

#!ls /content/drive/MyDrive/






Model saved to: /content/drive/MyDrive/sky_classification_model_100percentval_acc.keras
 20dec06.apinene.dilution.size.dist.gsheet
 A52_H_07_Zelenyuk.pptx
 Aarav_MidColumbia_ScienceFair_Wildfires.gslides
'Aarav_presentation 1.gslides'
 Aarav_presentation_final_2024.gslides
'AMZ SVTAG shareable 190306 IOP2 tracers for Manish.gsheet'
 ARM-AMF2-Proposal
 best_model.keras
'Colab Notebooks'
'Copy of MS_response_SOA.gdoc'
'Copy of NCAT-CRC Proposal Draft #2.docx'
 dataset
 dataset_split
'FACL_Shrivastava_Wu_23367 revised.gdoc'
 Figure_AQI_1.gdraw
 Figures_v1.gdoc
 GRC_Les_Diablerets_Manish.zip
 labels.txt.gdoc
'manish est.pdf'
 ManishShrivastava_Seminar_2024.pptx
 MANISH_SHRIVESTAVA_REPAIRS.PDF
 Manuscript_v1.gdoc
 model.tflite
 MS_CAM_reviews
 mshrivastava_pic.jpeg
'New folder.zip'
'poster_RE3_Shrivastava_final (1).gslides'
 poster_RE3_Shrivastava_final.gslides
 Preetipic.jpg
'Project proposal.gdoc'
'Responses of Shrivastava et al. Rev. Geophys..gdoc'
 Ropes.zip
 Shrivastava_ASR_Plenary_202

In [12]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the saved model from Google Drive
model = tf.keras.models.load_model('/content/drive/MyDrive/sky_classification_model_best_acc.keras')  # or .keras


In [21]:
from PIL import Image
import numpy as np

# Upload the image to Google Colab
from google.colab import files
uploaded = files.upload()  # This will prompt you to upload an image file

# Load and preprocess the image
image = Image.open(list(uploaded.keys())[0])  # Open the uploaded image
image = image.resize((224, 224))  # Resize image to match model input size

# Convert the image to a NumPy array and normalize (scale pixel values to [0, 1])
image_array = np.array(image) / 255.0

# Add a batch dimension (because the model expects input with shape [batch_size, height, width, channels])
image_array = np.expand_dims(image_array, axis=0)

print(f"Image shape after preprocessing: {image_array.shape}")


Saving 20180819.20.jpg to 20180819.20.jpg
Image shape after preprocessing: (1, 224, 224, 3)


In [22]:

# Make predictions on the new image
predictions = model.predict(image_array)

# 'predictions' is typically a 2D array of shape (1, num_classes)
# If you have 4 classes, it should look like [[p1, p2, p3, p4]]
probabilities = predictions[0]  # Extract the 1D array of probabilities

# Print out the probability for each class
for class_idx, prob in enumerate(probabilities):
    print(f"Class {class_idx}: {prob:.4f}")

# If you still want the class with the highest probability:
predicted_class_index = np.argmax(probabilities)
print(f"Predicted Class Index: {predicted_class_index}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Class 0: 0.0050
Class 1: 0.0048
Class 2: 0.6217
Class 3: 0.3686
Predicted Class Index: 2


In [None]:
from google.colab import files

# Upload labels.txt from your local machine
uploaded = files.upload()

# This will prompt you to upload the file from your local machine.
# Once uploaded, you can access the file as follows:




Saving labels.txt to labels (1).txt


In [None]:
# Load the labels.txt file
with open('labels.txt', 'r') as f:
    labels = f.read().splitlines()

# Check the loaded labels
print(labels)


['1', '2', '3', '4']


In [None]:
# Assuming you have a preprocessed image and model ready
predictions = model.predict(image_array)

# Get the index of the class with the highest probability
predicted_class_index = np.argmax(predictions)

# Map the predicted index to the class label
predicted_label = labels[predicted_class_index]
print(f"Predicted Label: {predicted_label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Predicted Label: 2


In [None]:
!cp /content/model.tflite /content/drive/MyDrive/

cp: cannot stat '/content/model.tflite': No such file or directory
