# SYSTEM OF INTEREST (SOI) 
<ol><li>DATA PREPARATION</li>
    <li>LEARNING MODEL</li>
    <li>EVALUATION AND DEPLOYMENT</li></ol>

## DATA PREPARATION SUBSYSTEM

<h3><em>Data preparation used in the base paper is mentioned below:</em></h3>
<ul><li><p>combined two main and common weather conditions datasets, DAWM2020 dataset and MCWCD2018,<br> to end up with a dataset composed of 1656 image samples that are grouped into six classes for weather conditions:<br> cloudy (300 images), rainy (215 images), snowy (204 images), sandy (319 images), shine (253 images), and sunrise (365 images)</p></li>
<li><p>Image datasets are collected, combined, and labeled into six class labels by means of six folders, each of which holds the name of one weather class</p></li>
<li><p>Initially, the image-type of all images is unified to JPG image extension, and then the image-resize operation is applied <br> over all JPG images
in which the sizes of all images are converted to 3D matrices (RGB images) with image
dimensions of 224 ×224 ×3.</p></li>
<li><p>applying randomized augmentation operations on the dataset. The augmentation process configures <br>a set of preprocessing
options such as resizing, cropping, rotation, reflection, invariant distortions, and others.</p></li>
<li><p>images are shuffled randomly.</p></li></ul>

<h3><em>Extra techniques used:</em></h3>
<ul><li><p>Categorical labels are converted into numerical format for better compatibility with ML and DL algorithms before feeding them into the model.</p></li>
<li><p>Normalization to ensure the data distribution is well-suited for the model's initial weights and activations and helps to converge fast.</p></li>
<li><p>Data balancing, as the dataset is imbalanced.</p></li></ul>

In [None]:
import matplotlib.pyplot as plt
from torchvision import transforms
import random
import shutil
from PIL import Image
import numpy as np
import os
import cv2

In [None]:
# Mirror the class-folder layout of the read-only input dataset under
# /kaggle/working so the processed images have a place to be written.
path = "/kaggle/input/ds-dataset/datasets"
folders = []
for entry in os.listdir(path):
    source_dir = os.path.join(path, str(entry))
    # Only class folders are mirrored; a stray file would otherwise be
    # recreated as a directory and break the later per-folder listings.
    if not os.path.isdir(source_dir):
        continue
    # Path relative to the input root, e.g. "ds-dataset/datasets/<class>",
    # computed without relying on a fixed number of path components.
    folders.append(os.path.relpath(source_dir, "/kaggle/input"))
print(folders)
for folder in folders:
    os.makedirs(os.path.join("/kaggle/working", folder), exist_ok=True)
    

In [None]:
def transform_n_create(image_folder: str, output_folder: str) -> None:
    """
    Resize every 3-channel image found under ``image_folder`` to 224x224 and
    store it under ``output_folder`` with the same class folder and file name.

    Here Normalization is not done because it hinders the image content

    Args:
        image_folder: Directory holding one sub-folder per class.
        output_folder: Directory receiving the resized images; missing
            per-class sub-folders are created.
    """
    transformations = transforms.Compose([
        transforms.Resize((224, 224)),  # transforming the dimensions
        transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # normalizing the images
    ])
    # Built once and reused for every file instead of once per image.
    to_pil = transforms.ToPILImage()

    for folder in os.listdir(image_folder):
        ifolder = os.path.join(image_folder, str(folder))
        # Skip stray files sitting next to the class folders.
        if not os.path.isdir(ifolder):
            continue
        print(folder)
        ofolder = os.path.join(output_folder, str(folder))
        # Do not depend on another cell having created the output tree.
        os.makedirs(ofolder, exist_ok=True)
        for files in os.listdir(ifolder):
            ifiles = os.path.join(ifolder, str(files))
            ofiles = os.path.join(ofolder, str(files))
            # The context manager releases the file handle for every image,
            # including the ones that are skipped.
            with Image.open(ifiles) as image:
                # drop images which don't have exactly three channels
                if len(image.getbands()) != 3:
                    continue
                resized_img = to_pil(transformations(image))
            resized_img.save(ofiles)

# Write 224x224 copies of the input dataset into the working directory.
transform_n_create("/kaggle/input/ds-dataset/datasets/", "/kaggle/working/ds-dataset/datasets/")

In [None]:
def augmentation(image_folder: str, output_folder: str) -> None:
    """
    Create one augmented copy of every 3-channel image under ``image_folder``.

    Each image is resized to 256x256, rotated by a random angle within
    +/- 15 degrees, center-cropped to 224x224 and mirrored horizontally.
    The copy is stored under ``output_folder`` in the same class folder with
    an "a" appended to the file stem (``img.jpg`` -> ``imga.jpg``).

    Here Normalization is not done because it hinders the image content

    Args:
        image_folder: Directory holding one sub-folder per class.
        output_folder: Directory receiving the augmented images; missing
            per-class sub-folders are created.
    """
    data_transforms = transforms.Compose([
        transforms.Resize((256, 256)),  # Resize the image to 256x256
        transforms.RandomRotation(degrees=15),  # Randomly rotate the image within +/- 15 degrees
        transforms.CenterCrop(224),  # Crop the central 224x224 region (deterministic)
        transforms.RandomHorizontalFlip(p=1.0),  # p=1.0: every image is mirrored horizontally
#         transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust brightness, contrast, saturation, and hue
        transforms.ToTensor(),  # Convert the image to a PyTorch tensor
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the tensor
    ])
    # Built once and reused for every file instead of once per image.
    to_pil = transforms.ToPILImage()

    for folder in os.listdir(image_folder):
        ifolder = os.path.join(image_folder, str(folder))
        # Skip stray files sitting next to the class folders.
        if not os.path.isdir(ifolder):
            continue
        print(folder)
        ofolder = os.path.join(output_folder, str(folder))
        # Do not depend on another cell having created the output tree.
        os.makedirs(ofolder, exist_ok=True)
        for files in os.listdir(ifolder):
            ifiles = os.path.join(ifolder, str(files))
            # splitext only splits at the final extension, so dots elsewhere
            # in the path or file name no longer corrupt the output name.
            stem, extension = os.path.splitext(str(files))
            newfile = os.path.join(ofolder, stem + "a" + extension)
            # The context manager releases the file handle for every image,
            # including the ones that are skipped.
            with Image.open(ifiles) as image:
                # drop images which don't have exactly three channels
                if len(image.getbands()) != 3:
                    continue
                augmented_img = to_pil(data_transforms(image))
            augmented_img.save(newfile)

# Add one augmented "<name>a.<ext>" copy per input image to the working dataset.
augmentation("/kaggle/input/ds-dataset/datasets/", "/kaggle/working/ds-dataset/datasets/")

In [None]:
# check for shape changes in dataset
default_image_path = "/kaggle/input/ds-dataset/datasets/Cloudy/cloudy116.jpg"
augmented_image_path = "/kaggle/working/ds-dataset/datasets/Cloudy/cloudy116a.jpg"
img_default = cv2.imread(default_image_path)
img_augmented = cv2.imread(augmented_image_path)

# cv2.imread returns None instead of raising when a file cannot be read;
# fail with the offending path rather than an AttributeError on `.shape`.
for checked_path, checked_image in (
    (default_image_path, img_default),
    (augmented_image_path, img_augmented),
):
    if checked_image is None:
        raise FileNotFoundError(f"Could not read image: {checked_path}")

print(f"Shape of normal image {img_default.shape}")
print(f"Shape of augmented image {img_augmented.shape}")

In [None]:
# Build the empty train / test / validation directory trees, with one
# sub-folder per class found in the processed dataset.
path = "/kaggle/working/ds-dataset/datasets"
dataset_name = path.split("/")[3]  # fourth path component: "ds-dataset"
for fold_type in ("train_data", "test_data", "val_data"):
    # Paths relative to /kaggle/working/, e.g. "ds-dataset/train_data/<class>".
    folders = [
        os.path.join(dataset_name, fold_type, str(class_name))
        for class_name in os.listdir(path)
    ]
    print(folders)
    for folder in folders:
        os.makedirs("/kaggle/working/" + folder, exist_ok=True)

In [None]:
# select random images and add it to folders train and test
# Per class: 80% of the files are copied to train and the remaining 20% to
# test; afterwards 20% of train is moved to validation, which leaves roughly
# 64% train / 16% validation / 20% test.

# Set explicitly so this cell does not depend on whichever earlier cell
# assigned `path` last.
path = "/kaggle/working/ds-dataset/datasets"
train_path = "/kaggle/working/ds-dataset/train_data"
test_path = "/kaggle/working/ds-dataset/test_data"
val_path = "/kaggle/working/ds-dataset/val_data"

# Dedicated seeded generator: together with the sorted listings below this
# makes the split reproducible without touching the global `random` state.
split_rng = random.Random(42)

# NOTE(review): an image and its augmented "a" copy are sampled
# independently, so the two can end up in different splits (possible
# train/test leakage). Confirm whether that is acceptable for the evaluation.
for folder in sorted(os.listdir(path)):
    foldpath = os.path.join(path, str(folder))
    comp_files = sorted(os.listdir(foldpath))
    filenames = split_rng.sample(comp_files, int(0.8 * len(comp_files)))
    selected = set(filenames)
    left_files = [name for name in comp_files if name not in selected]

    trainpath = os.path.join(train_path, str(folder))
    testpath = os.path.join(test_path, str(folder))
    # shutil.copy to a missing directory would create a *file* with that
    # name, so make sure the destinations exist.
    os.makedirs(trainpath, exist_ok=True)
    os.makedirs(testpath, exist_ok=True)

    for file in filenames:
        shutil.copy(os.path.join(foldpath, file), trainpath)
    for file in left_files:
        shutil.copy(os.path.join(foldpath, file), testpath)

for folder in sorted(os.listdir(train_path)):
    foldpath = os.path.join(train_path, str(folder))
    comp_files = sorted(os.listdir(foldpath))
    filenames = split_rng.sample(comp_files, int(0.2 * len(comp_files)))

    valpath = os.path.join(val_path, str(folder))
    os.makedirs(valpath, exist_ok=True)

    # Moved (not copied) so validation images are removed from train.
    for file in filenames:
        shutil.move(os.path.join(foldpath, file), valpath)

In [None]:
# Preview the untouched image from the input dataset.
Image.open("/kaggle/input/ds-dataset/datasets/Cloudy/cloudy116.jpg")

In [None]:
# Preview the resized copy of the same image stored in the working dataset.
Image.open("/kaggle/working/ds-dataset/datasets/Cloudy/cloudy116.jpg")

In [None]:
# Preview the augmented copy (file stem suffixed with "a") of the same image.
Image.open("/kaggle/working/ds-dataset/datasets/Cloudy/cloudy116a.jpg")

## LEARNING MODEL SUBSYSTEM

<h3><em>Input Layer:</em></h3>
<p>RGB images of dimensions 224x224x3 passed as input for CNNs.</p>

<h3><em>Processing Layer:</em></h3>
<h4><em>Transfer learning on three deep CNNs used in the base paper:</em></h4>
<ul>
    <li><p>SqueezeNet CNN</p></li>
    <li><p> ResNet-50 CNN</p></li>
    <li><p>EfficientNet-b0 CNN</p></li>
</ul>
<p>The model is
fully trained to do classification task A. The knowledge (pretrained parameters)<br> is
stored and transferred to the new model to do classification task B with fine-tuning.</p>

<h3><em>Output Layer:</em></h3>
<p>The output of SqueezeNet (1000) is fully connected with the number
of classes (6), the output of ResNet-50 (2048)<br> is fully connected with the number of
classes (6), and the output of EfficientNet-B0 (1280) is fully connected with the<br> number
of classes (6). The final output will be provided as a SoftMax probability function, and
the maximum<br> probability will be selected to represent the final classification result.
</p>
<!-- ![image.png]("https://drive.usercontent.google.com/download?id=18Lz39CM7Qp7acZe2zq0KQDPhO0Q9jKnY")
<img src="https://drive.usercontent.google.com/download?id=18Lz39CM7Qp7acZe2zq0KQDPhO0Q9jKnY"> -->

<h3><em>Changes:</em></h3>
<p>
    Training the model for 7 classes which include,
    <ul>
        <li>Cloudy
        <li>Snow
        <li>Sand
        <li>Sunrise
        <li>Fog
        <li>Shine
        <li>Rainy
    </ul>
</p>

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input

In [None]:
# Number of weather classes; sets the width of the softmax output layer.
num_classes = 7

In [None]:
# Load EfficientNetB0 with ImageNet weights for 224x224 RGB inputs.
# include_top=False drops the original classification layers so that a
# custom head can be attached.
effi_model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

In [None]:
# Freeze the base model layers so the pre-trained weights are not updated
# during training.
effi_model.trainable = False

In [None]:
# Classification head stacked on the frozen backbone:
# global average pooling -> 1280-unit ReLU layer -> softmax over the classes.
pooling_layer = tf.keras.layers.GlobalAveragePooling2D()
hidden_layer = Dense(1280, activation="relu")
output_layer = Dense(num_classes, activation="softmax")

x = hidden_layer(pooling_layer(effi_model.output))
predictions = output_layer(x)

In [None]:
# Create the end-to-end model: backbone input -> class probabilities.
model = Model(inputs=effi_model.input, outputs=predictions)

In [None]:
# The output layer already applies softmax, so the loss receives class
# probabilities, not logits: from_logits must be False. Declaring them as
# logits makes Keras warn and can apply softmax a second time.
# The sparse variant of the loss is used because the labels are integer
# class indices rather than one-hot vectors.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"],
)

In [None]:
# Display the layer-by-layer summary of the assembled model.
model.summary()

<p><strong>NOTE :</strong> The TensorFlow dataset loader converts the categorical (folder-name) labels into numerical labels, so no explicit conversion is needed.</p>

In [None]:
# Load the train, test and validation splits from their directories.
# All three share the same image size and batch size.
load_split = tf.keras.utils.image_dataset_from_directory
loader_options = {"image_size": (224, 224), "batch_size": 32}

train_ds = load_split("/kaggle/working/ds-dataset/train_data", **loader_options)
test_ds = load_split("/kaggle/working/ds-dataset/test_data", **loader_options)
val_ds = load_split("/kaggle/working/ds-dataset/val_data", **loader_options)

In [None]:
# Train for 30 epochs on the training split, using the validation split
# as validation data.
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30
)

In [None]:
# Evaluate on the test split; the result is unpacked as the loss followed
# by the accuracy metric the model was compiled with.
loss, accuracy = model.evaluate(test_ds)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")