<a href="https://colab.research.google.com/github/Waqasii/WeatherClassificationSystem/blob/main/Weather%20Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Install data split library

!pip install split-folders

In [None]:
## Import required packages

import json
from matplotlib import pyplot as plt
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
import tensorflow as tf
import numpy as np
import os
import shutil
import PIL
import PIL.Image as PI
import splitfolders
import shutil
# Clear the Keras session and fix the random seeds
tf.keras.backend.clear_session()# clear the session so state from earlier runs is dropped
# Seed TensorFlow and NumPy to keep runs reproducible
tf.random.set_seed(1)
np.random.seed(1)

In [None]:
# Download and unzip data
!wget "https://md-datasets-cache-zipfiles-prod.s3.eu-west-1.amazonaws.com/4drtyfjtfy-1.zip";
!unzip "4drtyfjtfy-1.zip";
!unzip "dataset2.zip"

#Check all image sizes and store in list
l=[]
for img in os.listdir('dataset2'):
    image=PI.open("dataset2/"+img)
    l.append(image.size)
    

In [None]:
## Create one sub-folder of data/ per weather class
class_names = ["cloudy", "rain", "shine", "sunrise"]
for class_name in class_names:
    os.makedirs("data/{}".format(class_name), exist_ok=True)

## Copy each image into the folder of every class whose name occurs in its file name
for file_name in os.listdir('dataset2'):
    matching_classes = [class_name for class_name in class_names if class_name in file_name]
    for class_name in matching_classes:
        shutil.copy('./dataset2/' + file_name, './data/{}'.format(class_name))

#### Defining Variables

In [None]:
# Image height and width in pixels
img_height = 128
img_width = 128
# Number of entries in the dataset2 folder
dataset_size=len(os.listdir('dataset2'))
# Fractions of the data assigned to the train / validation / test splits
train_size=.7
validation_size=.2
test_size=.1
split_seed=1  # seed handed to the splitter so the split is the same on every run

In [None]:
## Class distribution
# Count the images in each class folder. The folders are addressed relative to
# the working directory, the same way data/ is created and listed.
data_count = {val: len(os.listdir(os.path.join('data', val))) for val in os.listdir('data')}
print(data_count, '\n')

# Hand matplotlib plain lists rather than dict views.
plt.bar(list(data_count.keys()), list(data_count.values()))
plt.title("Data Distribution of Image")
plt.ylabel("No. of images")
plt.show()

In [None]:
## Split the class folders under data/ into train, validation and test sets
splitfolders.ratio(
    "data",
    output="output",
    seed=split_seed,  # fixed seed, so the same split is produced every time
    ratio=(train_size, validation_size, test_size),
    group_prefix=None,
    move=False,
)

In [None]:
## Class-wise distribution of the split dataset
# Walk output/<split>/<class> relative to the working directory, matching the
# relative "output" folder the split was written to.
for val in os.listdir('output'):
    split_dir = os.path.join('output', val)
    for typ in os.listdir(split_dir):
        print(val, "->", typ, "->", len(os.listdir(os.path.join(split_dir, typ))))

In [None]:
## To save split data
# !zip -r -v  'split_data.zip' data/

In [None]:
## Build the image generators; augmentation is applied to the training set only

train_datagen = ImageDataGenerator(
    rescale=1./255,          # multiply pixel values by 1/255
    rotation_range=40,       # random rotations of up to 40 degrees
    width_shift_range=0.5,   # horizontal shifts of up to 50% of the image width
    height_shift_range=0.2,  # vertical shifts of up to 20% of the image height
    shear_range=0.2,         # shear intensity (an angle, not a percentage)
    zoom_range=0.4,          # zoom factor drawn from [0.6, 1.4]
    horizontal_flip=True,
    fill_mode='nearest')

# Images are resized to the configured (img_height, img_width) so the size is
# defined in one place only.
train_generator = train_datagen.flow_from_directory(
    "output/train",
    target_size=(img_height, img_width),
    shuffle=True,
    seed=1,
    class_mode='categorical',
    batch_size=8)

# Validation and test images are only rescaled, never augmented.
datagen_test = ImageDataGenerator(rescale=1./255)

validation_generator = datagen_test.flow_from_directory(
    "output/val",
    target_size=(img_height, img_width),
    class_mode='categorical',
    seed=1,
    shuffle=True,
    batch_size=8)

test_generator = datagen_test.flow_from_directory(
    "output/test",
    target_size=(img_height, img_width),
    batch_size=8,
    shuffle=True,
    seed=1,
    class_mode='categorical')


## Label dictionary for the data generators

In [None]:
# Invert class_indices (class name -> index) into an index -> class name lookup
labels = {index: name for name, index in train_generator.class_indices.items()}
print(labels)

In [None]:
def train_model(optimizer,monitor,mode,epochs,batch_size,c,wait=20,activation='relu',metrics = ['accuracy']):
    """Build, train, save and evaluate the weather-classification CNN.

    Training uses the notebook-level ``train_generator``, validation uses
    ``validation_generator`` and the final accuracy is measured on
    ``test_generator``. The model, its best checkpoint, the training history
    and the accuracy/loss plot are written to the working directory and copied
    to the Drive folder of run ``c``.

    Args:
        optimizer: optimizer handed to ``model.compile`` (e.g. 'adam', 'rmsprop').
        monitor: quantity watched by the checkpoint, learning-rate and
            early-stopping callbacks (e.g. 'loss', 'val_loss', 'accuracy').
        mode: 'min' or 'max', the direction in which ``monitor`` improves.
        epochs: maximum number of epochs to train.
        batch_size: kept so existing calls keep working, but not forwarded to
            ``fit``: the generators already yield batches, and their batch
            size is fixed where they are created.
        c: run identifier used in the saved file names and the Drive sub-folder.
        wait: early-stopping patience, i.e. epochs without improvement of
            ``monitor`` before training stops.
        activation: activation of the convolutional and hidden dense layers.
        metrics: metrics handed to ``model.compile``. The plots read the
            'accuracy' history and the printed test accuracy is ``score[1]``,
            so 'accuracy' is expected to be the first metric.

    Returns:
        The History object returned by ``model.fit``.
    """
    ## Model architecture: four Conv2D/MaxPooling2D stages followed by a dense classifier
    model = tf.keras.models.Sequential([

        # Input size follows the configured image size instead of a second hard-coded copy.
        tf.keras.layers.Conv2D(32, (3,3), activation=activation, input_shape=(img_height, img_width, 3)),
        tf.keras.layers.MaxPooling2D(2, 2),

        tf.keras.layers.Conv2D(64, (3,3), activation=activation),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Dropout(0.4,seed=1),

        tf.keras.layers.Conv2D(128, (3,3), activation=activation),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Dropout(0.5,seed=1),

        tf.keras.layers.Conv2D(128, (3,3), activation=activation),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Dropout(0.4,seed=1),
        tf.keras.layers.Flatten(),

        tf.keras.layers.Dense(128, activation=activation),

        tf.keras.layers.Dense(4, activation='softmax')
    ])

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=metrics)

    ## Callbacks: best-weights checkpoint, learning-rate decay and early stopping
    weights_path = "model_weight{}.hdf5".format(c)
    reduce_lr = ReduceLROnPlateau(monitor=monitor, factor=0.2,
                                  patience=5, min_lr=0.00001)
    checkpoint = ModelCheckpoint(weights_path, monitor=monitor, verbose=1,
                                 save_best_only=True, mode=mode)
    # `wait` only takes effect because this callback is actually passed to fit().
    early_stopping = EarlyStopping(monitor=monitor, mode=mode, patience=wait,
                                   restore_best_weights=True)

    history = model.fit(
        train_generator,
        epochs=epochs,
        callbacks=[checkpoint, reduce_lr, early_stopping],
        verbose=2,
        validation_data=validation_generator)

    ## Save the training history, the model and copy everything to Drive
    history_path = 'history{}.json'.format(c)
    # Callbacks can log NumPy scalars (e.g. the learning rate), which json
    # cannot serialise; store plain Python floats instead.
    serialisable_history = {
        name: [float(value) for value in values]
        for name, values in history.history.items()
    }
    with open(history_path, 'w') as f:
        json.dump(serialisable_history, f)

    model_path = "model{}.h5".format(c)
    model.save(model_path)
    print("Model Trained and Saved")

    drive_dir = '/content/drive/MyDrive/ML Project 1/{}'.format(c)
    os.makedirs(drive_dir, exist_ok=True)
    for artefact in (model_path, weights_path, history_path):
        shutil.copy(artefact, drive_dir)

    ## Plot accuracy and loss, save the figure, then display it
    fig = plt.figure(figsize=(20, 20))
    plt.subplot(2, 2, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')

    plt.subplot(2, 2, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['loss', 'val_loss'], loc='upper left')
    # Written before plt.show() so the file is saved while the figure is still open.
    fig.savefig(os.path.join(drive_dir, 'plot.jpg'))
    plt.show()

    ## Evaluate on the held-out test set
    score = model.evaluate(test_generator, verbose=1)
    print('\n', 'Test accuracy:', score[1]*100,"%")

    return history

## All models performance 

In [None]:
from keras.models import load_model
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay,classification_report
import os

x='/content/drive/MyDrive/ML Project 1/'
class_names = ['Cloudy', 'Rain', 'Shine', 'Sunrise']
d={}  # run name -> test accuracy in percent

for val in os.listdir(x):
    path_to_model = "{}/model{}.h5".format((x+val),val)
    # The Drive folder also contains entries that are not run folders (for
    # example good.h5); skip anything that has no saved model for the run.
    if not os.path.isfile(path_to_model):
        continue
    model = load_model(path_to_model)

    # Collect exactly one pass over the test set. reset() restarts the
    # iterator, and len(test_generator) is its number of batches per pass, so
    # no image is missed or counted twice.
    test_generator.reset()
    image_batches, class_batches = [], []
    for _ in range(len(test_generator)):
        images, classes = next(test_generator)
        image_batches.append(images)
        class_batches.append(classes)
    image_batch = np.concatenate(image_batches)
    classes_batch = np.concatenate(class_batches)

    # Predictions are made on the very images whose labels were collected, so
    # the two stay aligned even though the generator shuffles.
    y_predict = model.predict(image_batch)
    y_true = np.argmax(classes_batch, axis=1)
    y_pred = np.argmax(y_predict, axis=1)

    ConfusionMatrixDisplay.from_predictions(
        y_true=y_true,
        y_pred=y_pred,
        display_labels=class_names,
        cmap='Blues'
    )
    clf = classification_report(y_true, y_pred, target_names=class_names)
    plt.show()
    print(val)
    print("\n\n")
    print(clf)
    score = model.evaluate(test_generator, verbose=1)
    print('\n', 'Test accuracy:', score[1]*100,"%")
    d[val]=score[1]*100
    print("\n\n **************"*10)


# All plots

In [None]:
import os
import json
import pandas as pd

x='/content/drive/MyDrive/ML Project 1/'
HISTORY_KEYS = ('accuracy', 'val_accuracy', 'loss', 'val_loss')

for val in os.listdir(x):
    print("***"*14,val,"\n\n")

    # Load the saved training history of this run. Entries without a readable
    # history file, or without the expected curves, are reported and skipped
    # instead of being hidden behind a catch-all handler.
    try:
        history_df = pd.read_json(x+val+"/history{}.json".format(val))
        curves = {key: history_df[key] for key in HISTORY_KEYS}
    except (OSError, ValueError, KeyError) as err:
        print("NOT Parsed","XXX"*27, val)
        print("reason:", repr(err))
        continue

    ## Plot accuracy and loss of the loaded history
    fig = plt.figure(figsize=(8, 6))
    plt.subplot(2, 2, 1)
    plt.plot(curves['accuracy'])
    plt.plot(curves['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')

    plt.subplot(2, 2, 2)
    plt.plot(curves['loss'])
    plt.plot(curves['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['loss', 'val_loss'], loc='upper left')
    plt.show()
        

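## Statistical significance: McNemar's Test

McNemar's test compares two classifiers that were evaluated on the same test images. It is computed from the 2×2 contingency table of their paired outcomes:

|                            | Classifier 2 correct | Classifier 2 incorrect |
|----------------------------|----------------------|------------------------|
| **Classifier 1 correct**   | Yes/Yes              | Yes/No                 |
| **Classifier 1 incorrect** | No/Yes               | No/No                  |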

In [None]:
# McNemar's test on the paired outcomes of two classifiers
from statsmodels.stats.contingency_tables import mcnemar

# Rows: classifier 1 correct / incorrect; columns: classifier 2 correct / incorrect.
table = [
    [106, 5],
    [3, 1],
]

# NOTE(review): statsmodels' mcnemar defaults to exact=True, in which case the
# continuity correction is presumably not used; confirm which variant is wanted.
result = mcnemar(table, correction=True)

print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))

# Significance level for rejecting H0 (both classifiers err at the same rate)
alpha = 0.05
print(
    'Same proportions of errors (fail to reject H0)'
    if result.pvalue > alpha
    else 'Different proportions of errors (reject H0)'
)

In [None]:
## Some example runs

# tf.keras.backend.clear_session()
# train_model(adam2,'loss','min',500,32,12,15)
# tf.keras.backend.clear_session()
# train_model(adam2,'loss','min',500,32,13,15)
# tf.keras.backend.clear_session()
# train_model(adam3,'loss','min',500,32,14,15)

## Test Images

In [None]:
# Load the saved model that the following cells run predictions with
model2 = keras.models.load_model('/content/drive/MyDrive/ML Project 1/good.h5')

In [None]:
def test_images(model, num_images = None):
    """Plot images of the next test batch with their actual and predicted class.

    Draws one batch from the notebook-level ``test_generator``, predicts it
    with ``model`` and opens one figure per image whose title states whether
    the prediction is correct, both class names and the top score. Class names
    are looked up in the notebook-level ``class_names`` list.

    Args:
        model: model whose ``predict`` method is called on the image batch.
        num_images: number of images of the batch to plot. ``None`` plots the
            whole batch; a value larger than the batch is capped at the batch
            size instead of raising an IndexError.
    """
    image_batch, classes_batch = next(test_generator)
    predicted_batch = model.predict(image_batch)

    available = image_batch.shape[0]
    count = available if num_images is None else min(num_images, available)

    for k in range(count):
        image = image_batch[k]
        real_class = class_names[np.argmax(classes_batch[k],axis=-1)]
        predicted_class = class_names[np.argmax(predicted_batch[k],axis=-1)]
        value_predicted = predicted_batch[k]
        isTrue = (real_class == predicted_class)
        plt.figure(k, figsize=(6,4))
        plt.title("Prediction - " + str("Correct\n" if isTrue else "Wrong")
         +'\nActual_Class: ' + real_class + '\nPredicted_class: ' +
          predicted_class +"\nScore: "+ str(np.max(value_predicted)*100)+"%\n")
        plt.axis('off')
        plt.imshow(image)



In [None]:
# Predict every image of the test set. Model.predict accepts the generator
# directly, so no hand-computed step count is needed.
# NOTE(review): test_generator shuffles, so the order of these predictions
# presumably does not match test_generator.classes; confirm before comparing them.
Y_pred_res = model2.predict(test_generator)
y_pred_res = np.argmax(Y_pred_res, axis=1)
y_pred_res

In [None]:
##https://www.kaggle.com/code/lomitofrito/punto4-parcial-emergentes/notebook

from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay,classification_report
class_names = ['Cloudy', 'Rain', 'Shine', 'Sunrise']

# Collect exactly one pass over the test set. reset() restarts the iterator,
# and len(test_generator) is its number of batches per pass, so no image is
# missed or counted twice.
test_generator.reset()
image_batches, class_batches = [], []
for _ in range(len(test_generator)):
    images, classes = next(test_generator)
    image_batches.append(images)
    class_batches.append(classes)
image_batch = np.concatenate(image_batches)
classes_batch = np.concatenate(class_batches)

# Predictions are made on the very images whose labels were collected, so the
# two stay aligned even though the generator shuffles.
y_predict = model2.predict(image_batch)
y_true = np.argmax(classes_batch, axis=1)
y_pred = np.argmax(y_predict, axis=1)

ConfusionMatrixDisplay.from_predictions(
    y_true=y_true,
    y_pred=y_pred,
    display_labels=class_names,
    cmap='Blues'
)
clf = classification_report(y_true, y_pred, target_names=class_names)
plt.show()
print("\n\n")
print(clf)


In [None]:

# Evaluate model2 here instead of printing whatever `score` an earlier cell
# happened to leave in the kernel.
score = model2.evaluate(test_generator, verbose=1)
print('\n', 'Test accuracy:', score[1]*100,"%")
