First section: EDA

In [None]:
import matplotlib.pyplot as plt
import os

In [None]:
# EDA BAR GRAPH

def count_images_in_folders(root_dir):
    """Count the image files directly inside each sub-folder of ``root_dir``.

    Only the immediate sub-directories are inspected (no recursion). An entry
    counts as an image when its lower-cased name ends in ``.png``, ``.jpg`` or
    ``.jpeg``.

    Args:
        root_dir: Path of the directory whose sub-folders are scanned.

    Returns:
        dict mapping each sub-folder name to its number of image files.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    counts_by_folder = {}
    for entry in os.listdir(root_dir):
        subdir = os.path.join(root_dir, entry)
        # Plain files sitting next to the class folders are ignored.
        if not os.path.isdir(subdir):
            continue
        counts_by_folder[entry] = sum(
            1 for name in os.listdir(subdir) if name.lower().endswith(image_suffixes)
        )
    return counts_by_folder

def plot_bar_graph(folder_counts):
    """Draw a bar chart of image counts per folder, writing each count above its bar.

    Args:
        folder_counts: Mapping of folder name -> image count. Bars appear in the
            mapping's iteration order.
    """
    names = list(folder_counts.keys())
    totals = list(folder_counts.values())
    positions = range(len(folder_counts))

    plt.bar(positions, totals, align='center')
    plt.xticks(positions, names, rotation=45)
    plt.xlabel('Folders')
    plt.ylabel('Image Counts')
    plt.title('Image Counts in Each Set')
    # Place the numeric count slightly above the top of each bar.
    for position, total in zip(positions, totals):
        plt.text(position, total + 0.1, str(total), ha='center')
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # The dataset location used to be fixed to one machine's absolute path. It can now be
    # overridden with the MASK_DATA_DIR environment variable; the original path remains
    # the fallback, so existing setups behave exactly as before.
    root_directory = os.environ.get("MASK_DATA_DIR", "C:/Users/speed/Desktop/DS 4002/data")
    folder_counts = count_images_in_folders(root_directory)
    print("Folder Counts:", folder_counts)
    plot_bar_graph(folder_counts)

Second section: KNN model


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread
import matplotlib.image as mpimg
import cv2
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.svm import SVC
from PIL import Image
from sklearn.preprocessing import StandardScaler

In [None]:
# Relative paths of the two class folders (with_mask / without_mask) under ./DATA.
# NOTE(review): neither variable is referenced again in the visible cells, which rebuild
# the same paths from "./DATA" and the category names — confirm before removing.
images_with_mask_dir = "./DATA/with_mask"
images_without_mask_dir = "./DATA/without_mask"

In [None]:
data_path = "./DATA"
categories = ["with_mask","without_mask"]
# Numeric class label stored for each category folder.
label_by_category = {"with_mask": 1, "without_mask": 0}
data=[]
labels=[]
for category in categories:
    file_path = os.path.join(data_path, category)  # with_mask / without_mask folder
    for img in os.listdir(file_path):
        image_path = os.path.join(file_path, img)  # one file inside the category folder
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable or non-image file by returning None rather
            # than raising; passing that on to cvtColor would abort the whole load.
            print(f"Skipping unreadable file: {image_path}")
            continue
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # grayscale to reduce computation
        resize_image = cv2.resize(gray_image, (50, 50))  # bring every image to the same size
        data.append(np.array(resize_image))
        labels.append(label_by_category[category])

print(data[0].shape)
print(len(data))
print(len(labels))

In [None]:
# Stack the per-image arrays into a single 3-D array (images x rows x columns);
# the original author reports a shape of (7553, 50, 50) for this dataset.
data = np.array(data)
num_columns = data.shape[1] * data.shape[2]
# Flatten every image into one row of pixel values, e.g. (7553, 2500).
reshaped_data = data.reshape(len(data), num_columns)
# One row per image: the pixel columns followed by a 'Labels' column.
df = pd.DataFrame(reshaped_data).assign(Labels=labels)

df.head()  # preview the first 5 rows

In [None]:
# Learning-curve helper (taken from Kaggle): trains PCA + kNN on growing fractions of the
# training set and plots the resulting test metrics.
def sample_train_test_accuracy(X_train, y_train, X_test, y_test, n_neighbors=5, desired_variance=0.95):
    """Plot test accuracy and F1 score against the fraction of training data used.

    For each of 10 evenly spaced fractions between 0.1% and 30% of the training set,
    a subset is sampled, PCA is fitted on that subset, a kNN classifier is trained on
    the projected subset, and the fixed test set is scored.

    Args:
        X_train, y_train: Full training features and labels to sample subsets from.
        X_test, y_test: Held-out features and labels used for every evaluation.
        n_neighbors: Number of neighbours for kNN (default 5). It is capped at the
            subset size, because kNN cannot query more neighbours than it was fitted on.
        desired_variance: Fraction of variance PCA must retain (default 0.95).

    Returns:
        None. The function displays a matplotlib figure.
    """
    # 0.1% to 30% of the training set, in 10 evenly spaced steps.
    subset_sizes = np.linspace(0.001, 0.3, num=10)
    accuracies = []
    f1_scores = []
    for size_percentage in subset_sizes:
        # Sample a subset of the training data. The complementary split is discarded:
        # evaluation always uses the caller's test set, not the (very large) remainder.
        X_subset, _, y_subset, _ = train_test_split(
            X_train, y_train, train_size=size_percentage, random_state=42
        )
        # Reduce dimensionality with PCA fitted on the subset only.
        pca = PCA(n_components=desired_variance)
        X_train_pca = pca.fit_transform(X_subset)
        X_test_pca = pca.transform(X_test)
        # kNN classifier; very small subsets may hold fewer samples than n_neighbors.
        k = min(n_neighbors, len(X_subset))
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train_pca, y_subset)
        y_pred = knn.predict(X_test_pca)
        # Record both metrics for this subset size.
        accuracies.append(accuracy_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))

    plt.plot(subset_sizes * 100, accuracies, marker='o', label='Accuracy')
    plt.plot(subset_sizes * 100, f1_scores, marker='o', label='F1 Score')
    plt.xlabel("Training Set Size (%)")
    plt.ylabel("Metric Score")
    plt.title("Test Metric Score vs. Training Set Size")
    plt.legend()
    plt.grid(True)
    plt.show()


In [None]:
# Separate the pixel columns from the class label, then hold out 20% for testing.
features = df.drop('Labels', axis=1)
target = df['Labels']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)
# Plot how the kNN test metrics change with the amount of training data.
sample_train_test_accuracy(X_train, y_train, X_test, y_test)

In [None]:
categories = ["with_mask","without_mask"]
# Numeric class label stored for each category folder.
label_by_category_cnn = {"with_mask": 1, "without_mask": 0}
data_cnn=[]
labels_cnn=[]

path_of_data = "./DATA"
for category in categories:
    file_path1 = os.path.join(path_of_data, category)  # with_mask / without_mask folder
    for img in os.listdir(file_path1):
        image_path = os.path.join(file_path1, img)  # one file inside the category folder
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable or non-image file by returning None rather
            # than raising; passing that on to cv2.resize would abort the whole load.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # The colour channels are kept here (no grayscale conversion).
        resize_image = cv2.resize(image, (128, 128))  # bring every image to the same size
        data_cnn.append(np.array(resize_image))
        labels_cnn.append(label_by_category_cnn[category])
data_cnn[0].shape

Third section: TensorFlow and Keras modeling

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import os
# Import Tenosrflow
import tensorflow as tf
from tensorflow import keras
import zipfile

In [None]:
# Training image folders, one per class, relative to the notebook.
# A single-argument os.path.join returns its argument unchanged, so plain string
# literals give exactly the same values.
train_mask_dir = './DATA/train/with_mask'
train_without_mask_dir = './DATA/train/without_mask'

In [None]:
# File names found in each training class folder.
train_mask = os.listdir(train_mask_dir)
train_without = os.listdir(train_without_mask_dir)

print(f'Total Mask Data: {len(train_mask)}')
# Show the first ten file names of each class as a sanity check.
print(f'Mask-present Data \n{train_mask[:10]}')
print(f'No mask Data\n{train_without[:10]}')
# Totals for each class, reusing the listings above instead of reading the
# directories a second time.
print('Size of mask-present images array: ', len(train_mask))
print('Size of no-mask images array: ', len(train_without))

In [None]:
import matplotlib.image as mping
%matplotlib inline
# Grid dimensions and a running picture index.
# NOTE(review): none of these three names is read by any later visible cell (the preview
# cells build their own 2x4 grids) — presumably leftovers; confirm before removing.
nrows = 4
ncols = 4
pic_index = 0

In [None]:
import os
import matplotlib.pyplot as plt

folder_path = "./DATA/train/with_mask"
files = os.listdir(folder_path)

# Keep at most eight image files for the 2x4 grid. Extensions are matched
# case-insensitively so files such as "IMG.JPG" are not silently skipped, which is
# consistent with the counting done in the EDA section.
image_files = [
    file for file in files if file.lower().endswith((".jpg", ".png", ".jpeg"))
][:8]
num_printed = len(image_files)

fig, axs = plt.subplots(2, 4, figsize=(12, 6))
grid_axes = axs.ravel()  # row-major order: first row left to right, then second row

print('Mask Image\n\n')

for ax, file in zip(grid_axes, image_files):
    image_path = os.path.join(folder_path, file)
    img = plt.imread(image_path)
    ax.imshow(img)

# Hide the axis frame on every cell, including any left empty when fewer than
# eight images were found.
for ax in grid_axes:
    ax.axis('off')

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
import os
import matplotlib.pyplot as plt

folder_path = "./DATA/train/without_mask"
files = os.listdir(folder_path)

# Keep at most eight image files for the 2x4 grid. Extensions are matched
# case-insensitively so files such as "IMG.JPG" are not silently skipped, which is
# consistent with the counting done in the EDA section.
image_files = [
    file for file in files if file.lower().endswith((".jpg", ".png", ".jpeg"))
][:8]
num_printed = len(image_files)

fig, axs = plt.subplots(2, 4, figsize=(12, 6))
grid_axes = axs.ravel()  # row-major order: first row left to right, then second row

print('With Out Mask Image\n\n')

for ax, file in zip(grid_axes, image_files):
    image_path = os.path.join(folder_path, file)
    img = plt.imread(image_path)
    ax.imshow(img)

# Hide the axis frame on every cell, including any left empty when fewer than
# eight images were found.
for ax in grid_axes:
    ax.axis('off')

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
# Binary image classifier: five Conv2D + MaxPooling2D stages (16, 32, 64, 64, 64 filters)
# followed by a dense head ending in one sigmoid unit.
conv_filters = (16, 32, 64, 64, 64)
cnn_layers = []
for stage, n_filters in enumerate(conv_filters):
    # Only the first layer declares the input shape: 300x300 images with 3 channels.
    extra = {'input_shape': (300, 300, 3)} if stage == 0 else {}
    cnn_layers.append(tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu', **extra))
    cnn_layers.append(tf.keras.layers.MaxPooling2D(2, 2))

cnn_layers.extend([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model = tf.keras.Sequential(cnn_layers)

model.summary()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Scale pixel values by 1/255 and stream batches of 128 images, resized to 300x300,
# from the class sub-folders of ./DATA/train with binary labels.
train_data = ImageDataGenerator(rescale=1 / 255)
train_gernater = train_data.flow_from_directory(
    directory='./DATA/train',
    class_mode='binary',
    batch_size=128,
    target_size=(300, 300),
)

In [None]:
# Callback taken from kaggle code
class myCallBack(tf.keras.callbacks.Callback):
    """Stop training once the training accuracy of an epoch reaches 80%."""

    def on_epoch_end(self, epoch, logs=None):
        """Keras hook called after every epoch.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras; may be None or lack 'accuracy'.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy >= 0.80:
            print('80% accuracy reached')
            # Keras checks this flag after each epoch and ends fit() when it is True.
            self.model.stop_training = True

    # The method was originally named `on_epoch`, which Keras never invokes; the old
    # name is kept as an alias so any direct caller keeps working.
    on_epoch = on_epoch_end


callbacks = myCallBack()

from tensorflow.keras.optimizers import RMSprop

# Binary cross-entropy matches the single sigmoid output unit; accuracy is tracked so
# the training history and the early-stopping callback can read it.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for up to 15 epochs of 8 generator batches each.
history = model.fit(
    train_gernater,
    epochs=15,
    steps_per_epoch=8,
    callbacks=[callbacks],
    verbose=1,
)

# Per-epoch training metrics, plus the matching x-axis values for plotting.
acc, loss = (history.history[key] for key in ('accuracy', 'loss'))
epochs = range(len(acc))

In [None]:
# Training accuracy and loss per epoch. Both curves are labelled and a legend is drawn;
# previously the 'Accuracy' label was set but never displayed and the loss curve was
# unlabelled.
plt.plot(epochs, acc, label='Accuracy')
plt.plot(epochs, loss, label='Loss')
plt.xlabel('Epoch')
plt.ylabel('Metric value')
plt.title('Training accuracy and loss per epoch')
plt.legend()
plt.show()

In [None]:
import numpy as np
import os
from keras.preprocessing import image

# testing with our own images
image_path = './DATA/test/with_mask/with_mask_25.jpg'
# File name printed next to the prediction; `fn` was previously undefined, so the
# print calls below raised NameError.
fn = os.path.basename(image_path)
img = image.load_img(image_path, target_size=(300,300))
# Scale pixels to [0, 1] exactly as the training generator does (rescale = 1/255);
# feeding raw 0-255 values to the model would not match what it was trained on.
x = image.img_to_array(img) / 255.0
x = np.expand_dims(x, axis=0)  # add the batch dimension: (1, 300, 300, 3)

images = np.vstack([x])
plt.imshow(img)

classes = model.predict(images, batch_size=10)
# NOTE(review): assumes the generator mapped with_mask -> 0 and without_mask -> 1
# (alphabetical folder order); confirm with train_gernater.class_indices.
if classes[0] > 0.5:
    print(fn +  '\n No Mask Present')
else:
    print(fn + '\n Mask Present' )

