# In [None]:  (notebook cell marker left over from the Jupyter export)
import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from data_processing import load_images_and_labels


# Path to Dataset
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
training_data_dir = '/Users/vaatsalyababbar/Desktop/Resume Classifier/DS'
images, labels = load_images_and_labels(training_data_dir)

# Image dimensions and batch size. These are the single source of truth for
# both the directory generator below and the model's input shape.
img_width, img_height = 150, 150
batch_size = 16

# Data augmentation using Keras' ImageDataGenerator: pixel values are rescaled
# to [0, 1] and random shear / zoom / horizontal flips are applied.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Load images from the class sub-directories of the dataset folder.
# Keras expects target_size as (height, width); reuse the constants above
# rather than repeating the literals so the two can never drift apart.
train_generator = train_datagen.flow_from_directory(
    training_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
)

# Feature Extraction and Additional Data Augmentation
def _extract_features(img):
    """Return the histogram-equalised grayscale image and Sobel edge map of *img*.

    *img* is one RGB image taken from a generator batch. The generator is
    configured with ``rescale=1./255``, so the pixels are floats (presumably
    in [0, 1] for 8-bit source images -- TODO confirm).
    """
    # Histogram equalisation: cv2.equalizeHist only accepts 8-bit
    # single-channel input, so convert the float image back to uint8 first.
    img_uint8 = np.clip(np.rint(img * 255.0), 0, 255).astype(np.uint8)
    gray_img = cv2.cvtColor(img_uint8, cv2.COLOR_RGB2GRAY)
    equalised_img = cv2.equalizeHist(gray_img)

    # Sobel edge detection: horizontal and vertical gradients, blended 50/50.
    edge_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
    edge_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)
    sobel_edges = cv2.addWeighted(
        cv2.convertScaleAbs(edge_x), 0.5,
        cv2.convertScaleAbs(edge_y), 0.5,
        0,
    )
    return equalised_img, sobel_edges


# Extracted features are kept so later steps can use them instead of being
# discarded at the end of every iteration.
equalised_images = []
sobel_edge_images = []

# A Keras directory iterator yields batches forever; len() is the number of
# batches in one epoch, so zipping with range() stops after a single pass
# over the dataset instead of looping indefinitely.
for _, (images_batch, labels_batch) in zip(range(len(train_generator)),
                                           train_generator):
    for img in images_batch:
        equalised_img, sobel_edges = _extract_features(img)
        equalised_images.append(equalised_img)
        sobel_edge_images.append(sobel_edges)

# Train-Validation Split
# Hold out 20% of the samples for validation. The fixed seed makes the split
# reproducible, and the labels are passed as the stratification target.
(
    train_img,
    val_img,
    train_labels,
    val_labels,
) = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# MODEL BUILDING
# Small CNN for binary classification: two convolution + max-pooling stages
# for feature extraction, followed by a dense classifier head.
model = Sequential([
    # Input layer: convolutional layer. Keras image tensors are ordered
    # (height, width, channels).
    Conv2D(32, (3, 3), activation='relu',
           input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Second convolution + pooling stage for further feature extraction.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the feature maps into a vector for the dense layers.
    Flatten(),

    # Fully connected layers for classification.
    Dense(128, activation='relu'),
    Dropout(0.5),  # for regularisation
    # Single sigmoid unit because the output is a binary classification.
    Dense(1, activation='sigmoid'),
])

# Compile. The loss identifier is 'binary_crossentropy' (underscore, not dot).
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model before evaluating it; predicting with freshly initialised
# weights would make the confusion matrix meaningless.
# NOTE(review): assumes load_images_and_labels returns images already sized
# and scaled for the model input and numeric 0/1 labels -- TODO confirm.
epochs = 10
train_x, train_y = np.asarray(train_img), np.asarray(train_labels)
val_x, val_y = np.asarray(val_img), np.asarray(val_labels)
model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=epochs,
    batch_size=batch_size,
)

# Predict on the validation IMAGES (not the labels). The sigmoid output is a
# probability per sample, so threshold at 0.5 to obtain hard class labels;
# confusion_matrix rejects continuous predictions.
predicted_probs = model.predict(val_x)
predicted_labels = (predicted_probs > 0.5).astype(int).ravel()

# Compute Confusion Matrix (CM)
conf_matrix = confusion_matrix(val_y, predicted_labels)

# Plot CM
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('PREDICTED LABELS')
plt.ylabel('TRUE LABELS')
plt.title('CONFUSION MATRIX')
plt.show()
