# CNN application

## Preprocess the image: 
Convert the image into a format that can be fed into the CNN. This usually involves resizing the image to a fixed size, normalizing the pixel values, and possibly converting the image to grayscale.

In [None]:
import os

# Get the list of all entries (files and directories) in the chapters directory
all_files_and_dirs = os.listdir('chapters')

# Keep directories only. Hidden directories (e.g. Jupyter's
# .ipynb_checkpoints) are skipped so they do not inflate the chapter count
# that the later chapter_1..chapter_N loops rely on.
chapter_dirs = [
    name for name in all_files_and_dirs
    if not name.startswith('.') and os.path.isdir(os.path.join('chapters', name))
]

# The number of chapters is just the length of the list
num_chapters = len(chapter_dirs)

In [None]:
import numpy as np
from PIL import Image
# Preprocess every chapter: resize each JPEG to 200x200, convert it to
# grayscale and write it to the mirrored chapters_PP/ directory tree.
for i in range(1, num_chapters+1):
    source_dir = f'chapters/chapter_{i}'
    target_dir = f'chapters_PP/chapter_{i}'

    # Create the output directory once per chapter, not once per image
    os.makedirs(target_dir, exist_ok=True)

    # Keep only the .jpeg files of this chapter; sorted so that the
    # processing order does not depend on the filesystem's listing order
    image_files = sorted(file for file in os.listdir(source_dir) if file.endswith('.jpeg'))

    # The number of images is just the length of the list
    num_images = len(image_files)

    # Iterate over the files that actually exist instead of guessing
    # image_1..image_N from the count: the previous range(1, num_images)
    # never reached the last image.
    for file_name in image_files:
        # The context manager closes the source file as soon as the pixel
        # data has been read by resize()
        with Image.open(f'{source_dir}/{file_name}') as source_image:
            # Resize to 200x200 pixels, then convert to grayscale
            image = source_image.resize((200, 200)).convert('L')

        # No normalization here: JPEG stores 8-bit integers, so scaling to
        # [0, 1] and back before saving gains nothing and can shift pixel
        # values by one through float truncation. Pixel values are
        # normalized when the images are loaded for training.

        # Save the image under the same file name in the output directory
        image.save(f'{target_dir}/{file_name}')


## Define the CNN: 

The CNN will likely consist of several convolutional layers, followed by some fully connected layers. The convolutional layers are responsible for detecting local features such as edges, while the fully connected layers combine these local features to make a final decision.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D

# Define the model
model = Sequential()

# Declare the input explicitly: 200x200 pixels, 1 channel (grayscale).
# Passing input_shape to the first Conv2D layer instead makes Keras emit a
# UserWarning that recommends an Input(shape) object as the first layer.
model.add(Input(shape=(200, 200, 1)))

# Add convolutional layers; each one is followed by 2x2 max pooling
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the tensor output by the convolutional layers
model.add(Flatten())

# Add fully connected layers
model.add(Dense(128, activation='relu'))
# Output layer: 1 node for binary classification.
# NOTE(review): the training cell in this notebook uses the chapter number
# (1..num_chapters) as the label, while a sigmoid unit trained with
# binary_crossentropy expects targets in {0, 1} -- confirm the intended
# target before relying on the reported loss/accuracy.
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

## Train the CNN: 

Feed the preprocessed image into the CNN and use backpropagation to adjust the weights of the network. The goal is to minimize the difference between the network's output and the desired output.

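> Recorded output of the model-definition cell above when `input_shape` is passed to the first layer:
> UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.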

In [None]:
# Build the training set from the preprocessed images and fit the model.
train_images = []
train_labels = []

for i in range(1, num_chapters+1):
    chapter_dir = f'chapters_PP/chapter_{i}'

    # Keep only the .jpeg files of the preprocessed chapter directory;
    # sorted so that the sample order is reproducible between runs
    image_files = sorted(file for file in os.listdir(chapter_dir) if file.endswith('.jpeg'))

    # Iterate over the files that actually exist instead of guessing
    # image_1..image_N from the count: the previous
    # range(1, len(image_files)) dropped one image per chapter.
    for file_name in image_files:
        # Load the image, convert it to a numpy array and normalize it to
        # [0, 1]; the context manager closes the file once the pixel data
        # has been copied into the array
        with Image.open(f'{chapter_dir}/{file_name}') as image:
            pixels = np.array(image) / 255.0

        # Add a trailing channel axis to make it compatible with the
        # model's input shape
        train_images.append(np.expand_dims(pixels, axis=-1))

        # The label is the chapter number.
        # NOTE(review): the model defined in this notebook ends in a single
        # sigmoid unit trained with binary_crossentropy, which expects
        # targets in {0, 1}; chapter numbers 1..num_chapters do not fit
        # that unless the labelling scheme is changed -- confirm.
        train_labels.append(i)

# Fail early with a clear message rather than deep inside model.fit
if not train_images:
    raise ValueError("No preprocessed .jpeg images found under 'chapters_PP'")

# Convert lists to numpy arrays
train_images = np.array(train_images)
train_labels = np.array(train_labels)

# Fit the model to the training data
model.fit(train_images, train_labels, epochs=10, batch_size=32)

## Postprocess the output:

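The model defined above ends in a single sigmoid unit, so its direct output is one probability per image rather than an image. To obtain a final image, the feature maps produced by the convolutional layers need to be extracted and postprocessed. This could involve thresholding the feature maps to detect edges, and then using a technique such as the Hough transform to detect squares.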

