# The Disgusting Transmogriphier

This notebook will take your *disgusted* pictures and get them ready for our model to classify them. We will use the following steps:

1. We need to unzip the images from the `Student Uploads.zip` file.
2. We need to convert them to JPG (`.jpg`) files, whatever image format they were uploaded in.
3. We need to resize them to 48x48 pixels.
4. We need to make them grayscale.

Let's get started!

In [None]:
# Install the Pillow library with pip install Pillow
!pip install Pillow

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import os
import sys
import random

# Import libraries for reading images
from PIL import Image
from PIL import ImageOps
import cv2

# Import libraries for unzipping files
import zipfile

# Import libraries for plotting
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.colorbar as colorbar

In [None]:
# Let's unzip the file

# Archive that holds the student uploads
data_path = "Student Uploads.zip"

# Directory that receives the extracted files
extract_path = "Student Uploads"

# Create the target directory if it is missing. exist_ok=True replaces the
# separate "does it exist?" check and makes re-running this cell safe.
os.makedirs(extract_path, exist_ok=True)

# Extract every member of the archive into the target directory
with zipfile.ZipFile(data_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [None]:
# Show 9 random pictures from the extracted files

# How many pictures the preview should display
num_pics = 9

# Names of everything found in the extraction directory
first_file_list = os.listdir(extract_path)

def image_test(file_list, image_path, num_pics):
    """Display a random selection of images from a directory in a 3-column grid.

    Args:
        file_list: File names (relative to ``image_path``) to choose from.
        image_path: Directory that contains the files.
        num_pics: Number of pictures to display.

    Pictures are drawn without replacement, so the same file is never shown
    twice. If fewer than ``num_pics`` files are available, all of them are
    shown and the remaining grid cells stay blank.
    """
    n_cols = 3
    # Ceiling division gives enough rows for num_pics cells; keep at least one
    # row so plt.subplots always receives a valid grid.
    n_rows = max(1, -(-num_pics // n_cols))

    # Never request more pictures than there are files (or a negative count).
    n_shown = max(0, min(num_pics, len(file_list)))
    chosen = random.sample(list(file_list), n_shown)

    # squeeze=False keeps axs two-dimensional even when there is a single row.
    _, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

    # Hide every axis up front so unused cells render as blank space.
    for ax in axs.flat:
        ax.axis("off")

    for ax, file in zip(axs.flat, chosen):
        img = plt.imread(os.path.join(image_path, file))
        # cmap only applies to 2-D (single-channel) arrays; without it those
        # would be drawn with the default false-colour colormap.
        ax.imshow(img, cmap="gray")

    # Display the figure
    plt.show()

# Preview a random selection of the extracted pictures
image_test(file_list=first_file_list, image_path=extract_path, num_pics=num_pics)


In [None]:
# Look at the extracted file names before converting them to the JPG format

# Get the list of files in the directory
og_files = os.listdir(extract_path)

# Convert the list to a DataFrame
og_df_files = pd.DataFrame(og_files)

# Print up to 10 random file names. sample() raises a ValueError when asked
# for more rows than exist, so cap the request at the number of files.
print(og_df_files.sample(min(10, len(og_df_files))))


In [None]:
# We need to convert the files to the JPG format from any other format

# Directory that receives the converted files
jpg_path = "Student Uploads JPG"

# Create the directory if it is missing. exist_ok=True replaces the separate
# existence check and makes re-running this cell safe.
os.makedirs(jpg_path, exist_ok=True)

# Define the function to convert the files to the JPG format
def convert_to_jpg(file_list, image_path, jpg_path):
    """Re-save every readable image in ``file_list`` as a ``.jpg`` file.

    Args:
        file_list: File names (relative to ``image_path``) to convert.
        image_path: Directory that contains the source files.
        jpg_path: Existing directory that receives the ``.jpg`` copies.

    Entries for which ``plt.imread`` raises ``OSError`` (for example
    sub-directories or files that are not images) are reported and skipped,
    so one bad upload does not abort the whole run.
    """
    for file in file_list:
        file_path = os.path.join(image_path, file)
        try:
            img = plt.imread(file_path)
        except OSError as err:
            print(f"Error reading {file_path}: {err}")
            continue

        # A 2-D array is a single-channel image. plt.imsave would push it
        # through a colormap, so replicate the channel to make it plain RGB.
        if img.ndim == 2:
            img = np.stack((img,) * 3, axis=-1)

        # splitext strips only the final extension, so "john.doe.jpeg"
        # becomes "john.doe.jpg" instead of being truncated to "john.jpg".
        stem, _ = os.path.splitext(file)
        jpg_file_path = os.path.join(jpg_path, stem + ".jpg")
        plt.imsave(jpg_file_path, img)

# Convert every extracted upload
convert_to_jpg(file_list=og_files, image_path=extract_path, jpg_path=jpg_path)



In [None]:
# Print up to 10 random file names from the new directory
jpg_files = os.listdir(jpg_path)
jpg_df_files = pd.DataFrame(jpg_files)
# sample() raises a ValueError when asked for more rows than exist,
# so cap the request at the number of files.
print(jpg_df_files.sample(min(10, len(jpg_df_files))))

In [None]:
# Preview a random selection of the converted JPG pictures
image_test(file_list=jpg_files, image_path=jpg_path, num_pics=num_pics)


In [None]:
# Let's make the images grayscale

# Directory that receives the grayscale files
gray_path = "Student Uploads Gray"

# Create the directory if it is missing. exist_ok=True replaces the separate
# existence check and makes re-running this cell safe.
os.makedirs(gray_path, exist_ok=True)

# Define the function to convert the files to grayscale
def convert_to_gray(file_list, image_path, gray_path):
    """Save a grayscale copy of every image in ``file_list``.

    Args:
        file_list: File names (relative to ``image_path``) to convert.
        image_path: Directory that contains the source images.
        gray_path: Existing directory that receives the grayscale copies;
            each copy keeps the name of its source file.
    """
    for file in file_list:
        file_path = os.path.join(image_path, file)
        # The context manager closes the source file as soon as the
        # converted copy has been produced.
        with Image.open(file_path) as img:
            # "L" is Pillow's 8-bit single-channel (luminance) mode.
            gray_img = img.convert("L")
        gray_file_path = os.path.join(gray_path, file)
        gray_img.save(gray_file_path)

# Convert every JPG picture to grayscale
convert_to_gray(file_list=jpg_files, image_path=jpg_path, gray_path=gray_path)

In [None]:
# Preview a random selection of the grayscale images
gray_files = os.listdir(gray_path)
image_test(file_list=gray_files, image_path=gray_path, num_pics=num_pics)

In [None]:
# Rotate the images by a quarter turn.
# NOTE: rotate_images() rotates by 270 degrees with Pillow, and Pillow angles
# are counter-clockwise, so the pictures end up turned 90 degrees clockwise.

# Directory that receives the rotated files
rotate_path = "Student Uploads Rotated"

# Create the directory if it is missing. exist_ok=True replaces the separate
# existence check and makes re-running this cell safe.
os.makedirs(rotate_path, exist_ok=True)

# Define the function to rotate the images
def rotate_images(file_list, image_path, rotate_path, angle=270):
    """Save a rotated copy of every image in ``file_list``.

    Args:
        file_list: File names (relative to ``image_path``) to rotate.
        image_path: Directory that contains the source images.
        rotate_path: Existing directory that receives the rotated copies;
            each copy keeps the name of its source file.
        angle: Rotation in degrees, passed straight to Pillow's
            ``Image.rotate``. Pillow rotates counter-clockwise, so the
            default of 270 is a quarter turn clockwise.

    ``rotate`` is called without ``expand``, so the output keeps the size of
    the input: a non-square picture loses what falls outside the original
    frame and gets filled-in borders where nothing lands.
    """
    for file in file_list:
        file_path = os.path.join(image_path, file)
        # The context manager closes the source file once the rotated copy
        # (a new image object) has been produced.
        with Image.open(file_path) as img:
            rotated_img = img.rotate(angle)
        rotated_file_path = os.path.join(rotate_path, file)
        rotated_img.save(rotated_file_path)

# Rotate every grayscale picture
rotate_images(file_list=gray_files, image_path=gray_path, rotate_path=rotate_path)


In [None]:
# Preview a random selection of the rotated images
rotate_files = os.listdir(rotate_path)
image_test(file_list=rotate_files, image_path=rotate_path, num_pics=num_pics)

In [None]:
# Crop the black borders from the images

# Directory that receives the cropped files
crop_path = "Student Uploads Cropped"

# Create the directory if it is missing. exist_ok=True replaces the separate
# existence check and makes re-running this cell safe.
os.makedirs(crop_path, exist_ok=True)

# Define the function to look for the black borders in the images and crop them
def crop_images(file_list, image_path, crop_path, threshold=0):
    """Trim black columns from the left and right edge of every image.

    Args:
        file_list: File names (relative to ``image_path``) to crop.
        image_path: Directory that contains the source images.
        crop_path: Existing directory that receives the cropped copies;
            each copy keeps the name of its source file.
        threshold: A column counts as border when none of its grayscale
            values exceeds this number. The default of 0 treats only pure
            black as border; a small positive value also removes near-black
            columns such as compression noise.

    Only columns are trimmed; rows are never cropped. Files that OpenCV
    cannot read are reported and skipped. An image without any content
    column (entirely black) is written unchanged.
    """
    for file in file_list:
        file_path = os.path.join(image_path, file)
        img = cv2.imread(file_path)
        if img is None:
            print(f"Error reading {file_path}")
            continue

        # Border detection works on a single-channel view of the image.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Indices of every column that holds at least one non-border pixel.
        content_cols = np.flatnonzero((gray > threshold).any(axis=0))

        if content_cols.size:
            left = int(content_cols[0])
            # Slice ends are exclusive, so add 1 to keep the last content
            # column instead of cutting it off.
            right = int(content_cols[-1]) + 1
        else:
            left, right = 0, gray.shape[1]

        # Crop the columns on the original (multi-channel) image.
        cropped_img = img[:, left:right]

        cropped_file_path = os.path.join(crop_path, file)
        cv2.imwrite(cropped_file_path, cropped_img)

# Crop every rotated picture
crop_images(file_list=rotate_files, image_path=rotate_path, crop_path=crop_path)

In [None]:
# Preview a random selection of the cropped images
crop_files = os.listdir(crop_path)
image_test(file_list=crop_files, image_path=crop_path, num_pics=num_pics)

In [None]:
# Resize the images to 48x48 pixels

# Directory that receives the resized files
rescale_path = "Student Uploads Rescaled"

# Create the directory if it is missing. exist_ok=True replaces the separate
# existence check and makes re-running this cell safe.
os.makedirs(rescale_path, exist_ok=True)

# Define the function to rescale the images
def rescale_images(file_list, image_path, rescale_path, size=(48, 48)):
    """Save a resized copy of every image in ``file_list``.

    Args:
        file_list: File names (relative to ``image_path``) to resize.
        image_path: Directory that contains the source images.
        rescale_path: Existing directory that receives the resized copies;
            each copy keeps the name of its source file.
        size: Target ``(width, height)`` in pixels, passed straight to
            Pillow's ``Image.resize``. Defaults to 48x48. The aspect ratio
            of the source is not preserved.
    """
    for file in file_list:
        file_path = os.path.join(image_path, file)
        # The context manager closes the source file once the resized copy
        # (a new image object) has been produced.
        with Image.open(file_path) as img:
            rescaled_img = img.resize(size)
        rescaled_file_path = os.path.join(rescale_path, file)
        rescaled_img.save(rescaled_file_path)

# Resize every cropped picture
rescale_images(file_list=crop_files, image_path=crop_path, rescale_path=rescale_path)

In [None]:
# Preview a random selection of the rescaled images
rescale_files = os.listdir(rescale_path)
image_test(file_list=rescale_files, image_path=rescale_path, num_pics=num_pics)