In [None]:
# Group
# Afshin Shahrestani - 54703632


In [38]:
import os
from pathlib import Path

# The required libraries are imported here

import numpy as np
% matplotlib inline
#The line above is necesary to show Matplotlib's plots inside a Jupyter Notebook
import cv2
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

UsageError: Line magic function `%` not found.


In [43]:
# This cell contains the functions defined to read the images from the given directories, separate them into classes, and preprocess them so they can be used by the model


def image_cropper(img: np.ndarray, height_percentage: int = 40, width_percentage: int = 40) -> np.ndarray:
    """
    Crop an image symmetrically around its centre.

    The requested percentage of rows is removed in total, half from the top and
    half from the bottom; the same is done for columns on the left and right.

    :param img: the image as an np.ndarray whose first two axes are (height, width)
    :param height_percentage: the percentage of the image height to crop away
    :param width_percentage: the percentage of the image width to crop away
    :return: cropped image as np.ndarray
    """
    height, width = img.shape[0], img.shape[1]
    # Multiply before the integer division so that sizes which are not a
    # multiple of 100 keep their precision (40% of 150 is 60, not 1 * 40).
    cropped_height_amount = height * height_percentage // 100
    # The width crop has to be derived from the width, not from the height.
    cropped_width_amount = width * width_percentage // 100
    top = cropped_height_amount // 2
    left = cropped_width_amount // 2
    return img[top:height - top, left:width - left]


def image_scaler(img, new_height=20, new_width=20):
    """
    Resize an image to the requested height and width using area interpolation.

    :param img: the image as an np.array
    :param new_height: the new height (number of rows) of the scaled image
    :param new_width: the new width (number of columns) of the scaled image
    :return: scaled image as np.array
    """
    # cv2.resize takes its target size as (width, height), which is the reverse
    # of numpy's (rows, cols) shape order.
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)


def image_standardization(img, max_value=255):
    """
    Min-max rescale the pixel values of an image to lie between 0 and max_value.

    :param img: the image as an np.array
    :param max_value: the value the largest pixel of the image is mapped to
    :return: rescaled image; an all-zero float array of the same shape when
             every pixel of the input has the same value
    """
    lowest = np.min(img)
    value_range = np.max(img) - lowest
    if value_range == 0:
        # A constant image has no contrast to stretch; dividing by the zero
        # range would fill the result with NaNs, so return zeros instead.
        return np.zeros(np.shape(img), dtype=np.float64)
    return (img - lowest) / value_range * max_value


def separate_classes(img_files, name_separation_chars=3, test_flag=False, classes_dict=None):
    """
    Separate the images into classes based on the leading characters of their file names.

    :param img_files: list of image file names
    :param name_separation_chars: number of leading characters of the name used as the class key
    :param test_flag: when False, a new prefix -> class number mapping is built from img_files
                      (any classes_dict passed in is ignored); when True, the mapping given
                      in classes_dict is reused
    :param classes_dict: existing prefix -> class number mapping, required when test_flag is True
    :return: a list with the class number of every file (in the order of img_files) and the
             dict mapping each name prefix to its class number
    :raises ValueError: if test_flag is True and no classes_dict is given
    :raises KeyError: if a file name prefix is missing from the mapping in use
    """
    if not test_flag:
        # Sort the prefixes so class numbers are reproducible: the iteration
        # order of a set of strings can change from one interpreter run to the next.
        names = sorted({img_name[:name_separation_chars] for img_name in img_files})
        classes_dict = {name: index for index, name in enumerate(names)}
    elif classes_dict is None:
        raise ValueError("classes_dict must be provided when test_flag is True")
    classes = [classes_dict[img_name[:name_separation_chars]] for img_name in img_files]
    return classes, classes_dict


def get_files_list(data_dir, name_separation_chars=3, test_flag = False, classes_dict = None):
    """
    Get the list of image names from the directory together with their classes.

    :param data_dir: path to the directory holding the image files
    :param name_separation_chars: number of leading characters of a file name used as its class key
    :param test_flag: forwarded to separate_classes; True reuses classes_dict instead of building one
    :param classes_dict: existing prefix -> class number mapping, forwarded to separate_classes
    :return: list of file names (sorted), their classes and the dict for classes
    """
    # os.listdir returns entries in arbitrary order; sort them so the order of
    # the samples (and of their labels) is the same on every run and machine.
    data = sorted(os.listdir(data_dir))
    classes, classes_dict = separate_classes(data, name_separation_chars=name_separation_chars, test_flag=test_flag, classes_dict=classes_dict)
    return data, classes, classes_dict


def image_reader(img_dir):
    """
    Load one image file in grayscale.

    :param img_dir: path of the image file, handed to cv2.imread unchanged
    :return: img as np.array
             NOTE(review): cv2.imread is documented to return None instead of raising
             when the file cannot be read - confirm callers handle that case.
    """
    grayscale_flag = 0  # second argument of cv2.imread: load as a single-channel image
    return cv2.imread(img_dir, grayscale_flag)


def get_data(path_to_training_data,
             class_name_chars=3,
             test_flag = False,
             classes_dict = None,
             standardization_flag=True,
             standardization_max_value=255,
             crop_flag=True,
             crop_height_percentage=50,
             crop_width_percentage=50,
             scaling_flag=True,
             scaling_height=50,
             scaling_width=50):
    """
    Read every image of a directory, preprocess it and return the images with their classes.

    Each image is read in grayscale and then, depending on the flags, standardized,
    cropped and scaled - in that order.

    :param path_to_training_data: path to the directory holding the image files
    :param class_name_chars: number of leading characters of a file name used as its class key
    :param test_flag: True to reuse classes_dict instead of building a new class mapping
    :param classes_dict: existing prefix -> class number mapping, used when test_flag is True
    :param standardization_flag: whether to rescale pixel values with image_standardization
    :param standardization_max_value: max_value passed to image_standardization
    :param crop_flag: whether to crop the image with image_cropper
    :param crop_height_percentage: height percentage passed to image_cropper
    :param crop_width_percentage: width percentage passed to image_cropper
    :param scaling_flag: whether to resize the image with image_scaler
    :param scaling_height: new height passed to image_scaler
    :param scaling_width: new width passed to image_scaler
    :return: np.array built from the list of processed images, the list of their classes
             and the dict for classes
    :raises ValueError: if a file in the directory could not be read as an image
    """
    image_names, classes, classes_dict = get_files_list(path_to_training_data, class_name_chars,
                                                        test_flag=test_flag, classes_dict=classes_dict)
    data = []
    for name in image_names:
        img_path = Path(path_to_training_data) / name
        img = image_reader(str(img_path))
        if img is None:
            # An unreadable or non-image file (e.g. a stray system file in the
            # folder) would otherwise fail later with an opaque error, or end up
            # as None inside the returned array when all flags are off.
            raise ValueError("Could not read image file: {}".format(img_path))
        if standardization_flag:
            img = image_standardization(img, standardization_max_value)
        if crop_flag:
            img = image_cropper(img, crop_height_percentage, crop_width_percentage)
        if scaling_flag:
            img = image_scaler(img, scaling_height, scaling_width)
        data.append(img)
    return np.array(data), classes, classes_dict

# Load the training images with get_data's default preprocessing (standardize,
# crop 50% of height and width, resize to 50x50), then display the shape of the
# resulting image array as a quick sanity check.
data,classes,classes_dict = get_data('../../data/Lego_dataset_1/training/')
data.shape
# img_files, classes, classes_dict = get_files_list('../../data/Lego_dataset_1/training/')
# names = set(img_name[:3] for img_name in img_files)
# print(classes)
# print(classes_dict)
# img = image_reader()
# img = image_standardization(img)
# img = image_cropper(img, 50, 50)
# img = image_scaler(img,60,60)
# print(img.shape)
# # print(img)
# plt.imshow(img, cmap='gray')
# plt.show()

(108, 50, 50)