In [45]:
from submissions.ab_submission.object_detector import imageDataLoader, follicleClassifier, ObjectDetector
import os
import numpy as np

In [46]:
import glob
import pandas as pd
from matplotlib import pyplot as plt

train_files = glob.glob("./data/train/*.jpg")
test_files = glob.glob("./data/test/*.jpg")
train_label = pd.read_csv("./data/train/labels.csv")
test_label = pd.read_csv("./data/test/labels.csv")

In [47]:
model = ObjectDetector(ramp_mode=False)
model.load(boxPixelClassifier="./params/boxPixelClassifier_opencv.joblib", follicleClassifier="./params/follicleClassifier2.model")
model.fit(train_files, train_label)
#model.save(boxPixelClassifier="./params/boxPixelClassifier_opencv.joblib",follicleClassifier="./params/follicleClassifier2.model")

Model loaded from ./params/follicleClassifier2.model
Fitting boxPixelClassifier
Fitting follicleClassifier


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


<submissions.ab_submission.object_detector.ObjectDetector at 0x146f87c4550>

In [48]:
test_loader = imageDataLoader(test_files, test_label[["filename","xmin","xmax","ymin","ymax","label"]])
train_loader = imageDataLoader(train_files, train_label[["filename","xmin","xmax","ymin","ymax","label"]])

In [5]:
#y_hat = model.predict(test_files)

In [31]:
from PIL import Image, ImageDraw, ImageFont

font_size = 60
if os.name != 'nt':
    font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMono.ttf", font_size)
else:
    font = ImageFont.truetype("C:/Windows/Fonts/Arial/arialbd.ttf", font_size)

def write_rectangle(image, preds, folder=None, filename=None):
    img = Image.fromarray(image)
    img_draw = ImageDraw.Draw(img)
    for pred in preds:
        x1, y1, x2, y2 = pred["bbox"]
        label = pred["class"]
        img_draw.rounded_rectangle(((x1, y1), (x2,y2)), fill=None, outline="black", width=5)
        img_draw.text((x1, y1-70), label, font=font, fill="black")

    if folder is not None and filename is not None:
        img.save(f"./data/{folder}/{filename}")
    
    return np.array(img)

In [36]:
i = 0
folder = "test_predicted"
for x in test_loader.get_samples():
    write_rectangle(x[0], y_hat[i], folder=folder, filename=test_loader.X_filenames[i])
    i += 1    

In [49]:
import os
from torch.utils.data import Dataset
import tempfile
import pickle
import csv

In [54]:
tempFolder_test = tempfile.TemporaryFile()
tempFolder_train = tempfile.TemporaryFile()

In [87]:
class folliclesDataset(Dataset):
    """folliclesDataset
    
    This class provide a dataset for follicles algorithm training.
    The aim is to perform all the data transform and augmentation at the same place.
    This class provide an iterate, either it provide data in live, either it stores them in hard drive and provide them from memory 
    """

    def __init__ (self, image_loader, data_augmentation, local_path, box_classifier = None, verbose=True):
        """Parameters
            ----------
            image_loader: object from the image loader class
            data_augmentation: boolean, if True a data augmentation is performed
            local_path: str, local path for data storage which are kept in memory as picke in the local_path folder
            box_classifier: object from the box classifier class, if None no box are generated from the classifier
            verbose: boolean, informations about current operations are displayed
        """

        # Storing the image loader
        self.image_loader = image_loader
        self.box_classifier = box_classifier

        # Storing the parameters
        if local_path is not None and os.path.exists(local_path):
            self.local_path = local_path
        else:
            raise Exception("The provided path doesn't exist.")

        self.data_augmentation = data_augmentation
        self.verbose = verbose

        # Recording metadata
        ## Contains the dataset metadata
        ## files metadata, files location
        self.metadata = []

        # Generating data
        if self.verbose:
            print("Generating data")
            self._generate_all_data()

            self._write_metadata("/".join([
                self.local_path,
                "metadata.pickle"
                ])
            )

    def _generate_all_data(self, label_ratio_threshold=0.7):
        """Function that generate and write all the data

        Parameters
        ----------
        label_ratio_threshold: threshold of percentage of box intersection for keeping it

        Output
        ------
        No output. It writes all the data.
        """

        for filename in self.image_loader.X_filenames:
            output_data = self._generate_data(filename)

            output_filenames = [
                "/".join([
                    self.local_path,
                    str(x)+".pickle"
                ]) for x in range(
                    len(self.metadata), 
                    len(self.metadata)+len(output_data)
            )]

            for data, filename in zip(output_data, output_filenames):
                if self.verbose:
                    print(f"Writting {filename}")

                output_dict = dict([(key,value) for key, value in x.items() if key not in ["data"]])
                output_dict["filename"] = filename

                # Keeping the data in the internal metadata list
                self.metadata = output_dict
                # Writting file
                with open(filename,"w") as f:
                    pickle.dump(output_dict["data"], "w")


    def _generate_data(self, filename, label_ratio_threshold=0.7):
        """Generate the data from a sample

        Parameters
        ----------
        filename: str, name of the file from which we generate the data
        label_ratio_threshold: threshold of percentage of box intersection for keeping it

        Output
        ------
        List of dict, containing :
            filename: name of the original file
            width: width of the box
            height: height of the box
            ratio: ratio h/w of the box
            bbox: xmin, ymin, xmax, ymax of the box
            data: box content
            label: label of the box 
        """

        # Getting original data and cropped data
        original_data = self.image_loader.get_sample(filename)
        original_image, original_boxes, original_labels = original_data[0], original_data[1], original_data[2]
        original_image_shape = original_data[-1]
        original_image_crop = self.image_loader.get_crop(original_image, original_boxes, data_augmentation=self.data_augmentation)

        # Getting the box
        detected_box = self.box_classifier(image_loader = self.image_loader, image_name = filename)

        # Filter boxs and get labels
        new_box_coordonates, new_box_data, new_box_labels = self._filter_box(original_image=original_image, 
                                                                            original_boxes=original_boxes, 
                                                                            original_labels=original_labels,
                                                                            detected_box=detected_box,
                                                                            label_ratio_threshold=label_ratio_threshold
                                                            )
        
        # Creating the output data
        output_dict = []
        
        ## From original data

        output_data = [
            zip(original_boxes, original_image_crop, original_labels),
            zip(new_box_coordonates, new_box_data, new_box_labels)
        ]
        output_dict += [{
            "filename":filename,
            "height":original_image_shape[0],
            "width":original_image_shape[1],
            "ratio":original_image_shape[0]/original_image_shape[1],
            "bbox":x[0],
            "data":x[1],
            "label":x[2]
        } for data in output_data for x in data]

        return output_dict
        

    def _filter_box(self, original_image, original_boxes, original_labels, detected_box, label_ratio_threshold=0.7):
        """Given a box list, return a filtered list and its labels

        Parameters
        ----------
        original_image: numpy array of size (h, w, 3) of the original image
        original_boxes: list of original box locations in format xmin, xmax, ymin, ymax
        original_labels: list integer corresponding of the labels of the original box
        detected_box: list of detected box in formay xmin, ymin, xmax, ymax
        label_ratio_threshold: threshold of percentage of box intersection for keeping it

        Output
        ------
        Tuple new_box_coordonates, new_box_data, new_box_label :
        - new_box_coordonates: list of xmin, ymin, xmax and ymax coordonates
        - new_box_data: numpy array of size (h,w) which contains the content of the box
        - new_box_label: int of the box class
        """

        # We create a reference matrix, which contains the true labels
        label_matrix = np.ones(original_image.shape[0:2])*-1
        for original_box, original_label in zip(original_boxes, original_labels):
            label_matrix[original_box[2]:original_box[3],original_box[0]:original_box[1]] = original_label

        new_box_coordonates = []
        new_box_data = []
        new_box_label = []

        for box in detected_box:
            # Create a temporary matrix for working on data
            working_matrix = label_matrix[box[1]:box[3],box[0]:box[2]]
            if np.max(working_matrix) != -1:      
                # Compute the proportion of pixels with a label
                label_ratio = (working_matrix != -1).mean()
                
                if label_ratio > label_ratio_threshold:
                    box_label = np.argmax(np.bincount(working_matrix[working_matrix != -1].astype("int8")))-1
                    box_data = original_image[box[1]:box[3], box[0]:box[2]]

                    new_box_coordonates.append(box)
                    new_box_data.append(box_data)
                    new_box_label.append(box_label)

        return new_box_coordonates, new_box_data, new_box_label

    def _write_metadata(self, path):
        """Write the metadata in a pickle file

        Parameters
        ----------
        path: str, path where to write the metadata pickle file
        """

        if self.verbose:
            print(f"Writting metadata in {path}")

        with open(path, "w") as f:
            pickle.dump(self.metadata, f)

    def __len__(self):
        return len(self.metadata)

In [88]:
test_dataset = folliclesDataset(
    test_loader,
    data_augmentation=False,
    local_path=tempFolder_test.name,
    box_classifier=model._get_box_list,
    verbose=True
)

Generating data


TypeError: str.join() takes exactly one argument (2 given)

In [86]:
%debug

> [1;32mc:\users\4078182\appdata\local\temp\ipykernel_8772\132009943.py[0m(66)[0;36m<listcomp>[1;34m()[0m

0
