In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os    
import random
import shutil
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import csv

    
def show_points(coords, labels, ax, marker_size=375):
    """Draw labelled points on ``ax`` as star markers.

    Rows of ``coords`` whose entry in ``labels`` equals 1 are drawn in green
    first, then rows whose label equals 0 are drawn in red. Column 0 of
    ``coords`` is passed as the x values and column 1 as the y values.

    Args:
        coords: 2-D array indexable with a boolean mask and ``[:, 0]`` / ``[:, 1]``.
        labels: Array compared element-wise against 1 and 0 to select rows.
        ax: Object exposing a matplotlib-style ``scatter`` method.
        marker_size: Value forwarded to ``scatter`` as ``s``.
    """
    # Everything except the colour is shared by both groups.
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for label_value, colour in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=colour, **star_style)

# Flip image and landmark horizontally
def symmetric_horizontal(image, landmarks):
    """Mirror an image left-to-right and mirror its landmarks to match.

    Args:
        image: Image object providing ``transpose`` and ``size`` (a PIL image
            in this file's usage).
        landmarks: 2-D array-like of points; column 0 holds the x coordinate.

    Returns:
        Tuple ``(flipped_image, flipped_landmarks)``. ``flipped_landmarks`` is
        a new array in which every x becomes ``width - x``; the array passed
        in is left unmodified.
    """
    flipped_image = image.transpose(Image.FLIP_LEFT_RIGHT)
    width = flipped_image.size[0]
    # Work on a copy so the caller's coordinates are not silently overwritten.
    flipped_landmarks = np.array(landmarks)
    # Reflecting about the vertical centre line x = width / 2 gives width - x.
    flipped_landmarks[:, 0] = width - flipped_landmarks[:, 0]
    return flipped_image, flipped_landmarks
# Flip image and landmark vertically
def symmetric_vertical(image, landmarks):
    """Mirror an image top-to-bottom and mirror its landmarks to match.

    Args:
        image: Image object providing ``transpose`` and ``size`` (a PIL image
            in this file's usage).
        landmarks: 2-D array-like of points; column 1 holds the y coordinate.

    Returns:
        Tuple ``(flipped_image, flipped_landmarks)``. ``flipped_landmarks`` is
        a new array in which every y becomes ``height - y``; the array passed
        in is left unmodified.
    """
    flipped_image = image.transpose(Image.FLIP_TOP_BOTTOM)
    height = flipped_image.size[1]
    # Work on a copy so the caller's coordinates are not silently overwritten.
    flipped_landmarks = np.array(landmarks)
    # Reflecting about the horizontal centre line y = height / 2 gives height - y.
    flipped_landmarks[:, 1] = height - flipped_landmarks[:, 1]
    return flipped_image, flipped_landmarks
    

In [4]:
from PIL import Image
# Dataset location used when ``base_dir`` is not supplied.
_DEFAULT_DATASET_ROOT = "C:\\Users\\14552\\Desktop\\project\\Heliconius_forewing_band-master"
# Files in the image folders with these suffixes are not treated as images.
_SKIPPED_IMAGE_SUFFIXES = (".ini", ".txt")
# Row of the landmark array whose position decides which flips are applied.
_REFERENCE_LANDMARK = 10
# Absorbs float error such as 0.7 + 0.1 + 0.2 == 0.9999999999999999, which
# would otherwise make int() drop the last sample of the dataset.
_OFFSET_EPSILON = 1e-9


def _collect_labelled_images(image_root, landmark_root):
    """Return ``{image path: [[x, y], ...]}`` for every image that has a landmark file."""
    image_to_landmarks = {}
    # Sorted so the later seeded shuffle sees the same order on every run;
    # set iteration and os.listdir order are both unspecified.
    shared_folders = sorted(set(os.listdir(image_root)) & set(os.listdir(landmark_root)))
    for folder in shared_folders:
        image_dir = os.path.join(image_root, folder)
        landmark_dir = os.path.join(landmark_root, folder)
        if not (os.path.isdir(image_dir) and os.path.isdir(landmark_dir)):
            continue
        landmark_files = sorted(os.listdir(landmark_dir))
        for image_file in sorted(os.listdir(image_dir)):
            if image_file.endswith(_SKIPPED_IMAGE_SUFFIXES):
                continue
            stem = image_file.split('.')[0]
            if not stem:
                # Dot-files have an empty stem, which would match every landmark file.
                continue
            for landmark_file in landmark_files:
                # Substring match: the landmark file name only has to contain
                # the image stem. If several files match, the last one wins.
                # NOTE(review): a stem that is a prefix of another stem
                # (e.g. "a1" and "a10") can pick up the wrong file - confirm
                # the naming scheme rules this out.
                if stem in landmark_file:
                    coords = np.loadtxt(
                        os.path.join(landmark_dir, landmark_file),
                        delimiter="\t", usecols=(0, 1), skiprows=0,
                    )
                    image_to_landmarks[os.path.join(image_dir, image_file)] = coords.tolist()
    return image_to_landmarks


def _split_offsets(total, train_p, val_p, test_p):
    """Return the cumulative end indices of the train, val and test slices."""
    cumulative_shares = (train_p, train_p + val_p, train_p + val_p + test_p)
    return tuple(int(total * share + _OFFSET_EPSILON) for share in cumulative_shares)


def _orient_and_export(image_paths, image_to_landmarks, out_dir, csv_name):
    """Flip each image as needed, save it into ``out_dir`` and list it in ``csv_name``."""
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, csv_name), "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["image_path", "image_name", "landmarks"])
        for path in image_paths:
            file_name = os.path.basename(path)
            landmark = np.array(image_to_landmarks[path])
            # The context manager closes the source file once the copy is saved.
            with Image.open(path) as source:
                width, height = source.size
                ref_x = landmark[_REFERENCE_LANDMARK][0]
                ref_y = landmark[_REFERENCE_LANDMARK][1]
                oriented = source
                # Each flip moves the reference landmark into the lower-coordinate
                # half along its axis; the two axes are independent.
                if ref_x > width / 2:
                    oriented, landmark = symmetric_horizontal(oriented, landmark)
                if ref_y > height / 2:
                    oriented, landmark = symmetric_vertical(oriented, landmark)
                oriented.save(os.path.join(out_dir, file_name))
            writer.writerow([path, file_name, landmark.tolist()])


def dev_train_val_test(train_p, val_p, test_p, base_dir=None):
    """Clean the raw dataset, split it and export train/val/test sets.

    Steps:
      1. Scan ``<base_dir>/images`` and ``<base_dir>/landmarks`` for sub-folders
         present in both, and keep only images for which a landmark file whose
         name contains the image stem exists.
      2. Shuffle the kept images with a fixed seed (2023) and cut the list into
         training, validation and test parts according to the proportions.
      3. For each image, look at the landmark at index 10 (the original notes
         call this the "10th landmark" - NOTE(review): index 10 is the 11th
         row, confirm which is intended). If it lies in the right half the
         image and landmarks are flipped horizontally; if it lies in the
         bottom half they are flipped vertically.
      4. Save each image into ``<base_dir>/dataset_wash/{train,val,test}`` and
         write ``train.csv`` / ``val.csv`` / ``test.csv`` there with the columns
         ``image_path``, ``image_name`` and ``landmarks``.

    The number of kept images is printed. Images from different sub-folders
    that share a file name are written to the same output path.

    Args:
        train_p: Proportion of the data used for the training set.
        val_p: Proportion of the data used for the validation set.
        test_p: Proportion of the data used for the test set.
        base_dir: Dataset root folder. Defaults to the original hard-coded
            project location.

    Raises:
        IndexError: If a landmark file has fewer than 11 rows.
        OSError: If the image or landmark folders cannot be read.
    """
    if base_dir is None:
        base_dir = _DEFAULT_DATASET_ROOT
    image_path = os.path.join(base_dir, "images")
    landmark_path = os.path.join(base_dir, "landmarks")
    output_root = os.path.join(base_dir, "dataset_wash")

    img_landmark_dict = _collect_labelled_images(image_path, landmark_path)
    valid_imgs = list(img_landmark_dict)
    train_end, val_end, test_end = _split_offsets(len(valid_imgs), train_p, val_p, test_p)

    random.seed(2023)
    random.shuffle(valid_imgs)
    print(len(valid_imgs))

    splits = (
        ("train", valid_imgs[:train_end]),
        ("val", valid_imgs[train_end:val_end]),
        ("test", valid_imgs[val_end:test_end]),
    )
    for split_name, split_images in splits:
        _orient_and_export(
            split_images,
            img_landmark_dict,
            os.path.join(output_root, split_name),
            split_name + ".csv",
        )


# Run the preprocessing with train / validation / test proportions of 0.7 / 0.1 / 0.2.
dev_train_val_test(0.7, 0.1, 0.2)

249
