In [1]:
from random import randint, uniform
from enum import IntEnum
from functools import partial
from math import ceil
from copy import deepcopy, copy

import os
import cv2
import numpy as np
import pandas as pd

In [2]:
TRAINING_PATH = "train" # PLACE YOURSELF WHERE THE TRAIN FOLDER IS
annotation_file = "train.anno.txt"
train_file = "train.txt"
validation_file = "validation.txt"
train_folder = "train_split"
validation_folder = "validation_split"

In [3]:
class Label(IntEnum):
    INDOOR = 0
    OUTDOOR = 1
    PERSON = 2
    DAY = 3
    NIGHT = 4
    WATER = 5
    ROAD = 6
    VEGETATION = 7
    TREE = 8
    MOUNTAINS = 9
    BEACH = 10
    BUILDINGS = 11
    SKY = 12
    SUNNY = 13
    PARTLY_CLOUDY = 14
    OVERCAST = 15
    ANIMAL = 16 
    
    def __repr__(self):
        return self.name  

In [4]:
def fill_proportions(prop_dict, sliced_label):
    for key, label in zip(prop_dict.keys(), sliced_label):
        prop_dict[key] += label

def compute_dataset_stats():
    proportions = {}
    proportions[Label.OUTDOOR] = {label: 0 for label in Label if label not in [Label.INDOOR, Label.OUTDOOR]}
    proportions[Label.INDOOR] = deepcopy(proportions[Label.OUTDOOR])
    
    
    total = 0
    outliers = 0
    total_indoor=0
    total_outdoor=0
    df = pd.read_csv(annotation_file,sep=" ", names= ["Img_Name","indoor","outdoor","person","day",
                                             "night", "water","road or pathway","vegetation",
                                             "tree","mountains","beach","buildings","sky","sunny",
                                             "partly cloudy","overcast","animal"]) 
    for index, row in df.iterrows():
        try:
            img_label = row.values.tolist()[1:]
            
            if (img_label[Label.INDOOR] == img_label[Label.OUTDOOR]):
                outliers += 1    
            img_name = row["Img_Name"].strip()
            path = os.path.join(TRAINING_PATH, img_name)
            img = cv2.imread(path, cv2.IMREAD_COLOR)
            if img is None:
                print(f"could not find {img_name}")
        except Exception as e:
            print(f"{img_name} is not valid {str(e)}")
            continue

        total += 1

        
        if img_label[Label.INDOOR] and not img_label[Label.OUTDOOR] :
            fill_proportions(proportions[Label.INDOOR], img_label[Label.PERSON:])
            total_indoor+=1
        elif img_label[Label.OUTDOOR] and not img_label[Label.INDOOR]:
            total_outdoor+=1
            fill_proportions(proportions[Label.OUTDOOR], img_label[Label.PERSON:])
            
    print(f"Total images: {total}") 
    print(f"Tolal outliers images: {outliers}\n")
    return proportions, total_indoor, total_outdoor 
    

proportions, total_indoor, total_outdoor = compute_dataset_stats()

# with open("Initial_stats.txt", "a") as f:
#         f.write(f"total indoor class: {total_indoor} samples: {proportions[Label.INDOOR]}\n total outdoor class: {total_outdoor} samples: {proportions[Label.OUTDOOR]}\n")

print(f"total indoor class: {total_indoor} samples: {proportions[Label.INDOOR]}\n")
print(f"total outdoor class: {total_outdoor} samples: {proportions[Label.OUTDOOR]}\n")

could not find 29-29092.jpg
could not find 29-29094.jpg
could not find 29-29130.jpg
Total images: 5088
Tolal outliers images: 40

total indoor class: 2159 samples: {PERSON: 1443, DAY: 933, NIGHT: 33, WATER: 28, ROAD: 0, VEGETATION: 89, TREE: 3, MOUNTAINS: 0, BEACH: 0, BUILDINGS: 0, SKY: 29, SUNNY: 0, PARTLY_CLOUDY: 0, OVERCAST: 1, ANIMAL: 89}

total outdoor class: 2889 samples: {PERSON: 1255, DAY: 1443, NIGHT: 530, WATER: 633, ROAD: 473, VEGETATION: 1443, TREE: 693, MOUNTAINS: 473, BEACH: 133, BUILDINGS: 1270, SKY: 1443, SUNNY: 248, PARTLY_CLOUDY: 520, OVERCAST: 385, ANIMAL: 228}



In [5]:
# Now we define the transformations to use

In [6]:
def rotate(image, angle=25, scale=1.0):
        w = image.shape[1]
        h = image.shape[0]
        M = cv2.getRotationMatrix2D((w/2,h/2), angle, scale)
        image = cv2.warpAffine(image,M,(w,h))
        return image
    
def scale(img, scale=1.0):
    return rotate(img, angle=0, scale=scale)
    
def flip(image, vflip=False, hflip=False):
    if hflip or vflip:
        if hflip and vflip:
            c = -1
        else:
            c = 0 if vflip else 1
        image = cv2.flip(image, flipCode=c)
    return image

def gaussian_blur(img, size=3, sigma=0):
    blurred = cv2.GaussianBlur(img,(size,size),sigma)
    return blurred

def add_salt_pepper(img, salt_pepper=0.5, amount=0.004):
    salt_pepper = uniform(0.3, 0.8)
    amount = uniform(0.004, 0.01)
    noisy = copy(img)
    salt_count = np.ceil(amount * img.size * salt_pepper)
    locations = [np.random.randint(0, i-1, int(salt_count)) for i in img.shape]
    noisy[locations] = 255.0

    pepper_count = np.ceil(amount * img.size * (1 - salt_pepper))
    locations = [np.random.randint(0, i-1, int(pepper_count)) for i in img.shape]
    noisy[locations] = 0
    return noisy

def randomize_contrast(img):
    alpha = uniform(1.5, 2) # Contrast control (1.0-3.0)
    beta = uniform(40,50) # Brightness control (0-100)
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

def affine_transform(img):
    rows,cols,ch = img.shape
    pts1 = np.float32([[randint(0,50),randint(0,50)],[randint(180,200), randint(0,50)],[randint(0,50),randint(180,200)]])
    pts2 = np.float32([[randint(0,25),randint(80,100)],[randint(180,200),randint(0,50)],[randint(0,20),randint(220,250)]])

    M = cv2.getAffineTransform(pts1,pts2)
    dst = cv2.warpAffine(img,M,(cols,rows))
    return dst

def save_training_sample(image, img_path, image_name, imge_label, annotation_file):
    cv2.imwrite(img_path+ "/" +image_name+'.jpg',image)
    with open(annotation_file, "a") as f:
        f.write(image_name+'.jpg '+  ' '.join(map(str, imge_label))+ "\n")

In [7]:
# we choose to perform the following transformations
transforms = [partial(scale, scale=0.75), partial(scale, scale=1.25),
              affine_transform,  partial(rotate, angle=25), partial(rotate, angle=50), 
              partial(gaussian_blur,size=5), partial(gaussian_blur,size=7), add_salt_pepper,
              add_salt_pepper, randomize_contrast, randomize_contrast, affine_transform, affine_transform]

In [None]:
# This is responsible of generating the data, uncomment the saves to write data
# if testing numbers just keep the saves commented so it does not mess up the dataset
img_indexer = 1


df = pd.read_csv(annotation_file,sep=" ", names= ["Img_Name","indoor","outdoor","person","day",
                                             "night", "water","road or pathway","vegetation",
                                             "tree","mountains","beach","buildings","sky","sunny",
                                              "partly cloudy","overcast","animal"]) 
thresh = 0
for proportion_type in proportions.keys():
    proportion = proportions[proportion_type]
    proportion_dict = {k: v for k,v in proportion.items() if k not in [Label.INDOOR, Label.OUTDOOR]}
    proportion_dict = {k: v for k, v in sorted(proportion.items(), key=lambda item: item[1])}
    max_label_val = proportion_dict[list(proportion_dict.keys())[-1]]
    print(proportion_type)
    
    black_list = []
    if max_label_val > thresh:
        black_list = [list(proportion_dict.keys())[-1]]
    thresh = max(thresh, max_label_val)

    for index, row in df.iterrows():
        img_label = row.values.tolist()[1:]
        if img_label[Label.INDOOR] == img_label[Label.OUTDOOR]:
            continue

        img_name = row["Img_Name"].strip()
        path = os.path.join(TRAINING_PATH, img_name)
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            print(f"{img_name} could not be read. Proceeding to next.")
            continue

        if img_label[proportion_type]:
            labels = [label for label in Label if img_label[label] and label > Label.OUTDOOR]
            if not labels:
                continue
            max_label = max([proportion_dict[label] for label in labels])
            max_label_key = list(proportion_dict.keys())[list(proportion_dict.values()).index(max_label)]

            if max_label_key in black_list:
                continue

            print(f"black list {black_list}, max_label_key {max_label_key}, labels {labels} ")

            flipped = flip(img, hflip=True)
            #save_training_sample(flipped, TRAINING_PATH,f"generated{img_indexer}", img_label, annotation_file)
            max_label += 1
            img_indexer += 1
            transform_count = 1

            for im in [img, flipped]:
                for transform in transforms:
                    if max_label >= thresh:
                        break
                    transformed = transform(im)
                    #save_training_sample(transformed, TRAINING_PATH, f"generated{img_indexer}", img_label, annotation_file)

                    max_label += 1
                    transform_count += 1
                    img_indexer += 1

            if max_label >= thresh:
                black_list.append(max_label_key)

            for label in labels:
                proportion_dict[label] += transform_count 

            proportions[proportion_type] = {k: v for k, v in sorted(proportion_dict.items(), key=lambda item: item[1])}

print(f"indoor class: {proportions[Label.INDOOR]}\n")
print(f"outdoor class: {proportions[Label.OUTDOOR]}\n")

Label.OUTDOOR
black list [SKY], max_label_key 3, labels [NIGHT, VEGETATION, TREE, MOUNTAINS, SKY] 
black list [SKY, DAY], max_label_key 7, labels [DAY, MOUNTAINS, SKY, PARTLY_CLOUDY] 
black list [SKY, DAY, VEGETATION], max_label_key 2, labels [PERSON, WATER] 
black list [SKY, DAY, VEGETATION], max_label_key 2, labels [PERSON, NIGHT, BUILDINGS] 




black list [SKY, DAY, VEGETATION], max_label_key 5, labels [WATER, MOUNTAINS] 
black list [SKY, DAY, VEGETATION], max_label_key 2, labels [PERSON, MOUNTAINS] 
29-29092.jpg could not be read. Proceeding to next.
29-29094.jpg could not be read. Proceeding to next.
29-29130.jpg could not be read. Proceeding to next.
black list [SKY, DAY, VEGETATION], max_label_key 16, labels [ANIMAL] 
black list [SKY, DAY, VEGETATION], max_label_key 2, labels [PERSON] 
black list [SKY, DAY, VEGETATION], max_label_key 11, labels [ROAD, BUILDINGS] 
black list [SKY, DAY, VEGETATION], max_label_key 11, labels [BUILDINGS] 
black list [SKY, DAY, VEGETATION], max_label_key 5, labels [WATER, ANIMAL] 
black list [SKY, DAY, VEGETATION], max_label_key 16, labels [ANIMAL] 
black list [SKY, DAY, VEGETATION], max_label_key 16, labels [BEACH, ANIMAL] 
black list [SKY, DAY, VEGETATION], max_label_key 11, labels [BUILDINGS] 
black list [SKY, DAY, VEGETATION], max_label_key 2, labels [PERSON, NIGHT] 
black list [SKY, DAY, 

In [None]:
# compute stats to check how the dataset has been augmented
proportions, total_indoor, total_outdoor = compute_dataset_stats()

#with open("Augmented_stats.txt", "a") as f:
#        f.write(f"total indoor class: {total_indoor} samples: {proportions[Label.INDOOR]}\n total outdoor class: {total_outdoor} samples: {proportions[Label.OUTDOOR]}\n")

print(f"total indoor class: {total_indoor} samples: {proportions[Label.INDOOR]}\n")
print(f"total outdoor class: {total_outdoor} samples: {proportions[Label.OUTDOOR]}\n")

In [None]:
# responsible for the data split
# uncomment the saves to rewite images, otherwise just DON'T

def pad_ds_imgs(total_samples, train_split=0.8):
    val_thresh = ceil(total_samples * (1-train_split))
    df = pd.read_csv(annotation_file,sep=" ",
                 names= ["Img_Name","indoor","outdoor","person","day",
                         "night", "water","road or pathway","vegetation",
                         "tree","mountains","beach","buildings","sky","sunny",
                         "partly cloudy","overcast","animal"])    
    
    total_train = 0
    total_val = 0
    total = 0
    print(f"validation should be: {val_thresh}")
    for index, row in df.iterrows():
        try:
            img_label = row.values.tolist()[1:]
            img_name = row["Img_Name"].strip()
                                       
            path = os.path.join(TRAINING_PATH, img_name)
            img = cv2.imread(path, cv2.IMREAD_COLOR)
            if img is None:
                print(f"could not load img {img_name}, proceeding to next")
            
            if total_val < val_thresh:
                if not img_name.startswith("generated"):
                    #save_training_sample(img, validation_folder ,img_name, img_label, validation_file)
                    total_val += 1
                    total += 1
                else:
                    #save_training_sample(img, train_folder, img_name, img_label, train_file)
                    total_train += 1
                    total += 1
            
            else:
                #save_training_sample(img, train_folder, img_name, img_label, train_file)
                total_train += 1
                total += 1
                    
        except Exception as e:
            continue
    print(f"total_val {total_val}\ntotal_train {total_train}\ntotal {total}")
        

pad_ds_imgs(5088)