In [1]:
import pandas as pd

import numpy as np
from PIL import Image
from pathlib import Path

import torch
from torchvision.transforms import v2

In [2]:
# Root folder of the Houses dataset: the per-room JPEGs and the tabular text file live here.
dataset_path = Path("./Houses-dataset") / "Houses Dataset"
# Text file holding the structured attributes of the houses.
txt_dataset_path = dataset_path.joinpath("HousesInfo.txt")

In [3]:
# Column labels applied to HousesInfo.txt (the file is read as headerless because
# the names are supplied explicitly).
columns_name = [
    "Number of Bedrooms",
    "Number of bathrooms",
    "Area",
    "Zipcode",
    "Price",
]
# Fields in the file are separated by single spaces.
structured_df = pd.read_csv(txt_dataset_path, names=columns_name, sep=" ")

## Image Concatenation

Concatenate the four images of each house (bathroom, bedroom, frontal, kitchen) into a single 2×2 image.

In [None]:
# Destination folder for the concatenated house images.
# NOTE: `out_folder` is rebound further down for the augmented images, so re-run this
# cell before re-running the concatenation loop if the augmentation cells already ran.
out_folder = Path("./data/concatenated_images")
out_folder.mkdir(parents=True, exist_ok=True)

In [None]:
def pad_image(image, target_width, target_height):
    """
    Center `image` on a black RGB canvas of the requested size.

    Args:
        image: PIL image to place on the canvas.
        target_width: Width of the returned canvas, in pixels.
        target_height: Height of the returned canvas, in pixels.

    Returns:
        A new RGB image of size (target_width, target_height) with `image`
        pasted in the middle; the input image is not modified.
    """
    canvas = Image.new("RGB", (target_width, target_height), (0, 0, 0))
    # Floor-divide the slack so that any odd leftover pixel ends up on the
    # right/bottom border.
    left = (target_width - image.width) // 2
    top = (target_height - image.height) // 2
    canvas.paste(image, (left, top))
    return canvas

In [None]:
def concatenate_imgs(images: list[Image.Image]) -> Image.Image:
    """
    Tile four images into a single 2x2 collage on a black background.

    Every image is first centered on a black canvas as large as the widest and
    tallest input (via `pad_image`), so all four tiles share the same size.
    Tiles are placed in input order: top-left, top-right, bottom-left,
    bottom-right.

    Args:
        images: Exactly four PIL images.

    Returns:
        A new RGB image of size (2 * max_width, 2 * max_height).

    Raises:
        ValueError: If `images` does not contain exactly four images.
    """
    if len(images) != 4:
        raise ValueError(
            f"concatenate_imgs expects exactly 4 images, got {len(images)}"
        )

    max_width = max(image.width for image in images)
    max_height = max(image.height for image in images)

    padded_images = [pad_image(img, max_width, max_height) for img in images]

    # Black canvas holding the 2x2 grid.
    new_image = Image.new("RGB", (max_width * 2, max_height * 2), (0, 0, 0))

    # Upper-left corner of each tile, in the same order as `images`.
    corners = [
        (0, 0),                    # Top-left
        (max_width, 0),            # Top-right
        (0, max_height),           # Bottom-left
        (max_width, max_height),   # Bottom-right
    ]
    for tile, corner in zip(padded_images, corners):
        new_image.paste(tile, corner)

    return new_image



In [None]:
# Build one 2x2 collage per house from its four room pictures.
for idx in structured_df.index:

    # Image files are numbered from 1 while the DataFrame index starts at 0.
    idx_ = idx + 1

    # Order defines the collage layout: top-left, top-right, bottom-left, bottom-right.
    images = [
        Image.open(dataset_path / f"{idx_}_{place}.jpg")
        for place in ("bathroom", "bedroom", "frontal", "kitchen")
    ]

    try:
        new_image = concatenate_imgs(images)
        new_image.save(out_folder / f"{idx_}_house.jpg")
    finally:
        # Release the underlying file handles even if concatenation or saving fails.
        for image in images:
            image.close()

## Image Data Augmentation

In [None]:
# Destination folder for the augmented collages.
# NOTE: this rebinds `out_folder`, which previously pointed at the non-augmented output.
out_folder = Path("./data/concatenated_augm_images")
out_folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Spatial augmentations, applied first.
spatial_augmentations = [
    v2.RandomHorizontalFlip(p=1),  # p=1: the image is always flipped horizontally
    v2.RandomRotation(degrees=10),  # Random rotation within ±10 degrees
    v2.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Random translation, no extra rotation
    # Custom resizing function. The lambda defers the name lookup, so the helper
    # may be defined after this pipeline is built.
    v2.Lambda(lambda img: random_resized_crop(img, min_scale=0.9, max_scale=1.1)),
]

# Color augmentations, applied after the spatial ones.
color_augmentations = [
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color adjustments
    # Custom function adding Gaussian noise (looked up lazily, like the resize helper).
    v2.Lambda(lambda img: add_gaussian_noise(img)),
]

transform = v2.Compose(spatial_augmentations + color_augmentations)

# Custom function to add Gaussian noise
def add_gaussian_noise(
    image: Image.Image,
    mean: float = 0,
    std: float = 0.05,
    rng: "np.random.Generator | None" = None,
) -> Image.Image:
    """
        Add Gaussian noise to disturb the colors of the image.

        The noise is drawn independently for every array element, scaled by 255
        (so `mean` and `std` are expressed as fractions of the 8-bit range),
        added to the pixel values, clipped to [0, 255] and cast back to uint8.

        Args:
            image: Source PIL image; it is not modified.
            mean: Mean of the noise, as a fraction of 255.
            std: Standard deviation of the noise, as a fraction of 255.
            rng: Optional `np.random.Generator` used to draw the noise. When
                omitted, the global NumPy random state is used (seedable with
                `np.random.seed`), exactly as before.

        Returns:
            A new PIL image built from the noisy uint8 array.
    """
    # Both the `np.random` module and a Generator expose `.normal(...)`.
    sampler = np.random if rng is None else rng

    np_image = np.array(image)  # Convert PIL image to numpy array
    noise = sampler.normal(mean, std, np_image.shape)  # One noise sample per element
    noisy_image = np.clip(np_image + noise * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy_image)

def random_resized_crop(
    image: Image.Image,
    min_scale: float = 0.9,
    max_scale: float = 1.1,
    rng: "np.random.Generator | None" = None,
) -> Image.Image:
    """
        Resize the image by a random factor drawn uniformly from
        [min_scale, max_scale], using bilinear interpolation.

        Despite its name, this function does not crop: the returned image is
        simply smaller or larger than the input. Both sides are scaled by the
        same factor and truncated to whole pixels.

        Args:
            image: Source PIL image; it is not modified.
            min_scale: Lower bound of the scale factor.
            max_scale: Upper bound of the scale factor.
            rng: Optional `np.random.Generator` used to draw the factor. When
                omitted, the global NumPy random state is used, exactly as before.

        Returns:
            A new, resized PIL image.
    """
    # Both the `np.random` module and a Generator expose `.uniform(...)`.
    sampler = np.random if rng is None else rng

    width, height = image.size
    scale_factor = sampler.uniform(min_scale, max_scale)
    # Keep at least one pixel per side so that tiny inputs cannot yield an
    # invalid zero-sized target.
    new_width = max(1, int(width * scale_factor))
    new_height = max(1, int(height * scale_factor))
    return image.resize((new_width, new_height), Image.BILINEAR)

In [None]:
# Seed every random source used below so that re-running this cell regenerates the
# same augmented dataset: NumPy drives the custom helpers and the swap mask, while
# the torchvision transforms draw their parameters from torch's generator.
AUGMENTATION_SEED = 42
np.random.seed(AUGMENTATION_SEED)
torch.manual_seed(AUGMENTATION_SEED)

# Room order defines the collage layout: top-left, top-right, bottom-left, bottom-right.
places = ["bathroom", "bedroom", "frontal", "kitchen"]

for idx in structured_df.index:

    # Image files are numbered from 1 while the DataFrame index starts at 0.
    idx_ = idx + 1

    src_imgs = [Image.open(dataset_path / f"{idx_}_{place}.jpg") for place in places]

    try:
        transformed_imgs = [transform(img) for img in src_imgs]

        # For every room, pick at random which of the two output houses keeps the
        # original picture; the other house receives the transformed one.
        mask = np.random.choice([0, 1], size=len(places))

        house1 = []
        house2 = []

        for keep_original_in_house1, src_img, transformed_img in zip(mask, src_imgs, transformed_imgs):
            if keep_original_in_house1 == 1:
                house1.append(src_img)
                house2.append(transformed_img)
            else:
                house1.append(transformed_img)
                house2.append(src_img)

        house1_concat = concatenate_imgs(house1)
        house2_concat = concatenate_imgs(house2)

        house1_concat.save(out_folder / f"{idx_}_1-house.jpg")
        house2_concat.save(out_folder / f"{idx_}_2-house.jpg")
    finally:
        # Release the source file handles even if a transform or save fails.
        for img in src_imgs:
            img.close()


## Text Data Processing

In [None]:
# Store the row index as a regular column (added to `structured_df` in place); the
# next cell turns it into the "<index + 1>_<variant>" identifiers of the augmented
# samples. Presumably this is the default 0-based index produced by `read_csv`.
structured_df["ID_augm"] = structured_df.index

In [None]:
def _with_variant_id(df, variant):
    """Return a copy of `df` whose "ID_augm" values become "<ID_augm + 1>_<variant>" strings."""
    return df.assign(ID_augm=[str(row_id + 1) + f"_{variant}" for row_id in df["ID_augm"]])


# One copy of the tabular data per augmented variant; the identifiers follow the
# same "<n>_1" / "<n>_2" prefixes as the augmented image file names.
structured_df_1 = _with_variant_id(structured_df, 1)
structured_df_2 = _with_variant_id(structured_df, 2)

In [None]:
# Stack both variants row-wise, then index the rows by their augmented identifier.
stacked_variants = pd.concat([structured_df_1, structured_df_2])
structured_df_augm = stacked_variants.set_index("ID_augm")

In [4]:
import matplotlib.pyplot as plt

In [5]:
# Distribution of the houses over zipcodes (10 equal-width bins on the numeric value).
fig, ax = plt.subplots()
ax.hist(structured_df["Zipcode"], bins=10, edgecolor="black")
# Label the figure so that it can be read on its own when skimming the notebook.
ax.set(title="Distribution of houses by zipcode", xlabel="Zipcode", ylabel="Number of houses")
plt.show()

: 