# Preprocessing images for model training

In [1]:
from PIL import Image
import os

def load_images_from_folder(folder):
    """Load every ``.jpg`` / ``.png`` image located directly inside ``folder``.

    Args:
        folder: Path of the directory to scan. Sub-directories are not
            searched.

    Returns:
        A list of ``PIL.Image.Image`` objects, ordered by file name, whose
        pixel data is already in memory and whose underlying files are closed.
        The list is empty when the folder holds no matching files.

    Raises:
        OSError: If ``folder`` cannot be listed or a matching file cannot be
            opened/decoded as an image.
    """
    images = []
    # os.listdir returns entries in arbitrary order; sort so that the image
    # order (and everything derived from it) is reproducible across runs.
    for fname in sorted(os.listdir(folder)):
        # Case-insensitive match so files such as "IMG_01.JPG" are not
        # silently skipped.
        if not fname.lower().endswith(('.jpg', '.png')):
            continue
        # Image.open is lazy and keeps the file open. Read the pixel data now
        # and release the handle via the context manager, so large folders do
        # not exhaust the process's file descriptors.
        with Image.open(os.path.join(folder, fname)) as img:
            img.load()
        images.append(img)
    return images

# Root of the raw vgdb_2016 dataset, relative to this notebook. Each split
# ("train" / "test") holds one folder per class: "vg" for the Van Gogh
# images and "nvg" for the non-Van Gogh images.
RAW_DATA_DIR = "../data/raw/vgdb_2016"

# Training split.
train_van_gogh_images = load_images_from_folder(f"{RAW_DATA_DIR}/train/vg")
train_non_van_gogh_images = load_images_from_folder(f"{RAW_DATA_DIR}/train/nvg")

# Test split.
test_van_gogh_images = load_images_from_folder(f"{RAW_DATA_DIR}/test/vg")
test_non_van_gogh_images = load_images_from_folder(f"{RAW_DATA_DIR}/test/nvg")




In [6]:
import sys
sys.path.append('../src')  # Adjust as needed

from data_preprocessing import basic_transform, apply_transform, create_dataset

# Build the transform once so that all four image groups share the same object.
# NOTE: the patch-preprocessing import cell further down binds the name
# `transform` as well; re-run this cell before relying on `transform` here if
# that import has been executed more recently.
transform = basic_transform()

# Run every image group through the transform (train first, then test).
train_van_gogh_tensors = apply_transform(train_van_gogh_images, transform)
train_non_van_gogh_tensors = apply_transform(train_non_van_gogh_images, transform)
test_van_gogh_tensors = apply_transform(test_van_gogh_images, transform)
test_non_van_gogh_tensors = apply_transform(test_non_van_gogh_images, transform)

# Training dataset: label 1 = Van Gogh, label 0 = non-Van Gogh. The labels are
# laid out in the same order in which the two groups are joined below.
# (assumes apply_transform returns lists, so `+` concatenates - TODO confirm)
train_labels = (
    [1] * len(train_van_gogh_tensors)
    + [0] * len(train_non_van_gogh_tensors)
)
train_dataset = create_dataset(
    train_van_gogh_tensors + train_non_van_gogh_tensors,
    train_labels,
)

# Test dataset: same labelling scheme and ordering as the training dataset.
test_labels = (
    [1] * len(test_van_gogh_tensors)
    + [0] * len(test_non_van_gogh_tensors)
)
test_dataset = create_dataset(
    test_van_gogh_tensors + test_non_van_gogh_tensors,
    test_labels,
)


In [13]:
import torch
# Save the whole-image datasets to disk (optional).
# torch.save does not create missing parent directories, so make sure the
# output folder exists first (e.g. on a fresh checkout); `os` is imported in
# the first cell of this notebook.
# NOTE(review): the entire dataset object is pickled; recent PyTorch releases
# default torch.load to weights_only=True, which may refuse such files -
# confirm how these are loaded downstream.
os.makedirs('../data/processed', exist_ok=True)
torch.save(train_dataset, '../data/processed/train_dataset.pt')
torch.save(test_dataset, '../data/processed/test_dataset.pt')

# Patch preprocessing

In [4]:
import sys
sys.path.append('../src')  # Adjust as needed
import torch
from torchvision import transforms
from PIL import Image
import random
from preprocessing_patches import extract_patches_and_labels, transform


In [5]:
# Train set: extract patches per class and label them
# (1 = Van Gogh, 0 = non-Van Gogh).
# NOTE: `transform` must be the object imported from preprocessing_patches.
# The whole-image cell earlier in this notebook binds the same name to
# basic_transform(), so re-run the patch import cell first if that cell was
# executed more recently.
train_patches_vg, train_labels_vg = extract_patches_and_labels(train_van_gogh_images, 1, transform)
train_patches_non, train_labels_non = extract_patches_and_labels(train_non_van_gogh_images, 0, transform)

# Join both classes; patches and labels are joined in the same order so they
# stay aligned index-for-index.
# (assumes extract_patches_and_labels returns two lists - TODO confirm)
train_patch_tensors = train_patches_vg + train_patches_non
train_patch_labels = train_labels_vg + train_labels_non

# Stack the patches along a new leading dimension and pair them with labels.
train_patch_dataset = torch.utils.data.TensorDataset(
    torch.stack(train_patch_tensors),
    torch.tensor(train_patch_labels)
)

# torch.save does not create missing parent directories, so make sure the
# output folder exists first; `os` is imported in the first cell.
# NOTE(review): the whole TensorDataset object is pickled; recent PyTorch
# releases default torch.load to weights_only=True, which may refuse it -
# confirm how this file is loaded downstream.
os.makedirs('../data/processed', exist_ok=True)
torch.save(train_patch_dataset, '../data/processed/train_patch_dataset.pt')


In [6]:
# Test set: extract patches per class and label them
# (1 = Van Gogh, 0 = non-Van Gogh).
# NOTE: `transform` must be the object imported from preprocessing_patches.
# The whole-image cell earlier in this notebook binds the same name to
# basic_transform(), so re-run the patch import cell first if that cell was
# executed more recently.
test_patches_vg, test_labels_vg = extract_patches_and_labels(test_van_gogh_images, 1, transform)
test_patches_non, test_labels_non = extract_patches_and_labels(test_non_van_gogh_images, 0, transform)

# Join both classes; patches and labels are joined in the same order so they
# stay aligned index-for-index.
# (assumes extract_patches_and_labels returns two lists - TODO confirm)
test_patch_tensors = test_patches_vg + test_patches_non
test_patch_labels = test_labels_vg + test_labels_non

# Stack the patches along a new leading dimension and pair them with labels.
test_patch_dataset = torch.utils.data.TensorDataset(
    torch.stack(test_patch_tensors),
    torch.tensor(test_patch_labels)
)

# torch.save does not create missing parent directories, so make sure the
# output folder exists first; `os` is imported in the first cell.
# NOTE(review): the whole TensorDataset object is pickled; recent PyTorch
# releases default torch.load to weights_only=True, which may refuse it -
# confirm how this file is loaded downstream.
os.makedirs('../data/processed', exist_ok=True)
torch.save(test_patch_dataset, '../data/processed/test_patch_dataset.pt')
