In [26]:
import tensorflow as tf
from PIL import Image
import numpy as np
import os

In [27]:
# set the path to your dataset
dataset_dir = "./image_scrape_code/screenshots"

# list all image files in the dataset directory
image_files = [
    os.path.join(dataset_dir, filename)
    for filename in os.listdir(dataset_dir)
    if filename.endswith(".png")
]

for f in image_files:
    print(f)

./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-23-03.548Z.png
./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-21-03.546Z.png
./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-22-03.547Z.png
./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-25-03.552Z.png
./image_scrape_code/screenshots/screenshot-2024-01-27T23-29-03.557Z.png
./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-31-03.559Z.png


In [28]:
# resize
size = 128, 128

# create the dir if it doesn't exist
processed_dir = 'processed_images'
os.makedirs(processed_dir, exist_ok=True)

for i in image_files:
    try:
        im = Image.open(i)
        im.thumbnail(size, Image.Resampling.LANCZOS)
        
        # filename without the path
        filename = os.path.basename(i)
        
        # create new path for the processed image
        processed_path = os.path.join(processed_dir, filename)
        
        # save 
        im.save(processed_path, "PNG")
        
        print(f"Processed {i} saved as {processed_path}")
        
    except IOError:
        print(f"Error rescale for {i}")
        

Processed ./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-23-03.548Z.png saved as processed_images/person-screenshot-2024-01-27T23-23-03.548Z.png
Processed ./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-21-03.546Z.png saved as processed_images/person-screenshot-2024-01-27T23-21-03.546Z.png
Processed ./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-22-03.547Z.png saved as processed_images/person-screenshot-2024-01-27T23-22-03.547Z.png
Processed ./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-25-03.552Z.png saved as processed_images/person-screenshot-2024-01-27T23-25-03.552Z.png
Processed ./image_scrape_code/screenshots/screenshot-2024-01-27T23-29-03.557Z.png saved as processed_images/screenshot-2024-01-27T23-29-03.557Z.png
Processed ./image_scrape_code/screenshots/person-screenshot-2024-01-27T23-31-03.559Z.png saved as processed_images/person-screenshot-2024-01-27T23-31-03.559Z.png


In [10]:
# labels based on file names 
labels = [1 if "person" in filename else 0 for filename in image_files]

for l in labels:
    print(l)

1
1
1
1
1
1


In [20]:
# load and preprocess the images
def load_and_preprocess_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, (224, 224))  # resize to desired dimensions
    image = tf.cast(image, tf.float32) / 255.0  # normalize pixel values between 0 and 1
    return image, label


In [21]:
# create a TensorFlow dataset from the image files and labels
dataset = tf.data.Dataset.from_tensor_slices((image_files, labels))

# map the preprocessing function to the dataset
dataset = dataset.map(load_and_preprocess_image)

# shuffle and batch the dataset
dataset = dataset.shuffle(buffer_size=len(image_files))
dataset = dataset.batch(batch_size=32)

print(dataset)


<_BatchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
