In [9]:
import pandas as pd
import shutil
import os

def copy_images_with_tags(source_folder, csv_file, destination_folder, new_csv_file, tags, max_images):
    """Copy 'clear' images that carry at least one of ``tags`` and save their CSV rows.

    The CSV is expected to have an ``image_name`` column (file name without
    the ``.jpg`` extension) and a ``tags`` column (text containing the labels).

    Parameters
    ----------
    source_folder : str
        Folder containing the ``<image_name>.jpg`` files.
    csv_file : str
        Path of the CSV with the ``image_name`` and ``tags`` columns.
    destination_folder : str
        Folder the selected images are copied to; created if missing.
    new_csv_file : str
        Name of the CSV written with the selected rows. It is joined onto
        ``destination_folder`` with ``os.path.join``.
    tags : list of str
        Additional tags; a row is kept when its ``tags`` text contains
        'clear' AND at least one of these (literal substring match, not a
        regex). An empty list keeps every 'clear' row.
    max_images : int
        Maximum number of rows/images to keep (the first matches in CSV order).

    Raises
    ------
    FileNotFoundError
        If a selected image does not exist in ``source_folder``.
    """
    df = pd.read_csv(csv_file)

    # Missing tag cells become empty strings so they simply never match
    # (a NaN in the boolean mask would make the row selection below fail).
    tag_column = df['tags'].fillna('').astype(str)

    # 'clear' is always required.
    keep = tag_column.str.contains('clear', regex=False)

    # Additionally require at least one of the requested tags. The previous
    # pattern was 'clear|<tags>', which is true for every 'clear' row, so the
    # tags never narrowed the selection.
    if tags:
        has_any_tag = pd.Series(False, index=df.index)
        for tag in tags:
            has_any_tag |= tag_column.str.contains(tag, regex=False)
        keep &= has_any_tag

    # Keep only the first max_images matching rows, in CSV order.
    filtered_df = df[keep].head(max_images)

    # Ensure the destination folder exists.
    os.makedirs(destination_folder, exist_ok=True)

    # Copy every selected image into the destination folder.
    for image_name in filtered_df['image_name']:
        file_name = image_name + '.jpg'
        shutil.copy(
            os.path.join(source_folder, file_name),
            os.path.join(destination_folder, file_name),
        )

    # Save the selected rows next to the copied images.
    filtered_df.to_csv(os.path.join(destination_folder, new_csv_file), index=False)


In [10]:
# Goal: collect 'clear' images that are tagged with agriculture and/or cultivation.

# Selection settings: extra tags to look for and the cap on copied images.
tags = ["agriculture", "cultivation"]
max_images = 100

# Inputs: the image folder and the CSV with the image_name / tags columns.
csv_file = r"C:\Users\felix\Repos\amazonas_from_space\planet\train_classes.csv"
source_folder = r"C:\Users\felix\Repos\amazonas_from_space\planet\train-jpg"

# Outputs: folder receiving the copies and the CSV listing the selected rows.
# target_csv is joined onto destination_folder with os.path.join inside the
# function; NOTE(review): as an absolute Windows path it should win that join.
target_csv = r"C:\Users\felix\Repos\amazonas_from_space\planet\filtered_images.csv"
destination_folder = r"C:\Users\felix\Repos\amazonas_from_space\planet\agriculture_clear"

copy_images_with_tags(
    source_folder=source_folder,
    csv_file=csv_file,
    destination_folder=destination_folder,
    new_csv_file=target_csv,
    tags=tags,
    max_images=max_images,
)

In [None]:
# Collect 'clear' images regardless of their other labels.
# Note: the function keeps the first max_images matching rows in CSV order
# (DataFrame.head), so this is not a random sample.

# Selection settings: no additional tags, capped number of images.
tags = []
max_images = 100

# Inputs: the image folder and the CSV with the image_name / tags columns.
csv_file = r"C:\Users\felix\Repos\amazonas_from_space\planet\train_classes.csv"
source_folder = r"C:\Users\felix\Repos\amazonas_from_space\planet\train-jpg"

# Outputs: folder receiving the copies and the CSV listing the selected rows.
target_csv = r"C:\Users\felix\Repos\amazonas_from_space\planet\filtered_images_random.csv"
destination_folder = r"C:\Users\felix\Repos\amazonas_from_space\planet\random_clear"

copy_images_with_tags(
    source_folder=source_folder,
    csv_file=csv_file,
    destination_folder=destination_folder,
    new_csv_file=target_csv,
    tags=tags,
    max_images=max_images,
)
