# Filter the 5700 collected images down to one image per hour

In [4]:
import os
import shutil
from datetime import datetime

# Define the source and destination directories:
# images are read from source_dir and the selected ones are copied into destination_dir
source_dir = 'data/all_collected'
destination_dir = 'data/hourly_interval'

# Delete the destination directory if it exists, so that files copied by an
# earlier run do not remain next to this run's selection
if os.path.exists(destination_dir):
    shutil.rmtree(destination_dir)

# Create the destination directory again (exist_ok=True makes this a no-op
# if the directory is already there)
os.makedirs(destination_dir, exist_ok=True)

# Function to extract the timestamp from the filename
def extract_timestamp(filename):
    """Parse the capture time encoded in an image filename.

    The name is expected to look like '2701_YYYY-MM-DD_HH-MM-SS.jpg': the
    second underscore-separated field is the date, and the third field (up to
    its first '.') is the time of day.

    Returns the parsed ``datetime``. If the name cannot be parsed, a message
    is printed and ``None`` is returned instead of raising.
    """
    try:
        fields = filename.split('_')
        date_part = fields[1]
        # Drop the extension (everything from the first '.') from the time field
        time_part = fields[2].split('.')[0]
        return datetime.strptime(f"{date_part}_{time_part}", '%Y-%m-%d_%H-%M-%S')
    except Exception as e:
        print(f"Error extracting timestamp from {filename}: {e}")
        return None

# Filter images to only include those taken at hourly intervals.
# Files are visited in sorted filename order and the first image of each
# distinct calendar hour is copied to destination_dir.
# The comparison uses the (date, hour) pair rather than the hour alone: this
# loop spans every date, so an image taken at e.g. 14:xx must not be dropped
# just because the previously kept image was also taken at 14:xx on another day.
last_hour = None  # timestamp of the most recently copied image
file_count = 0
for filename in sorted(os.listdir(source_dir)):
    timestamp = extract_timestamp(filename)
    if timestamp is None:
        # Name did not parse (extract_timestamp already reported it); skip it
        continue
    same_hour_as_last = (
        last_hour is not None
        and (timestamp.date(), timestamp.hour) == (last_hour.date(), last_hour.hour)
    )
    if same_hour_as_last:
        continue
    shutil.copy(os.path.join(source_dir, filename), os.path.join(destination_dir, filename))
    last_hour = timestamp
    file_count += 1

# Print the total number of files saved
print(f"Total number of files saved: {file_count}")

Total number of files saved: 880


# Filter Data hourly on 12th of October 2024 (the dataset for analysis)

In [2]:
import os
import shutil
from datetime import datetime

# Define the source and destination directories:
# images are read from source_dir and the selected ones are copied into destination_dir
source_dir = 'data/all_collected'
destination_dir = 'data/12_oct_hourly'

# Delete the destination directory if it exists, so that files copied by an
# earlier run do not remain next to this run's selection
if os.path.exists(destination_dir):
    shutil.rmtree(destination_dir)

# Create the destination directory again (exist_ok=True makes this a no-op
# if the directory is already there)
os.makedirs(destination_dir, exist_ok=True)

# Function to extract the timestamp from the filename
def extract_timestamp(filename):
    """Parse the capture time encoded in an image filename.

    The name is expected to look like '2701_YYYY-MM-DD_HH-MM-SS.jpg': the
    second underscore-separated field is the date, and the third field (up to
    its first '.') is the time of day.

    Returns the parsed ``datetime``. If the name cannot be parsed, a message
    is printed and ``None`` is returned instead of raising.
    """
    try:
        fields = filename.split('_')
        date_part = fields[1]
        # Drop the extension (everything from the first '.') from the time field
        time_part = fields[2].split('.')[0]
        return datetime.strptime(f"{date_part}_{time_part}", '%Y-%m-%d_%H-%M-%S')
    except Exception as e:
        print(f"Error extracting timestamp from {filename}: {e}")
        return None

# Keep the first image of each hour, restricted to October 12, 2024.
# Files are visited in sorted filename order.
last_hour = None  # timestamp of the most recently copied image
file_count = 0
target_date = datetime(2024, 10, 12)  # the day under analysis

for filename in sorted(os.listdir(source_dir)):
    timestamp = extract_timestamp(filename)
    # Names that fail to parse come back as None and are ignored
    if not timestamp:
        continue
    # Only images captured on the target day are candidates
    if timestamp.date() != target_date.date():
        continue
    # Same hour as the previously kept image: not a new hourly sample
    if last_hour is not None and timestamp.hour == last_hour.hour:
        continue
    src_path = os.path.join(source_dir, filename)
    dst_path = os.path.join(destination_dir, filename)
    shutil.copy(src_path, dst_path)
    last_hour = timestamp
    file_count += 1

# Report how many images were copied
print(f"Total number of files saved: {file_count}")

Total number of files saved: 72
