In [10]:
import pandas as pd
import os
import shutil

# Load the per-image class flags that sit next to the training images.
df = pd.read_csv('./train/_classes.csv')

# Column headers may carry stray whitespace; strip it so columns can be
# addressed by their plain names below.
df.columns = [col.strip() for col in df.columns]

# Keep only single-class rows: exactly 'car' or exactly 'person', with every
# other class flag ('bicycle', 'dog', and the opposite class) equal to 0.
df_car = df[(df['bicycle'] == 0) & (df['car'] == 1) & (df['dog'] == 0) & (df['person'] == 0)]
df_person = df[(df['bicycle'] == 0) & (df['car'] == 0) & (df['dog'] == 0) & (df['person'] == 1)]

# Stack the two selections: all 'car' rows first, then all 'person' rows.
df_filtered = pd.concat([df_car, df_person])

# Output directory for the selected images and the CSV describing them.
os.makedirs('filtered_data', exist_ok=True)

# Copy every selected image from 'train' into 'filtered_data' under a
# sequential, zero-padded name (0000.jpeg, 0001.jpeg, ...), remembering the
# new name for each row in row order.
new_filenames = []
for index, filename in enumerate(df_filtered['filename']):
    new_filename = f"{index:04d}.jpeg"
    shutil.copy(os.path.join('train', filename), os.path.join('filtered_data', new_filename))
    new_filenames.append(new_filename)

# Rename all rows in one positional assignment after the loop. Rewriting the
# column through a `filename == ...` mask while iterating would rescan the
# whole column for every row and could rename the wrong rows when filenames
# repeat or when a source filename already looks like a generated one.
df_filtered['filename'] = new_filenames

# Write the renamed table next to the copied images.
df_filtered.to_csv(os.path.join('filtered_data', 'filtered_dataset.csv'), index=False)


In [11]:
# Encode each selected row as a class id, in df_filtered row order:
# 0 when the 'car' flag is 1, otherwise 1 (the row is a 'person' row).
labels = [0 if car_flag == 1 else 1 for car_flag in df_filtered['car']]

# Wrap the ids in a single-column frame so pandas can serialise them.
df_labels = pd.DataFrame(labels, columns=['label'])

# Save as a bare column of ids (no header row, no index column) inside
# 'filtered_data'.
labels_path = os.path.join('filtered_data', 'labels.csv')
df_labels.to_csv(labels_path, index=False, header=False)
