In [5]:
# Make a copy of a CSV file that contains only its first N rows

import csv

def copy_csv_with_first_20_rows(input_file, output_file, num_rows=20, delimiter=','):
    """Copy the leading rows of a CSV file into a new CSV file.

    Rows are counted as parsed by ``csv.reader``, so a header line (if the
    file has one) counts as one of the copied rows.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write; overwritten if it exists.
        num_rows: Maximum number of rows to copy. Defaults to 20.
        delimiter: Field delimiter used for both reading and writing.
            Defaults to ','.

    Raises:
        ValueError: If ``num_rows`` is negative.
    """
    if num_rows < 0:
        raise ValueError(f"num_rows must be non-negative, got {num_rows}")

    # newline='' lets the csv module handle line endings itself, so CR/LF
    # characters embedded in quoted fields survive the round trip.
    with open(input_file, 'r', newline='') as source:
        reader = csv.reader(source, delimiter=delimiter)
        # zip() stops as soon as range() is exhausted, so at most num_rows
        # rows are parsed instead of loading the whole file into memory.
        data = [row for _, row in zip(range(num_rows), reader)]

    # The input is fully read and closed before the output is opened, so the
    # same path may safely be used for both.
    with open(output_file, 'w', newline='') as target:
        csv.writer(target, delimiter=delimiter).writerows(data)

    print(f"CSV copied with the first {num_rows} rows: {output_file}")

# Source CSV and the destination for its truncated copy
input_file, output_file = 'birds.csv', 'birds_20.csv'

# Write the truncated copy
copy_csv_with_first_20_rows(input_file=input_file, output_file=output_file)


CSV copied with the first 20 rows: birds_20.csv


In [6]:
#download images from source

import os
import csv
import requests

def download_images(csv_file, output_folder='bird_images',
                    output_file='updated_data.csv', timeout=30):
    """Download the images listed in a tab-delimited CSV and record where they were saved.

    Each row must provide a ``url`` column (where to fetch the image) and a
    ``path`` column (relative location to store it under ``output_folder``).
    A comma-delimited copy of the CSV is written to ``output_file`` with an
    extra ``filepath`` column holding the saved location; rows whose download
    failed keep that column empty unless the input already supplied a value.

    Args:
        csv_file: Path of the tab-delimited CSV to read.
        output_folder: Directory under which images are stored.
            Defaults to 'bird_images'.
        output_file: Path of the updated CSV to write.
            Defaults to 'updated_data.csv'.
        timeout: Seconds to wait for each HTTP request before giving up.
            Defaults to 30.

    Raises:
        KeyError: If a row lacks the ``url`` or ``path`` column.
    """
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Read the CSV file with tab delimiter; newline='' leaves line-ending
    # handling to the csv module.
    with open(csv_file, 'r', newline='') as file:
        reader = csv.DictReader(file, delimiter='\t')
        data = list(reader)
        # Take the header from the reader rather than the first row so that a
        # header-only (or completely empty) file does not raise IndexError.
        header = list(reader.fieldnames or [])

    # Add the filepath column once, even when the input already carries it
    # (e.g. when re-running on a previously updated CSV).
    if 'filepath' not in header:
        header.append('filepath')

    # Process each row in the CSV (the input file is already closed here)
    for row in data:
        image_url = row['url']
        image_path = row['path']

        # Download the image; one unreachable host must not abort the batch,
        # and a timeout prevents a stalled server from hanging it forever.
        try:
            response = requests.get(image_url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to download image from {image_url}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download image from {image_url}")
            continue

        # Generate the file path based on the original path value
        folder_name, file_name = os.path.split(image_path)
        file_path = os.path.join(output_folder, folder_name, file_name)

        # Create the necessary subdirectories
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        # Save the image to the output folder
        with open(file_path, 'wb') as image_file:
            image_file.write(response.content)

        # Update the row with the file path
        row['filepath'] = file_path

    # Write the updated data to a new CSV file
    with open(output_file, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header, delimiter=',')
        writer.writeheader()
        writer.writerows(data)

    print(f"Images downloaded and CSV updated. Output file: {output_file}")

# CSV that lists the images to fetch
csv_file = 'birds_20.csv'

# Fetch every listed image and write the CSV augmented with file paths
download_images(csv_file=csv_file)


Images downloaded and CSV updated. Output file: updated_data.csv


In [11]:
# Write calculated bounding box columns

import pandas as pd

# Load the tab-delimited CSV and derive the bounding box's width, height and
# center coordinates in a single chain. Each lambda receives the frame as
# built so far, so the center columns can reuse the new width/height columns.
df = pd.read_csv('birds_20.csv', delimiter='\t').assign(
    width=lambda frame: frame['bb_x2'] - frame['bb_x1'],
    height=lambda frame: frame['bb_y2'] - frame['bb_y1'],
    center_x=lambda frame: frame['bb_x1'] + (frame['width'] / 2),
    center_y=lambda frame: frame['bb_y1'] + (frame['height'] / 2),
)

# Persist the augmented frame as a comma-delimited CSV without the index
df.to_csv('birds_20_bb.csv', index=False)


In [14]:
import pandas as pd

# Load the CSV produced by the bounding-box step
df = pd.read_csv('birds_20_bb.csv')

# Columns to discard, grouped as: source URL, bounding-box corners, then one
# x/y pair per annotated body part
columns_to_remove = [
    'url',
    'bb_x1', 'bb_y1', 'bb_x2', 'bb_y2',
    'back_x', 'back_y',
    'beak_x', 'beak_y',
    'belly_x', 'belly_y',
    'breast_x', 'breast_y',
    'crown_x', 'crown_y',
    'forehead_x', 'forehead_y',
    'left_cheek_x', 'left_cheek_y',
    'left_eye_x', 'left_eye_y',
    'left_leg_x', 'left_leg_y',
    'left_wing_x', 'left_wing_y',
    'nape_x', 'nape_y',
    'right_cheek_x', 'right_cheek_y',
    'right_eye_x', 'right_eye_y',
    'right_leg_x', 'right_leg_y',
    'right_wing_x', 'right_wing_y',
    'tail_x', 'tail_y',
    'throat_x', 'throat_y',
]

# Remove those columns (raises KeyError if any of them is absent)
df = df.drop(labels=columns_to_remove, axis='columns')

# Write the slimmed-down frame to its own CSV, without the index
df.to_csv('birds_20_bb_clean.csv', index=False)


In [17]:
# Add a category column derived from the first component of each image path

import pandas as pd

# Read the CSV file
df = pd.read_csv('birds_20_bb_clean.csv')

# Extract the category from the "path" field: take the text before the first
# '/', lower-case it and replace spaces with underscores. The vectorised .str
# methods avoid a Python-level call per row and, unlike the previous
# apply/lambda, propagate a missing path among string paths as a missing
# category instead of raising AttributeError.
df['category'] = (
    df['path']
    .str.split('/', n=1).str[0]
    .str.lower()
    .str.replace(' ', '_', regex=False)
)

# Save the updated DataFrame to a new CSV file
df.to_csv('birds_20_final.csv', index=False)
