In [None]:
import csv
import requests
from PIL import Image
from io import BytesIO

def download_and_resize_image(url, output_path, target_size=(512, 512), timeout=30):
    """Download an image, resize it, and save it to ``output_path``.

    Args:
        url: Address of the image to fetch.
        output_path: Destination path handed to ``Image.save``.
        target_size: ``(width, height)`` passed to ``Image.resize``; the
            aspect ratio of the source image is not preserved.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            single unresponsive server cannot stall a whole batch.

    Returns:
        ``None`` when the image was saved; otherwise the exception that
        prevented it (download error or image decode/save error). Callers
        can therefore use ``if not result`` to detect success.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the image: {e}")
        return e

    try:
        image = Image.open(BytesIO(response.content))
        resized_image = image.resize(target_size)
        resized_image.save(output_path)
    except IOError as e:
        print(f"Error resizing the image: {e}")
        # Report the failure just like a download error; falling through
        # to ``None`` would make the caller record an image that was
        # never written to disk.
        return e

    return None


def filter_process_images(readfile, writefile):
    """Download every captioned image listed in a CSV and write its metadata.

    The first row of ``readfile`` is treated as a header and skipped. For
    each remaining row, column 1 is used as the image URL, columns 3 and 4
    as the original width and height, and column 7 as the text. Rows whose
    text column is empty (or that are too short to have one) are ignored.

    Each kept image is fetched via ``download_and_resize_image`` and stored
    as ``img/<n>.jpg``, where ``n`` counts successfully saved images from 0.
    One metadata row per saved image is written to ``writefile``.

    Args:
        readfile: Path of the source CSV.
        writefile: Path of the metadata CSV to create (overwritten).

    Returns:
        None.
    """
    # Saved images are resized, so only the original dimensions are recorded.
    header = ['id', 'image_path', 'original_width', 'original_height', 'text']

    with open(readfile, newline='') as read_file, \
            open(writefile, 'w', newline='') as write_file:
        csv_reader = csv.reader(read_file, delimiter=',')
        csv_writer = csv.writer(write_file)

        # Skip the header row; the default keeps an empty file from raising.
        next(csv_reader, None)

        # Track the header separately from the counter: the counter stays at
        # 0 until a download succeeds, so it cannot tell us whether the
        # header has already been emitted.
        header_written = False
        image_id = 0

        for row in csv_reader:
            # csv.reader yields [] for blank lines; skip those and any row
            # without a non-empty text column.
            if len(row) < 8 or not row[7]:
                continue

            if not header_written:
                csv_writer.writerow(header)
                header_written = True

            image_url = row[1]
            width, height, text = row[3], row[4], row[7]

            save_path = f"img/{image_id}.jpg"
            error = download_and_resize_image(image_url, save_path)
            if error:
                # Failed image: leave the counter untouched so ids of saved
                # images stay contiguous.
                continue

            csv_writer.writerow([image_id, save_path, width, height, text])
            image_id += 1

if __name__ == "__main__":
    import os

    read_file = 'published_images.csv'
    write_file = "img/metadata.csv"

    # Both the metadata file and the downloaded images are written under
    # img/; create it up front so a fresh checkout does not fail with
    # FileNotFoundError when the output file is opened.
    os.makedirs(os.path.dirname(write_file), exist_ok=True)

    filter_process_images(read_file, write_file)
