<a href="https://colab.research.google.com/github/bergerbastian/inria1358/blob/master/aerial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
import os
import tempfile

from google.cloud import storage

import cv2

from PIL import Image
import pandas as pd

# Setting Up

## Colab Sign In


In [3]:
# Colab-only sign-in: `google.colab` is not importable outside a Colab runtime.
# Presumably this is what authorises the Cloud Storage clients created in the
# following cells -- confirm against the Colab auth documentation.
from google.colab import auth
auth.authenticate_user()

In [4]:
# Cloud Storage client; used below to look up the dataset bucket.
client = storage.Client()

In [70]:
# Handle to the bucket that holds the aerial image dataset.
bucket = client.get_bucket('aerial_images_inria1358')

# convert_image() looks this module-level name up when it uploads; reuse the
# client created above instead of constructing a second, identical one.
storage_client = client

## Functions

In [46]:
def printif(condition, string):
    """Conditional print helper used for optional logging

    Args:
        condition: printing happens only when this is truthy
        string: the message that gets printed
    """
    # Guard clause: nothing to do when logging is switched off.
    if not condition:
        return
    print(string)

In [36]:
def get_blobs(bucket_name, folder="") -> "list[storage.Blob]":
    """Returns a list of all blobs in a bucket / in a folder of that bucket

    Args:
        bucket_name: Name of cloud storage bucket
        folder: Passed to the listing call as the object-name prefix; the
            default "" applies no restriction, so the whole bucket is listed

    Returns:
        The listing materialised into a list, so it can be iterated more
        than once.
    """
    # The annotation above is quoted so it is not evaluated when the function
    # is defined, and it names the Blob class (`storage.blob` is the module).

    # A fresh client is created on every call, exactly as before.
    storage_client = storage.Client()

    return list(storage_client.list_blobs(bucket_name, prefix=folder))

In [47]:
def get_size_of_image(current_blob, verbose=False) -> tuple:
    """Returns image dimensions (height, width, channels)

    The blob is downloaded to a temporary file, read with OpenCV and the
    temporary file is removed again before returning.

    Args:
        current_blob: a Cloud Storage blob
        verbose: Whether or not to log

    Returns:
        The `shape` of the array returned by `cv2.imread`, or None when the
        file could not be read as an image. Download errors are not caught
        and propagate to the caller.
    """
    file_name = current_blob.name

    # mkstemp hands back an *open* descriptor; close it right away so that
    # processing many blobs does not exhaust the process' file handles.
    fd, temp_local_filename = tempfile.mkstemp()
    os.close(fd)

    try:
        # Download file from bucket.
        current_blob.download_to_filename(temp_local_filename)
        printif(verbose, f"Image {file_name} was downloaded to {temp_local_filename}.")

        # Read Image Shape
        # NOTE(review): imread is called with its default flags, which per the
        # OpenCV docs load every image as 3-channel BGR -- so the reported
        # channel count may not reflect the stored file. Confirm if that matters.
        try:
            img = cv2.imread(temp_local_filename)
        except Exception as exc:
            print(f"An error occurred while reading {file_name}: {exc}")
            return None

        # imread signals an unreadable file by returning None, not by raising.
        if img is None:
            print(f"An error occurred while reading {file_name}: not a readable image.")
            return None

        printif(verbose, f"Image {file_name} was opened with dimensions: {img.shape}")
        return img.shape
    finally:
        # Always delete the temporary file, whatever happened above.
        if os.path.exists(temp_local_filename):
            os.remove(temp_local_filename)


In [74]:
def convert_image(current_blob, verbose=False, bucket_name="aerial_images_inria1358"):
    """Converts an image blob to JPG and uploads the result

    The blob is downloaded to a temporary file, re-encoded as JPEG into a
    second temporary file and uploaded as
    "Converted_" + <blob name with its extension replaced by ".jpg">.
    Both temporary files are removed before returning.

    Args:
        current_blob: a Cloud Storage blob
        verbose: Whether or not to log
        bucket_name: Bucket that receives the converted image; defaults to
            the bucket this function previously had hard-coded

    Returns:
        None in every case. Conversion / upload failures are reported on
        stdout; download errors are not caught and propagate to the caller.
    """
    file_name = current_blob.name

    # mkstemp returns open descriptors; close them immediately so they are
    # not leaked once per converted image.
    source_fd, temp_local_filename = tempfile.mkstemp()
    os.close(source_fd)
    # Separate output file so the JPEG is never written over the file the
    # source image was opened from.
    jpeg_fd, temp_jpeg_filename = tempfile.mkstemp(suffix=".jpg")
    os.close(jpeg_fd)

    try:
        # Download file from bucket.
        current_blob.download_to_filename(temp_local_filename)
        printif(verbose, f"Image {file_name} was downloaded to {temp_local_filename}.")

        # Convert Image
        try:
            # Open Image (context manager releases the file handle)
            with Image.open(temp_local_filename) as img:
                printif(verbose, f"Generating jpeg for {temp_local_filename}")

                # Save image as JPG
                img.save(temp_jpeg_filename, "JPEG", quality=100)

            # Swap only the extension; a plain replace("tif", "jpg") would
            # also rewrite "tif" appearing elsewhere in the object path.
            name_without_ext, _ = os.path.splitext(file_name)

            # Upload image to gcloud (storage_client is a module-level client)
            bucket = storage_client.bucket(bucket_name)
            new_blob = bucket.blob("Converted_" + name_without_ext + ".jpg")
            new_blob.upload_from_filename(temp_jpeg_filename, content_type="image/jpeg")
        except Exception as exc:
            print(f"An error occurred while converting {file_name}: {exc}")
            return None
    finally:
        # Delete the temporary files, also when something failed above.
        for temp_path in (temp_local_filename, temp_jpeg_filename):
            if os.path.exists(temp_path):
                os.remove(temp_path)

# Analysis of Dimensions

In [49]:
# Every object whose name starts with the train/ prefix; as the conversion log
# further down shows, this includes the files under train/gt/.
blobs = get_blobs('aerial_images_inria1358', folder='AerialImageDataset/train/')

In [50]:
# This downloads the whole dataset (temporarily), taking ~12 min.
# The following cell reads `dimensions`, so it has to exist after a fresh
# "Restart & Run All"; it is only computed when the kernel does not hold it
# yet, which keeps re-running this cell cheap.
if "dimensions" not in globals():
    dimensions = [get_size_of_image(blob) for blob in blobs]

In [58]:
# get_size_of_image returns None for images it could not read; drop those
# entries so one bad file does not abort the analysis with a TypeError.
valid_dimensions = [dim for dim in dimensions if dim is not None]

# Each remaining entry is indexed as (height, width, channels).
heights = [dim[0] for dim in valid_dimensions]
widths = [dim[1] for dim in valid_dimensions]
channels = [dim[2] for dim in valid_dimensions]

# One row per readable image.
dimensions_data = pd.DataFrame({"heights": heights, "widths": widths, "channels": channels})

In [61]:
# Summary statistics per column; in the saved output every row is
# 5000 x 5000 with 3 channels (std is 0 for all three columns).
dimensions_data.describe()

Unnamed: 0,heights,widths,channels
count,360.0,360.0,360.0
mean,5000.0,5000.0,3.0
std,0.0,0.0,0.0
min,5000.0,5000.0,3.0
25%,5000.0,5000.0,3.0
50%,5000.0,5000.0,3.0
75%,5000.0,5000.0,3.0
max,5000.0,5000.0,3.0


# Convert Images to JPG

In [76]:
# Converts every blob listed under the train/ prefix, one after the other.
# With verbose=True each image logs two lines (download + jpeg generation).
for blob in blobs:
  convert_image(blob, verbose=True)

Image AerialImageDataset/train/gt/austin1.tif was downloaded to /tmp/tmpwze4m2y1.
Generating jpeg for /tmp/tmpwze4m2y1
Image AerialImageDataset/train/gt/austin10.tif was downloaded to /tmp/tmpdrwcwb8a.
Generating jpeg for /tmp/tmpdrwcwb8a
Image AerialImageDataset/train/gt/austin11.tif was downloaded to /tmp/tmpj3rclpiw.
Generating jpeg for /tmp/tmpj3rclpiw
Image AerialImageDataset/train/gt/austin12.tif was downloaded to /tmp/tmpszguhzp4.
Generating jpeg for /tmp/tmpszguhzp4
Image AerialImageDataset/train/gt/austin13.tif was downloaded to /tmp/tmp4d5ncedc.
Generating jpeg for /tmp/tmp4d5ncedc
Image AerialImageDataset/train/gt/austin14.tif was downloaded to /tmp/tmp8orfmft5.
Generating jpeg for /tmp/tmp8orfmft5
Image AerialImageDataset/train/gt/austin15.tif was downloaded to /tmp/tmpnf_6op78.
Generating jpeg for /tmp/tmpnf_6op78
Image AerialImageDataset/train/gt/austin16.tif was downloaded to /tmp/tmpyiljqrmn.
Generating jpeg for /tmp/tmpyiljqrmn
Image AerialImageDataset/train/gt/austin1

In [None]:
# Colab-only: mounts Google Drive at /content/drive.
# This cell has no execution count in the saved notebook, i.e. it was not run.
from google.colab import drive
drive.mount('/content/drive')