<a href="https://colab.research.google.com/github/bergerbastian/inria1358/blob/master/notebooks/aerial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import tempfile

from google.cloud import storage

import cv2

from PIL import Image
import pandas as pd

import tensorflow as tf

# Setting Up

## Colab Sign In


In [2]:
# Colab-specific sign-in; this import only resolves when the notebook runs on Google Colab.
from google.colab import auth
# NOTE(review): presumably this supplies the credentials picked up by the
# storage.Client() calls in the following cells — confirm.
auth.authenticate_user()

In [3]:
# Cloud Storage client created with default arguments; the next cell uses it to look up the bucket.
client = storage.Client()

In [4]:
# Handle to the bucket that holds the aerial image dataset.
bucket = client.get_bucket('aerial_images_inria1358')
# convert_image() refers to the client under the name `storage_client`; reuse the
# client created above instead of constructing a second, identical one.
storage_client = client

## Functions

In [5]:
def printif(condition, string):
    """Conditionally write a message to standard output.

    Args:
        condition: value whose truthiness decides whether anything is printed
        string: the message passed to print() when condition is truthy
    """
    # Guard clause: nothing to do when the condition does not hold.
    if not condition:
        return
    print(string)

In [6]:
def get_blobs(bucket_name, folder="") -> list[storage.Blob]:
    """Returns a list of all blobs in a bucket, optionally restricted to one folder

    Args:
        bucket_name: Name of cloud storage bucket
        folder: Name prefix (e.g. "some/folder/") that blob names must start
            with; the default "" applies no restriction

    Returns:
        A list containing every blob produced by the listing call.
    """
    storage_client = storage.Client()

    # Materialise the listing into a list so callers can index it and iterate it repeatedly.
    return list(storage_client.list_blobs(bucket_name, prefix=folder))

In [7]:
def get_size_of_image(current_blob, verbose=False) -> tuple:
    """Returns image dimensions (height, width, channels)

    The blob is downloaded to a temporary file, decoded with OpenCV, and the
    temporary file is deleted again before the function returns.

    Args:
        current_blob: a Cloud Storage blob
        verbose: Whether or not to log

    Returns:
        The shape of the decoded image, or None when the downloaded file could
        not be read as an image (a message is printed in that case).
        Errors raised by the download itself are not caught.
    """
    file_name = current_blob.name
    # mkstemp returns an already-open file descriptor together with the path;
    # close it right away so the descriptor is not leaked on every call.
    temp_fd, temp_local_filename = tempfile.mkstemp()
    os.close(temp_fd)

    try:
        # Download file from bucket.
        current_blob.download_to_filename(temp_local_filename)
        printif(verbose, f"Image {file_name} was downloaded to {temp_local_filename}.")

        # Read Image Shape
        try:
            img = cv2.imread(temp_local_filename)
            if img is None:
                # cv2.imread reports an unreadable file by returning None instead of raising.
                raise ValueError("file could not be decoded as an image")
            printif(verbose, f"Image {file_name} was opened with dimensions: {img.shape}")
            return img.shape
        except Exception as error:
            # Best effort: report which blob failed and carry on with the next one.
            print(f"An error occurred while reading {file_name}: {error}")
            return None
    finally:
        # Always delete the temporary copy, whether or not reading succeeded.
        os.remove(temp_local_filename)


In [8]:
def convert_image(current_blob, verbose=False):
    """Converts an image blob to JPG and uploads the result to the bucket

    The converted file is uploaded to the 'aerial_images_inria1358' bucket under
    the original blob name prefixed with "Converted_" and with its file
    extension replaced by ".jpg". Both temporary local files are deleted before
    the function returns.

    Args:
        current_blob: a Cloud Storage blob
        verbose: Whether or not to log

    Returns:
        None. Conversion or upload failures are printed and swallowed; errors
        raised while downloading the blob are not caught.
    """
    file_name = current_blob.name
    # mkstemp returns an already-open file descriptor together with the path;
    # close it right away so the descriptor is not leaked on every call.
    temp_fd, temp_local_filename = tempfile.mkstemp()
    os.close(temp_fd)
    # Write the JPG next to the download instead of overwriting the file being read.
    jpg_local_filename = temp_local_filename + ".jpg"

    try:
        # Download file from bucket.
        current_blob.download_to_filename(temp_local_filename)
        printif(verbose, f"Image {file_name} was downloaded to {temp_local_filename}.")

        # Convert Image
        try:
            # Open Image (the context manager closes the underlying file again)
            with Image.open(temp_local_filename) as img:
                printif(verbose, f"Generating jpeg for {temp_local_filename}")

                # Save image as JPG
                img.save(jpg_local_filename, "JPEG", quality=100)

            # Swap only the file extension; a plain str.replace("tif", "jpg")
            # would also rewrite any "tif" occurring elsewhere in the path.
            base_name, _ = os.path.splitext(file_name)

            # Upload image to gcloud
            bucket = storage_client.bucket('aerial_images_inria1358')
            new_blob = bucket.blob("Converted_" + base_name + ".jpg")
            new_blob.upload_from_filename(jpg_local_filename)
        except Exception as error:
            # Best effort: report which blob failed and carry on with the next one.
            print(f"An error occurred while converting {file_name}: {error}")
            return None
    finally:
        # Delete the temporary files, whether or not the conversion succeeded.
        for local_path in (temp_local_filename, jpg_local_filename):
            if os.path.exists(local_path):
                os.remove(local_path)

In [37]:
def download_images(onlyTrain=True, verbose=False, processed=True, limit=None):
    """Downloads images from google cloud bucket

    Each blob is written to a local relative path equal to its blob name;
    missing parent directories are created on the way.

    Args:
        onlyTrain: Download only train images
        verbose: Whether or not to log
        processed: Download processed images (blob names prefixed with "Converted_")
        limit: Optional maximum number of files to download; None (the default)
            downloads every matching blob
    """
    addConverted = "Converted_" if processed else ""
    subfolder = "train/" if onlyTrain else ""
    blobs = get_blobs('aerial_images_inria1358', folder=f'{addConverted}AerialImageDataset/{subfolder}')

    downloaded = 0
    for blob in blobs:
        if limit is not None and downloaded >= limit:
            break

        # A name ending in "/" has no file part to write to, so skip it.
        if blob.name.endswith("/"):
            continue

        # Create the local directory for the file if the name has a directory part.
        path = os.path.dirname(blob.name)
        if path:
            os.makedirs(path, exist_ok=True)

        # Download file from bucket.
        blob.download_to_filename(blob.name)
        printif(verbose, f"Image {blob.name} was downloaded to {blob.name}.")
        downloaded += 1

# Analysis of Dimensions

In [49]:
# List every blob in the bucket whose name starts with the training-folder prefix.
blobs = get_blobs('aerial_images_inria1358', folder='AerialImageDataset/train/')

In [50]:
# Reading the shapes downloads the whole dataset (temporarily), taking ~12 min,
# so the result is cached in a local CSV and only recomputed when that file is missing.
# The following cell needs `dimensions`, so this cell must define it on a fresh kernel.
DIMENSIONS_CACHE = "train_image_dimensions.csv"
DIMENSION_COLUMNS = ["heights", "widths", "channels"]

if os.path.exists(DIMENSIONS_CACHE):
    dimensions = list(pd.read_csv(DIMENSIONS_CACHE).itertuples(index=False, name=None))
else:
    # get_size_of_image returns None for blobs it cannot read; keep only real shapes.
    dimensions = [dim for dim in map(get_size_of_image, blobs) if dim is not None]
    pd.DataFrame(dimensions, columns=DIMENSION_COLUMNS).to_csv(DIMENSIONS_CACHE, index=False)

In [58]:
# get_size_of_image returns None for blobs it could not read; indexing None
# would raise a TypeError, so those entries are left out of the statistics.
valid_dimensions = [dim for dim in dimensions if dim is not None]

heights = [dim[0] for dim in valid_dimensions]
widths = [dim[1] for dim in valid_dimensions]
channels = [dim[2] for dim in valid_dimensions]

# One row per readable image, one column per shape component.
dimensions_data = pd.DataFrame.from_dict({"heights":heights, "widths":widths, "channels":channels})

In [61]:
# Summary statistics of the collected heights, widths and channel counts.
dimensions_data.describe()

Unnamed: 0,heights,widths,channels
count,360.0,360.0,360.0
mean,5000.0,5000.0,3.0
std,0.0,0.0,0.0
min,5000.0,5000.0,3.0
25%,5000.0,5000.0,3.0
50%,5000.0,5000.0,3.0
75%,5000.0,5000.0,3.0
max,5000.0,5000.0,3.0


# Convert Images to JPG

In [9]:
# Warning! This takes a while and converts all images in train to JPG
#blobs = get_blobs('aerial_images_inria1358', folder='AerialImageDataset/train/')
#for blob in blobs:
#  convert_image(blob)

# TensorFlow

In [35]:
# Fetch the converted ("Converted_"-prefixed) training images into the local working directory.
# The notebook defines this helper as download_images; no download_processed_images exists.
download_images(processed=True)

In [13]:
def normalize(input_image, input_mask):
  """Scale an image by 1/255 as float32 and shift its mask values down by one.

  Args:
      input_image: image tensor; cast to float32 and divided by 255.0
      input_mask: mask tensor; 1 is subtracted from every value

  Returns:
      Tuple (input_image, input_mask) after the two adjustments.
  """
  input_image = tf.cast(input_image, tf.float32) / 255.0
  # NOTE(review): the shift presumes mask labels start at 1 — confirm this
  # holds for the masks of this dataset.
  input_mask -= 1
  return input_image, input_mask

def load_image(datapoint):
  """Resize one datapoint's image and mask to 128x128 and normalize both.

  Args:
      datapoint: mapping that provides the entries 'image' and 'segmentation_mask'

  Returns:
      Tuple (image, mask) as returned by normalize().
  """
  target_size = (128, 128)

  resized_image = tf.image.resize(datapoint['image'], target_size)
  # The mask is resized with the nearest-neighbour method rather than the default one.
  resized_mask = tf.image.resize(
    datapoint['segmentation_mask'],
    target_size,
    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
  )

  return normalize(resized_image, resized_mask)

class Augment(tf.keras.layers.Layer):
  """Keras layer that passes inputs and labels through horizontal random-flip layers."""

  def __init__(self, seed=42):
    """Create one RandomFlip layer for inputs and one for labels, both built with `seed`."""
    super().__init__()
    # Both layers get the same configuration and seed so that they are meant
    # to make the same random changes to inputs and labels.
    flip_config = dict(mode="horizontal", seed=seed)
    self.augment_inputs = tf.keras.layers.RandomFlip(**flip_config)
    self.augment_labels = tf.keras.layers.RandomFlip(**flip_config)

  def call(self, inputs, labels):
    """Return the (inputs, labels) pair after each went through its flip layer."""
    flipped_inputs = self.augment_inputs(inputs)
    flipped_labels = self.augment_labels(labels)
    return flipped_inputs, flipped_labels

In [14]:
# Size of the source images in pixels; matches the 5000 x 5000 reported by the
# dimension analysis earlier in this notebook.
IMG_SIZE = (5000,5000)
# NOTE(review): presumably the number of samples per training batch — it is not
# referenced in the cells visible here; confirm where it is used.
BATCH_SIZE = 32

In [15]:
# TODO: placeholder — point this at the local folder that holds the images.
# With the placeholder value os.listdir raises FileNotFoundError.
data_dir = "/path/to/image/directory"
# os.listdir returns entries in arbitrary order; sort so the dataset order is reproducible.
filenames = tf.constant(sorted(os.path.join(data_dir, fname) for fname in os.listdir(data_dir)))
dataset = tf.data.Dataset.from_tensor_slices(filenames)

FileNotFoundError: ignored

In [30]:
# Scratch check: dropping the last "/"-separated component of a blob name leaves its parent folder.
"/".join('AerialImageDataset/train/gt/austin1.tif'.split("/")[0:-1])

'AerialImageDataset/train/gt'