<a href="https://colab.research.google.com/github/WRFitch/fyp/blob/main/src/fyp_preliminary_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup


*   Install & import necessary libraries
*   Set up Earth Engine datastores. 



In [None]:
!pip install fastai
!pip install fastai2
!pip install tensorflow

In [30]:
import ee
import folium
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from fastai.tabular import *
from fastai.vision import *
from IPython.display import Image
from pprint import pprint

In [None]:
ee.Authenticate()
ee.Initialize()

#print(fastai.__version__)
print(folium.__version__)
print(tf.__version__)


### Import the following datasets into google drive beforehand:

*   [Sentinel-2 Satellite photography](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR)
*   [Sentinel-5 Precursor Data](https://developers.google.com/earth-engine/datasets/catalog/sentinel)
  *   [Aerosol](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_AER_AI)
  *   [Cloud](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_CLOUD)
  *   [Carbon Monoxide](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_CO)
  *   [Formaldehyde](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_HCHO)
  *   [Nitrogen Dioxide](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_NO2)
  *   [Ozone](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_O3)
  *   [Sulphur Dioxide](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_SO2)
  *   [Methane](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_CH4)
*   [ODIAC Fossil Fuel CO2 Emissions](https://db.cger.nies.go.jp/dataset/ODIAC/DL_odiac2019.html)


In [24]:
# Earth engine username, used to import classified image into ee assets folder
USERNAME = 'wrfitch'
OUTPUT_DIR = USERNAME + "/out/"

# Define image collections for each dataset to be used 
s2 = ee.ImageCollection("COPERNICUS/S2_SR")
s5_CO = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CO")
s5_HCHO = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_HCHO") 
s5_NO2 = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_NO2")
s5_O3 = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_O3")
s5_SO2 = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_SO2")
s5_CH4 = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CH4")

# Define dataset boundaries for britain and london 
great_britain = ee.Geometry.Polygon(
        [[[-1.836112801004015, 59.808076330562756],
          [-8.779472176004015, 58.82140293049428],
          [-7.988456551004015, 55.71069203454839],
          [-11.196464363504015, 54.42753859549109],
          [-11.328300301004015, 50.967746003015044],
          [-9.526542488504015, 50.77361752815123],
          [-6.274589363504015, 51.81776248652293],
          [-5.395683113504015, 51.21615275310099],
          [-6.582206551004015, 49.56332371186494],
          [-3.110526863504015, 49.904165426606255],
          [1.240059073995985, 50.80139967619036],
          [2.426582511495985, 52.33095407387208],
          [1.767402823995985, 53.4183511305661],
          [0.5369340739959849, 53.44453305344514],
          [-1.616386238504015, 56.32474216074427],
          [-0.7814253010040151, 57.805828290000164]]])

london = ee.Geometry.Polygon(
        [[[-1.0666833726431624, 51.89360084338857],
          [-0.9321008531119124, 51.38908166135181],
          [-0.18503054061191238, 51.08470683562287],
          [0.4741491468881076, 51.193274483099074],
          [0.9822668226693576, 51.60282356474035],
          [0.2269567640756076, 52.071221592742454]]])

# Other variables that only need to be defined once. 
start_date = '2020-01-01'
end_date = '2020-12-31'
vis_palette = ['black', 'blue', 'purple', 'cyan', 'green', 'yellow', 'red']

In [23]:
# define utility methods - TODO remove to utils script 

# pre-filter to remove clouds - we can add them back in as data points from sentinel 5
def maskS2clouds(image) :
  qa = image.select('QA60');

  # Bits 10 and 11 are clouds and cirrus, respectively.
  cloudBitMask = 1 << 10
  cirrusBitMask = 1 << 11

  # Both flags should be set to zero, indicating clear conditions.
  mask = qa.bitwiseAnd(cloudBitMask).eq(0) \
      .And(qa.bitwiseAnd(cirrusBitMask).eq(0))

  return image.updateMask(mask).divide(10000)

In [28]:
# Import datasets 
# TODO analyse whether these min/max values are valid, recalibrate for highest variance where necessary
# TODO analyse whether it makes sense to analyse these on a highly localised level

# High-resolution satellite photograph 
s2img = ee.ImageCollection('COPERNICUS/S2_SR') \
                  .filterDate(start_date, end_date) \
                  .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)) \
                  .filterBounds(great_britain) \
                  .map(maskS2clouds).median()
s2_id = s2img.getMapId({'bands': ['B4', 'B3', 'B2'], \
                        'min': 0, \
                        'max': 0.3})

# Carbon monoxide
# Minmax scale is a bit off - recalibrate for Britain
CO_img = s5_CO.filterDate(start_date, end_date) \
              .filterBounds(great_britain) \
              .select('CO_column_number_density').mean()
CO_id = CO_img.getMapId( \
    {'palette': vis_palette, \
    'min': 0, \
    'max': 0.05})

# Formaldehyde
# Minmax scale is a bit off - recalibrate for Britain
HCHO_img = s5_HCHO.filterDate(start_date, end_date) \
                  .filterBounds(great_britain) \
                  .select('tropospheric_HCHO_column_number_density').mean()
HCHO_id = HCHO_img.getMapId( \
    {'palette': vis_palette, \
    'min': 0.0, \
    'max': 0.0003})

# Nitrogen Dioxide
NO2_img = s5_NO2.filterDate(start_date, end_date) \
                .filterBounds(great_britain) \
                .select('tropospheric_NO2_column_number_density').mean()
NO2_id = NO2_img.getMapId( \
    {'palette': vis_palette, \
    'min': 0.0, \
    'max': 0.0002})

# Ozone
O3_img = s5_O3.filterDate(start_date, end_date) \
              .filterBounds(great_britain) \
              .select('O3_column_number_density').mean()
O3_id = O3_img.getMapId( \
    {'palette': vis_palette, \
    'min': 0.12, \
    'max': 0.15})

# Sulphur Dioxide
SO2_img = s5_SO2.filterDate(start_date, end_date) \
                .filterBounds(great_britain) \
                .select('SO2_column_number_density').mean()
SO2_id = SO2_img.getMapId( \
    {'palette': vis_palette, \
    'min': 0.0, \
    'max': 0.0005})

# Methane
CH4_img = s5_CH4.filterDate(start_date, end_date) \
                .filterBounds(great_britain) \
                .select('CH4_column_volume_mixing_ratio_dry_air').mean()
CH4_id = CH4_img.getMapId( \
    {'palette': vis_palette, \
    'min': 1750, \
    'max': 1900})

In [None]:
# Visualise data on a Folium map 
map = folium.Map(location=[51.5, 0.1], \
                    prefer_canvas=True)

folium.TileLayer(
    tiles=s2_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='median composite',
  ).add_to(map)

folium.TileLayer(
    tiles=CO_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Carbon Monoxide',
  ).add_to(map)

folium.TileLayer(
    tiles=HCHO_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Formaldehyde',
  ).add_to(map)

folium.TileLayer(
    tiles=NO2_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Nitrogen Dioxide',
  ).add_to(map)

folium.TileLayer(
    tiles=O3_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Ozone',
  ).add_to(map)

folium.TileLayer(
    tiles=SO2_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Sulphur Dioxide',
  ).add_to(map)

folium.TileLayer(
    tiles=CH4_id['tile_fetcher'].url_format,
    #TODO update attr field
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='Methane',
  ).add_to(map)
  
map.add_child(folium.LayerControl())
map

In [None]:
# take a sample of the image at the points given and add a random column
# TODO combine datasets into one. can tabular recommenders include images? 
sample = image.sampleRegions(
    collection=)

In [7]:
# Export images from imagecollections

training_task = ee.batch.Export.table.toCloudStorage(
  collection=training,
  description='Training Export',
  fileNamePrefix=TRAIN_FILE_PREFIX,
  bucket=OUTPUT_BUCKET,
  fileFormat='TFRecord',
  selectors=FEATURE_NAMES)

testing_task = ee.batch.Export.table.toCloudStorage(
  collection=testing,
  description='Testing Export',
  fileNamePrefix=TEST_FILE_PREFIX,
  bucket=OUTPUT_BUCKET,
  fileFormat='TFRecord',
  selectors=FEATURE_NAMES)

In [None]:
# Start the tasks.
training_task.start()
testing_task.start()

In [None]:
pprint(ee.batch.Task.list())

In [8]:
path = untar_data(URLs.PETS)/'images'

Downloading https://s3.amazonaws.com/fast-ai-imageclas/oxford-iiit-pet.tgz


In [21]:
type(get_image_files(path))
print(get_image_files(path)[0])

/root/.fastai/data/oxford-iiit-pet/images/Siamese_167.jpg
/root/.fastai/data/oxford-iiit-pet/images/japanese_chin_59.jpg
