# World Airport Imagery Retrieval


This notebook uses the Aviation Fanatic dataset to query the Sentinel2 API using geometries constructed using Shapely and constructs the URLS needed for each airport image and stores it into a JSON file which can be referenced later as metadata. 

In [1]:
%config IPCompleter.greedy=True
import folium
import os
import requests
from tqdm import tqdm
from sentinelsat import SentinelAPI
import geopandas as gpd
import pandas as pd
import numpy as np
import csv
from shapely.geometry import Polygon
import fiona
from pyproj import Proj, CRS,transform
from datetime import datetime
import pygc
from io import StringIO
import shutil
import json
import pickle

import json
from tqdm import tqdm
import requests
import os
import rasterio as rio
from rasterio.mask import mask
from osgeo import gdal
import pickle
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


### File Paths

In [2]:
main_datafile_path = "estingAustralia.csv"

### Ranking all airports based on IATA

In [None]:
treecoords = pd.read_csv(main_datafile_path)
treecoords

### Reading current file and removing duplicates

In [None]:
batch = treecoords.loc[0:0]
batch

In [None]:
# sitelist = treecoords['site']
# lat = list(treecoords['lat'])
# lon = list(treecoords['long'])
# projectlist = list(treecoords["project"])

sitelist = batch['site']
lat = list(batch['lat'])
lon = list(batch['long'])
projectlist = list(batch["project"])

__________________________________ END OF DATA WRANGLING ______________________________________

So far we have only played in getting the right data. This step could be much shorter and with less data files if your dataset is already clean. In short, you need a dataset with the names tagged to their right coordinates (lat-lon) and you can skip this entire above section

### Geo Boundary Construction around the airport 

In the below step we write a function that takes a coordinate point and draws a square with the specified dimensions

In [None]:
def latLonBoxByWandH(lat,lon,ew_width,ns_height):
    lats, lons = [], []
    #distance in m, az (in deg), lat (in deg), long (in deg)

    res = pygc.great_circle(distance=ew_width/2, azimuth=90, latitude=lat, longitude=lon)
    lat, lon = res['latitude'], res['longitude']

    res = pygc.great_circle(distance=ns_height/2, azimuth=180, latitude=lat, longitude=lon)
    lat, lon = res['latitude'], res['longitude']
    lats.append(lat), lons.append(lon)

    res = pygc.great_circle(distance=ew_width, azimuth=270, latitude=lat, longitude=lon)
    lat, lon = res['latitude'], res['longitude']
    lats.append(lat), lons.append(lon)

    res = pygc.great_circle(distance=ns_height, azimuth=0, latitude=lat, longitude=lon)
    lat, lon = res['latitude'], res['longitude']
    lats.append(lat), lons.append(lon)

    res = pygc.great_circle(distance=ew_width, azimuth=90, latitude=lat, longitude=lon)
    lat, lon = res['latitude'], res['longitude']
    lats.append(lat), lons.append(lon)
    
    return {'lats':lats,'lons':lons}

### Setting Boundary parameters

The below numbers are the side lengths of the square in m. The length also decides the size of the final image as Sentinel2 provides images at a resolution of 10/pixel

In [None]:
#lengths in m
ew_width = 2000
ns_height = 2000
size = int(ew_width/1000)

In [None]:
#loc_lon,loc_lat
loc_lat = np.asfarray(lat,float)
loc_lon = np.asfarray(lon,float)
len(loc_lat),len(loc_lon)

### Drawing Geo Boundaries from coordinates

In [None]:
polygons,footprint =[], []

for lat, lon, code in tqdm(zip(loc_lat,loc_lon, sitelist), total = len(loc_lat)):

    box = latLonBoxByWandH(lat,lon,ew_width,ns_height)
    
    polygon_geom = Polygon(zip(box['lons'], box['lats']))
    footprint.append(polygon_geom)
    crs = CRS('epsg:4326')
    polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom])  
    
    # Save polygon to disk for later use
    with open("copingmarkers.shp", "wb") as poly_file:
        pickle.dump(polygon, poly_file, pickle.HIGHEST_PROTOCOL)
        
    # polygon.to_file("copingmarkers.shp")

    polygons.append(polygon)

In [None]:
polygons

### Generating the Basemap with all Airports

In [None]:
Proj("epsg:4326")

m = folium.Map([loc_lat[0],loc_lon[0]], zoom_start=12,tiles = "https://{s}.basemaps.cartocdn.com/dark_nolabels/{z}/{x}/{y}.png",
attr = '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>')
for polygon in polygons:
    folium.GeoJson(polygon).add_to(m)
    
m

### Optionally save the Basemap to HTML file

m.save('abovemap.html')

### Setting Query parameters, start and end conditions

### Register for an account and replace XXX with your credentials

https://scihub.copernicus.eu/dhus/#/home

In [None]:
user = 'fries' 
password = 'astorea4358' 

api = SentinelAPI(user, password, 'https://scihub.copernicus.eu/dhus')
start = '20220101'
end = '20220601'
cloudperc = 50

In [None]:
def apiQuery(iatalist,geometry,dateStart,dateEnd,cloud):
    query = api.query(geometry,
                # date = (str(getDateStamp(i)[0]),str(getDateStamp(i)[1])),
                # date = ("NOW-400DAYS", "NOW"),
                date = (str(dateStart), str(dateEnd)),
                platformname = 'Sentinel-2',
                processinglevel = 'Level-2A',
                area_relation = ('Contains'),
                cloudcoverpercentage = (0,cloud))
    return query
    #print(ia, len(query))

### Querying SentinelAPI based on the constructed geo boundaries

In [None]:
apiq = []
# for ia,boundary in tqdm(zip(iatalist,footprint),total=len(iatalist)):
for ia,boundary in tqdm(zip(sitelist,footprint), total=len(sitelist)):
    cloudperc = 80
    queried = apiQuery(ia,boundary,start,end,cloudperc)
    # while len(queried) == 0:
    #     cloudperc +=10
    #     minperc = min(cloudperc, 100)
    #     # print(ia, minperc)
    #     queried = apiQuery(ia,boundary,start,end,minperc)
    apiq.append(queried)
#     print(ia,len(queried))
   

In [None]:
products_list, products_list_sorted,images,titlelist,bestlist,datestamp,cloudiness =[],[], [], [], [] , [],[]
for products in apiq:
        products_list.append(api.to_geodataframe(products))
        
#Sorting the list of products within our array of locations for minimum cloudcover
for products in tqdm(products_list, total = len(sitelist)):
    try:
        products_list_sorted.append(products.sort_values(['cloudcoverpercentage'],ascending = [True]))
    except:
        continue

# for i in range(0,len(products_list_sorted)):
#         images.append(products_list_sorted[i].head(1))

for i in range(0,len(images)):
    titlelist.append(products_list_sorted[i].title[0])
    bestlist.append(products_list_sorted[i].uuid[0])
    d = products_list_sorted[i].beginposition[0].date().strftime("%Y%m%d")
    c = products_list_sorted[i].cloudcoverpercentage
    datestamp.append(d)
    cloudiness.append(c)

### Debugging code to check the Dataframes generated

In [None]:
prodlist = bestlist
len(prodlist)

In [None]:
len(products_list_sorted)

In [None]:
products_list_sorted[0].columns

In [None]:
products_list_sorted[0]['link'][1]

In [None]:
products_list_sorted[0]['title'][0] #title
products_list_sorted[0]['granuleidentifier'][0][13:16] #granuleName
products_list_sorted[0]['granuleidentifier'][0][49:55] #tileName
products_list_sorted[0]['granuleidentifier'][0][41:48] #granuleName
products_list_sorted[1]['datastripidentifier'][0][42:57] #granuleDate
products_list_sorted[1]['identifier'][0][11:26] #tileDate

### Construction of Download URL from the Dataframe Parameters 

In [202]:
def urlConstructor(uuid, title, granuleID, tileID, granuleName, granuleDate, tileDate):
    header = "https://apihub.copernicus.eu/apihub/odata/v1"
    fileName = f"{tileID}_{tileDate}_TCI_10m.jp2"
    queryUrl = f"{header}/Products('{uuid}')/Nodes('{title}.SAFE')/Nodes('GRANULE')/Nodes('{granuleID}_{tileID}_{granuleName}_{granuleDate}')/Nodes('IMG_DATA')/Nodes('R10m')/Nodes('{fileName}')/$value"
    thumbUrl = f"{header}/Products('{uuid}')"
    
    return queryUrl, fileName

In [None]:
# len(products_list[1301][products_list[1301]['granuleidentifier'].notna()])
clearedProducts_list = []
for products in products_list_sorted:
    clearedProducts = products[products['granuleidentifier'].notna()]
    clearedProducts_list.append(clearedProducts)
# products_list_sorted[1301]['granuleidentifier']

In [None]:
airportMetaDB = {}
qUrls, fNames = [], []
for product,iataCode in tqdm(zip(clearedProducts_list,sitelist), total=len(sitelist)):
    valueDict = {}
    uuid = product['uuid'][0] #3cf16779-b786-462f-8bd5-6fe43a73d213
    title = product['title'][0] #S2A_MSIL2A_20211130T162631_N0301_R040_T16SGC_20211130T191654
    preview_thumb = product["link_icon"][0] #thumbnail-preview
    # print(product['granuleidentifier'][0])
    granuleID = product['granuleidentifier'][0][13:16] #L2A
    tileID = product['granuleidentifier'][0][49:55] #T50TMK
    granuleName = product['granuleidentifier'][0][41:48] #A033642
    granuleDate = product['datastripidentifier'][0][42:57] #20211125T030026
    tileDate = product['identifier'][0][11:26] #20211125T030029
    u = urlConstructor(uuid, title, granuleID, tileID, granuleName, granuleDate, tileDate)[0]
    f = urlConstructor(uuid, title, granuleID, tileID, granuleName, granuleDate, tileDate)[1]

    
    valueDict['uuid'] = uuid
    valueDict['title'] = title
    valueDict['thumbnail'] = preview_thumb
    valueDict['granule_id'] = granuleID
    valueDict['tile_id'] = tileID
    valueDict['granule_name'] = granuleName
    valueDict['granule_date'] = granuleDate
    valueDict['tile_date'] = tileDate
    valueDict['product_url'] = u
    valueDict['product_filename'] = f
    valueDict['processed_filename'] = f"S_{iataCode}_{tileDate[:8]}.tiff"
    
    airportMetaDB[iataCode] = valueDict
    qUrls.append(u)
    fNames.append(f)

In [None]:
googURL = []
for iatas in list(airportMetaDB.keys()):
    currentObj = airportMetaDB[iatas]
    header = f'https://storage.googleapis.com/gcp-public-data-sentinel-2/L2/tiles'
    folder = f'{currentObj["tile_id"][1:3]}/{currentObj["tile_id"][3:4]}/{currentObj["tile_id"][4:6]}'
    product_title = f'{currentObj["title"]}.SAFE'
    granule_title = f'{currentObj["granule_id"]}_{currentObj["tile_id"]}_{currentObj["granule_name"]}_{currentObj["granule_date"]}'
    file_title = f'{currentObj["product_filename"]}'
    
    
    finalURL = f'{header}/{folder}/{product_title}/GRANULE/{granule_title}/IMG_DATA/R10m/{file_title}'
    currentObj["goog_url"] = finalURL    

### Dumping all data from Query process onto JSON file

In [None]:
from datetime import datetime
key = datetime.now().strftime("%Y%m%d")

In [None]:
with open("ForestCoords.json", "w") as outfile:
    json.dump(airportMetaDB, outfile)


### Making raw, warped and clipped files

In [None]:
batch = "2kmx2km"
current_set = "test"

In [None]:
with open("ForestCoords.json", "r") as infile:
    airportDownloadDriver = json.load(infile)

In [None]:
fNames = []
qUrls = []
datestamp = []
processedfNames = []

for codes in tqdm(sitelist, total = len(sitelist)):
    # qUrls.append(airportDownloadDriver[codes]["product_url"])
#     testing with google URL
    qUrls.append(airportDownloadDriver[codes]["goog_url"])
    fNames.append(airportDownloadDriver[codes]["product_filename"])
    processedfNames.append(airportDownloadDriver[codes]["processed_filename"])
    datestamp.append(airportDownloadDriver[codes]["tile_date"][:8])

In [None]:
raw_directory = f"../img_data/{batch}/raw/"
os.makedirs(raw_directory, exist_ok=True)

In [None]:
def getImages(fNames,urls,iata):
    for names,urls,ia in tqdm(zip(fNames,urls,iata),total = len(iata), leave = False):
        try:
#             Chceking if existing file is valid
            productName = f"{ia}_{names}"
            if productName in os.listdir(raw_directory):
                productSize = int(str(os.stat(f"{raw_directory}/{productName}").st_size))
                if productSize > 1000:
                    # print("File already exists! skipping")
                    continue
            # else:
            # print("File doesn't exist! Downloading")
            
            # r = requests.get(urls,auth = (user,password))
            
#             Without authentication for google storage
            r = requests.get(urls)

            # if r.status_code ==200
            with open(f"{raw_directory}/{str(ia)}_{str(names)}",'wb') as f: 
                f.write(r.content) 
            f.close()
            
        except Exception as e:
            print(ia,e)
            # print("Error Downloading File! Storing Metadata")
            with open(f"../datafiles/missing/{str(ia)}.json",'wb') as missingTile:
                json.dump(airportDownloadDriver[ia], missingTile)

In [None]:
getImages(fNames,qUrls,sitelist)

In [200]:
brokenList = []
for file in os.listdir(raw_directory):
    if os.stat(f"{raw_directory}/{file}").st_size < 2048:
        brokenList.append(airportDownloadDriver[file[:3]]["product_url"])

In [201]:
brokenList

["https://apihub.copernicus.eu/apihub/odata/v1/Products('aedd4c4f-f1d4-45be-8dc6-f649fa34aa5f')/Nodes('S2B_MSIL2A_20220506T021339_N0400_R060_T50HMG_20220506T061534.SAFE')/Nodes('GRANULE')/Nodes('L2A_T50HMG_A026970_20220506T022353')/Nodes('IMG_DATA')/Nodes('R10m')/Nodes('T50HMG_20220506T021339_B02_60m.jp2')/$value",
 "https://apihub.copernicus.eu/apihub/odata/v1/Products('aedd4c4f-f1d4-45be-8dc6-f649fa34aa5f')/Nodes('S2B_MSIL2A_20220506T021339_N0400_R060_T50HMG_20220506T061534.SAFE')/Nodes('GRANULE')/Nodes('L2A_T50HMG_A026970_20220506T022353')/Nodes('IMG_DATA')/Nodes('R10m')/Nodes('T50HMG_20220506T021339_B02_60m.jp2')/$value"]

In [None]:
sortedFileList = []
for ia in tqdm(sitelist):
    for files in os.listdir(raw_directory):
        newstring = [ia, '_']
        x = ''.join(newstring)
        if files.endswith(".jp2") and x in files:
            sortedFileList.append(files)

In [None]:
dataArr = []
for items in tqdm(sortedFileList, total=len(sitelist)):
    data = rio.open(f"{raw_directory}/{str(items)}")
    #print(items)
    #print(data.meta['crs'])
    dataArr.append(data)
    data.close()

In [None]:
targetPath = f"../img_data/{batch}/warped/"
os.makedirs(targetPath, exist_ok=True)

In [None]:
len(sortedFileList)

In [None]:
fileNames,file = [],[]
for items,meta,ia in tqdm(zip(sortedFileList,dataArr,sitelist),total = len(sitelist)):
    openFile = f"{raw_directory}/{str(items)}"
    # saveFileName = str(sitelist.index(str(ia)) + 1) + "_"  + str(items)[:-4] + ".tiff"
    saveFileName = str(items)[:-4] + ".tiff"
    saveFile = targetPath + saveFileName
    fileNames.append(saveFile)
    if saveFileName not in os.listdir(targetPath):
    #print(openFile)
        input_raster = gdal.Open(openFile)
        #print(saveFile)
        gdal.Warp(saveFile,
                  openFile,
                  dstSRS = 'epsg:4326',
                  width = str(meta.meta['width']),
                  height = str(meta.meta['height'])
                 )
        continue
    

In [None]:
clipped_path = f"../img_data/{batch}/clipped/{current_set}"
os.makedirs(clipped_path, exist_ok=True)

In [None]:
polygons

In [None]:
geoms = []
for i in range(0,len(datestamp)):
    geoms.append(polygons[i]['geometry'])

In [None]:
geoms

In [None]:
for names,outNames, g,date,ia in tqdm(zip(fileNames, processedfNames,geoms,datestamp,sitelist),total = len(fileNames)):
    try:
        with rio.open(names) as src:
                out_image, out_transform = mask(src, g, crop=True, filled=True)
                out_meta = src.meta.copy()

        out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width": out_image.shape[2],
                             "transform": out_transform})

        with rio.open(f"{clipped_path}/{outNames}", "w", **out_meta) as dest:
            dest.write(out_image)
        # os.remove(titlelist[k][i])
        # print(ia)
        src.close()
        dest.close()
    except Exception as e:
        print(e)
        continue