## Downloading Satellite Imagery Procedurally from Metadata 

This notebook uses the metadata generated by the companion notebook to procedurally download, warp, and store images of airports based on the scraped Aviation Fanatic dataset.

In [None]:
import json
from tqdm import tqdm
import requests
import os
import rasterio as rio
from rasterio.mask import mask
from osgeo import gdal
import pickle
import numpy as np

### Register for an account and replace XXX with your credentials

https://scihub.copernicus.eu/dhus/#/home

In [None]:
# Name of the current download batch; later cells use it as a sub-directory
# of ../img_data/ for the raw, warped and clipped outputs.
batch = "prelim"

In [None]:
# Load the download metadata. Later cells index it by site code and read the
# "goog_url", "product_url", "product_filename", "processed_filename" and
# "tile_date" fields of each entry.
with open("ForestCoords.json", "r") as infile:
    airportDownloadDriver = json.load(infile)

In [None]:
# `sitelist` is not defined anywhere in this notebook; if no earlier cell or
# session state provides it, fall back to every site code in the metadata
# file so the loop below does not fail with NameError.
try:
    sitelist
except NameError:
    sitelist = list(airportDownloadDriver)

# Parallel lists with one entry per site, in `sitelist` order.
fNames = []           # raw product filenames
qUrls = []            # download URLs
datestamp = []        # first 8 characters of each tile date (presumably YYYYMMDD -- confirm)
processedfNames = []  # filenames used for the processed (clipped) outputs

for codes in tqdm(sitelist):
    entry = airportDownloadDriver[codes]
    # The Google-hosted URL is used for testing; switch to
    # entry["product_url"] to download from the original product source.
    qUrls.append(entry["goog_url"])
    fNames.append(entry["product_filename"])
    processedfNames.append(entry["processed_filename"])
    datestamp.append(entry["tile_date"][:8])

In [None]:
# Directory that receives the downloaded raw products for this batch.
# The path ends with "/", so later f"{raw_directory}/{name}" joins contain a
# doubled slash.
raw_directory = f"../img_data/{batch}/raw/"
os.makedirs(raw_directory, exist_ok=True)

In [None]:
# os.stat(f"{raw_directory}/MJF_T32WPU_20210828T104619_TCI_10m.jp2").st_size 

In [None]:
def getImages(fNames,urls,iata):
    """Download every product into ``raw_directory``, skipping existing files.

    Each product is saved as ``<code>_<filename>``. A file that already
    exists and is larger than 1000 bytes is treated as a valid earlier
    download and is not fetched again.

    Args:
        fNames: Product filenames, one per site.
        urls: Download URLs, parallel to ``fNames``.
        iata: Site codes, parallel to ``fNames``. Each code prefixes the
            saved filename and is the key into ``airportDownloadDriver``.

    Returns:
        None. Failures are not raised: the site code and error are printed
        and the site's metadata entry is written to
        ``../datafiles/missing/<code>.json`` so it can be retried later.
    """
    missing_dir = "../datafiles/missing"
    for name, url, code in tqdm(zip(fNames, urls, iata), total=len(iata), leave=False):
        target = f"{raw_directory}/{code}_{name}"
        try:
            # Checking whether an existing file is valid (non-trivial size).
            if os.path.exists(target) and os.path.getsize(target) > 1000:
                continue

            # Without authentication, for Google storage. For sources that
            # need credentials use: requests.get(url, auth=(user, password))
            r = requests.get(url)
            # Treat HTTP errors as failed downloads instead of saving the
            # error page under the image filename.
            r.raise_for_status()

            with open(target, 'wb') as f:
                f.write(r.content)

        except Exception as e:
            print(code, e)
            # Store the metadata of the tile that could not be downloaded.
            # json.dump writes text, so the file must be opened in text mode.
            os.makedirs(missing_dir, exist_ok=True)
            with open(f"{missing_dir}/{code}.json", 'w') as missingTile:
                json.dump(airportDownloadDriver[code], missingTile)

In [None]:
getImages(fNames,qUrls,sitelist)

In [None]:
# Product URLs of every file in the raw directory smaller than 2048 bytes.
# The first three characters of the filename are used as the metadata key.
brokenList = [
    airportDownloadDriver[file[:3]]["product_url"]
    for file in os.listdir(raw_directory)
    if os.stat(f"{raw_directory}/{file}").st_size < 2048
]

In [None]:
len(brokenList)

## Warping the Products

In [None]:
# Order the downloaded .jp2 files to follow `sitelist`: for each site, take
# the files whose first three characters match the first three characters of
# the site code. The directory is listed once rather than once per site.
rawJp2Files = [f for f in os.listdir(raw_directory) if f.endswith(".jp2")]

sortedFileList = []
for ia in tqdm(sitelist):
    sitePrefix = ia[:3]
    sortedFileList.extend(f for f in rawJp2Files if f[:3] == sitePrefix)
# sortedFileList

In [None]:
# Open each raw product with rasterio and keep the dataset object so later
# cells can read its metadata (`.meta`).
# The progress total now comes from the list actually being iterated.
# NOTE(review): each dataset is closed as soon as it is appended, so later
# `.meta` reads are on closed handles -- confirm the installed rasterio
# version still serves metadata from a closed dataset.
dataArr = []
for items in tqdm(sortedFileList):
    with rio.open(f"{raw_directory}/{items}") as data:
        dataArr.append(data)

In [None]:
dataArr

In [None]:
dataArr[0].meta

In [None]:
# Directory that receives the warped .tiff outputs for this batch.
targetPath = f"../img_data/{batch}/warped/"
os.makedirs(targetPath, exist_ok=True)

# Display what is already present in the warped directory.
os.listdir(targetPath)

In [None]:
# Reproject every raw .jp2 product to EPSG:4326 and save it as a .tiff in
# `targetPath`, keeping the source raster's pixel dimensions. Outputs that
# already exist are not regenerated. `fileNames` collects the output path of
# every product (existing or new) for the clipping step.
fileNames = []
pairedCount = min(len(sortedFileList), len(dataArr), len(sitelist))
for items, meta, ia in tqdm(zip(sortedFileList, dataArr, sitelist), total=pairedCount):
    openFile = f"{raw_directory}/{items}"
    saveFileName = os.path.splitext(items)[0] + ".tiff"
    saveFile = targetPath + saveFileName
    fileNames.append(saveFile)

    if os.path.exists(saveFile):
        continue

    # gdal.Warp opens the source path itself, so no separate gdal.Open handle
    # is needed (the previous one was never used or released).
    warped = gdal.Warp(saveFile,
                       openFile,
                       dstSRS='epsg:4326',
                       width=meta.meta['width'],
                       height=meta.meta['height'])
    if warped is None:
        # Without GDAL exceptions enabled, a failed warp returns None.
        print(f"Warp failed for {openFile}")
    # Drop the reference so GDAL flushes and closes the output file.
    warped = None
    

In [None]:
fileNames

## Loading Polygons generated from Shapely

In [None]:
# Label of the current clipping set; used as the sub-directory name under
# the batch's clipped/ directory.
current_set = "10"

In [None]:
# Unpickle one polygon record per site for the first ten entries of `sitelist`.
# NOTE(review): every iteration reads the same "copingmarkers.shp" file and
# the site code is not used -- confirm whether a per-site file was intended.
# NOTE(security): pickle.load can execute arbitrary code; only load files
# from a trusted source.
polygons = []
firstTen = sitelist[0:10]
for codes in tqdm(firstTen, total=len(firstTen)):
    with open("copingmarkers.shp", "rb") as poly_file:
        loaded = pickle.load(poly_file)
    polygons.append(loaded)

In [None]:
polygons[0]

In [None]:
# Extract the 'geometry' entry of each loaded polygon record, one per
# datestamp. Pairing with zip stops at the shorter of the two lists, so having
# more datestamps than loaded polygons no longer raises IndexError.
geoms = [poly['geometry'] for poly, _ in zip(polygons, datestamp)]

In [None]:
# Directory that receives the clipped rasters for the current batch and set.
clipped_path = f"../img_data/{batch}/clipped/{current_set}"
os.makedirs(clipped_path, exist_ok=True)

In [None]:
len(datestamp)

In [None]:
# Clip every warped raster to its site geometry and write the result to
# `clipped_path` under the processed filename. A failure for one site is
# reported with its site code and does not stop the remaining sites.
# `datestamp` replaces the undefined `testdatestamp`; `date` is not used in
# the body and only takes part in the lock-step iteration.
for names, outNames, g, date, ia in tqdm(
    zip(fileNames, processedfNames, geoms, datestamp, sitelist),
    total=len(fileNames),
):
    try:
        with rio.open(names) as src:
            # crop=True trims the output to the geometry's extent;
            # filled=True fills pixels outside the geometry.
            out_image, out_transform = mask(src, g, crop=True, filled=True)
            out_meta = src.meta.copy()

        # The clipped array is indexed (bands, height, width).
        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

        # The `with` blocks close both datasets, so no explicit close() calls
        # are needed.
        with rio.open(f"{clipped_path}/{outNames}", "w", **out_meta) as dest:
            dest.write(out_image)
    except Exception as e:
        print(ia, e)