In [1]:
import os
import urllib
import urllib.request
from datetime import datetime

import pandas as pd
import requests
from google.cloud import bigquery
from google.oauth2 import service_account

In [2]:
def query_sentinel(BASE_URL, key_json, project_id, start, end, tile, cloud=100.):
    """
    Queries the public Sentinel-2 index in Google BigQuery and returns the download URLs of matching scenes.

    The tile(s) and the date range are passed as query parameters instead of being
    formatted into the SQL text, so unusual input values cannot alter the statement.
    Every scene found is printed with its cloud cover, whether or not it is kept.

    Parameters:
    - BASE_URL (str): Prefix substituted for 'gs://' in each scene's base_url.
    - key_json (str): Path to the JSON key file for the Google Cloud service account.
    - project_id (str): Google Cloud project ID the query job runs under.
    - start (datetime, date or str): First sensing date (inclusive). A datetime is reduced to its date part.
    - end (datetime, date or str): Last sensing date (inclusive). A datetime is reduced to its date part.
    - tile (str or iterable of str): MGRS tile identifier, or several identifiers.
    - cloud (float, optional): Maximum allowed cloud cover percentage (inclusive). Default is 100%.

    Returns:
    - good_scenes (list): URLs of the scenes whose cloud cover is less than or equal to `cloud`.
    """
    credentials = service_account.Credentials.from_service_account_file(key_json)
    client = bigquery.Client(credentials=credentials, project=project_id)

    def as_date(value):
        # The filter compares calendar dates, so any time-of-day component is dropped.
        return value.date() if isinstance(value, datetime) else value

    # A single tile is wrapped in a list so one query shape serves both cases.
    tiles = [tile] if isinstance(tile, str) else list(tile)

    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ArrayQueryParameter("tiles", "STRING", tiles),
            bigquery.ScalarQueryParameter("start", "DATE", as_date(start)),
            bigquery.ScalarQueryParameter("end", "DATE", as_date(end)),
        ]
    )
    # Only the columns read below are selected; BigQuery bills by the columns scanned.
    sql = """
        SELECT product_id, cloud_cover, base_url
        FROM `bigquery-public-data.cloud_storage_geo_index.sentinel_2_index`
        WHERE mgrs_tile IN UNNEST(@tiles)
          AND DATE(sensing_time) BETWEEN @start AND @end
    """
    df = client.query(sql, job_config=job_config).result().to_dataframe()

    good_scenes = []
    for row in df.itertuples(index=False):
        print (row.product_id, '; cloud cover:', row.cloud_cover)
        # cloud_cover is converted explicitly because it may not arrive as a number.
        if float(row.cloud_cover) <= cloud:
            good_scenes.append(row.base_url.replace('gs://', BASE_URL))
    return good_scenes

In [3]:
def download_file(url, dst_name, timeout=60):
    """
    Downloads a file from a given URL and saves it to the specified destination.

    The response is streamed to disk in chunks. An HTTP error status (e.g. 404)
    is treated as a failure instead of saving the error page as if it were the file.
    On failure a message is printed and any partially written file is removed.

    Parameters:
    - url (str): The URL of the file to be downloaded.
    - dst_name (str): The local destination path where the file will be saved.
    - timeout (float, optional): Seconds to wait for the server before giving up. Default is 60.

    Returns:
    bool: True if the file was downloaded completely, False otherwise.
    """
    try:
        # The context manager releases the connection even if writing fails midway.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(dst_name, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=100 * 100):
                    out_file.write(chunk)
    except (requests.RequestException, OSError) as err:
        print ('\t ... {f} FAILED!'.format(f=url.split('/')[-1]))
        print ('\t     reason: {e}'.format(e=err))
        # A truncated file must not be mistaken for a finished download later on.
        try:
            if os.path.exists(dst_name):
                os.remove(dst_name)
        except OSError:
            pass
        return False
    return True

In [4]:
def make_safe_dirs(scene, outpath):
    """
    Prepares the local directory of a Sentinel-2 scene and lists the files to download.

    The scene's manifest.safe is downloaded (replacing any existing copy) and scanned
    for 'href' entries. Entries under /GRANULE/ are kept only if their path contains
    the tile identifier taken from the scene name; all other entries are kept.
    The AUX_DATA, HTML and rep_info directories are created, and the Level-1C
    metadata schema (.xsd) is downloaded into rep_info.

    Parameters:
    - scene (str): URL of the Sentinel-2 scene.
    - outpath (str): Local path where the scene directory will be created.

    Returns:
    list: A list of (download URL, local path) tuples for the files in the scene.

    Raises:
    FileNotFoundError: If the manifest is not present locally after the download attempt.
    """
    scene_name = os.path.basename(scene)
    scene_path = os.path.join(outpath, scene_name)
    # The manifest is written inside the scene directory, so it has to exist first.
    os.makedirs(scene_path, exist_ok=True)

    manifest = os.path.join(scene_path, 'manifest.safe')
    if os.path.exists(manifest):
        os.remove(manifest)
    download_file(scene + '/manifest.safe', manifest)
    with open(manifest, 'r') as f:
        manifest_tokens = f.read().split()

    # Manifests with more than 1600 whitespace-separated tokens have their href
    # value cut at the '><' position; shorter ones are cut two characters earlier.
    # NOTE(review): presumably these are two manifest layouts — confirm.
    trim = 0 if len(manifest_tokens) > 1600 else 2
    # Second-to-last '_'-separated field of the scene name, e.g. 'T37QDD'.
    tile_tag = '_' + scene_name.split('_')[-2] + '_'

    download_links = []
    for token in manifest_tokens:
        if 'href' not in token:
            continue
        # The first seven characters are skipped — presumably 'href=".' — TODO confirm.
        online_path = token[7:token.find('><') - trim]
        # Granule files belonging to a different tile are not downloaded.
        if online_path.startswith('/GRANULE/') and tile_tag not in online_path:
            continue
        local_path = os.path.join(scene_path, *online_path.split('/')[1:])
        download_links.append((scene + online_path, local_path))

    for extra_dir in ('AUX_DATA', 'HTML', 'rep_info'):
        os.makedirs(os.path.join(scene_path, extra_dir), exist_ok=True)

    xsd_name = 'S2_User_Product_Level-1C_Metadata.xsd'
    urllib.request.urlretrieve(scene + '/rep_info/' + xsd_name,
                               os.path.join(scene_path, 'rep_info', xsd_name))

    return download_links

In [5]:
def download_sentinel(scene, dst):
    """
    Downloads files associated with a Sentinel-2 scene to a specified destination.

    The scene directory is created under `dst` if needed, the list of files is taken
    from make_safe_dirs, and each file is downloaded in sorted order. Files that
    already exist locally are deleted and downloaded again. The download of the
    scene stops at the first file that fails.

    Parameters:
    - scene (str): URL of the Sentinel-2 scene.
    - dst (str): Local directory where scene files will be downloaded.

    Returns:
    None
    """
    scene_name = scene.split('/')[-1]
    scene_path = os.path.join(dst, scene_name)
    # makedirs also creates `dst` itself when it does not exist yet.
    os.makedirs(scene_path, exist_ok=True)
    print ('Downloading scene {s} ...'.format(s=scene_name))

    for remote_url, local_path in sorted(make_safe_dirs(scene, dst)):
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        if os.path.exists(local_path):
            os.remove(local_path)
        if local_path.endswith('.jp2'):
            print ('\t ... *{b}'.format(b=local_path.split('_')[-1]))
        succeeded = download_file(remote_url, local_path)
        # The file was removed above, so a missing file after the call means the
        # download failed even when download_file does not report a status.
        if succeeded is False or not os.path.exists(local_path):
            print ('\t ... {f} failed to download! Download for this scene is cancelled here!'.format(f=remote_url))
            return

This Jupyter notebook can be used in two ways, presented below:
- In the first, the user sets the configuration variables directly in the cell: the output folder, the tile, the maximum cloud percentage, etc.
- In the second, the notebook reads a .csv file containing the necessary information for each product, such as tile, product title, image date, etc.

### 1st Way

In [6]:
# Single-product download: one tile on one sensing date.
BASE_URL = 'http://storage.googleapis.com/'
key_json = ".../path-to-key.json"
outdir = '.../path-to-output'
tile = '37QDD'
cloud = 20

# The project ID is derived from the key file name: the directory part and
# everything from the first '.' are dropped, then the last '-'-separated piece.
key_file_name = key_json.rpartition('/')[2]
key_file_stem = key_file_name.partition('.')[0]
project_id = key_file_stem.rpartition('-')[0]

# Start and end are the same day, so only scenes sensed on that date match.
start = end = datetime(2019, 10, 12)

scene_list = query_sentinel(BASE_URL, key_json, project_id, start, end, tile, cloud)
for scene_url in scene_list:
    download_sentinel(scene_url, outdir)

S2A_MSIL1C_20191012T075851_N0208_R035_T37QDD_20191012T092728 ; cloud cover: 7.2512
Downloading scene S2A_MSIL1C_20191012T075851_N0208_R035_T37QDD_20191012T092728.SAFE ...
	 ... *B01.jp2
	 ... *B02.jp2
	 ... *B03.jp2
	 ... *B04.jp2
	 ... *B05.jp2
	 ... *B06.jp2
	 ... *B07.jp2
	 ... *B08.jp2
	 ... *B09.jp2
	 ... *B10.jp2
	 ... *B11.jp2
	 ... *B12.jp2
	 ... *B8A.jp2
	 ... *TCI.jp2
	 ... *PVI.jp2


### 2nd way

In [None]:
# Download multiple products listed in a CSV file.
# This cell reuses BASE_URL, key_json, project_id and outdir, which are set in
# the '1st Way' cell, so that cell has to be run first.
csv_path = ".../path-to-csv.csv"
cloud = 20
# Number of leading rows (after sorting by latitude) that are skipped.
# NOTE(review): presumably rows already processed in an earlier run — confirm.
SKIP_ROWS = 34

df = (
    pd.read_csv(csv_path, index_col=0, header=0, low_memory=False)
    .sort_values(by=['Lat'])
    .iloc[SKIP_ROWS:]
    # Rows whose product title is the string '0' are dropped.
    .loc[lambda frame: frame.Product_title != '0']
    .reset_index(drop=True)
)

for tile, image_date in zip(df['tile'], df['Image_date']):
    # Image_date is written as month/day/year; each product is queried for that single day.
    start = end = datetime.strptime(image_date, "%m/%d/%Y")
    scene_list = query_sentinel(BASE_URL, key_json, project_id, start, end, tile, cloud)
    for scene_url in scene_list:
        download_sentinel(scene_url, outdir)