# Downloading Data

## Import libraries

In [1]:
import ee
import pandas as pd
import altair as alt
import numpy as np
import folium
import matplotlib.pyplot as plt
import pprint
from sys import exit

## Connecting to Google Engine

In [2]:
# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library.
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=R4lYqvoV6CfMX4q82uypEH8K4XmVgoX6WDWHOS5ZYqM&tc=JXQV3BRhF9Z5CpRzO5DmhqHR17ZGIN8k3QtHeBPr18U&cc=c0apIqR1FoOZEFaxiMQVsmVFMie-tdlSs4qCdr2vsS0

The authorization workflow will generate a code, which you should paste in the box below.
Enter verification code: 4/1Adeu5BXlLT-ZqCJGsQSUvxFmta5UvZY9hDswieUTumHmrEnCklCHD7OnJlI

Successfully saved authorization token.


## Apply functions

In [3]:
def Get_Bands_For_Collection(_collection):

  if _collection == "LANDSAT/LT05/C02/T1_L2":
    return ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7']

  elif _collection == "LANDSAT/LC08/C02/T1_L2":
    return ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']

  elif _collection == "LANDSAT/LT05/C02/T1":
    return ['B1', 'B2', 'B3', 'B4', 'B5', 'B7']

  else:
    exit('There is no such collection')

In [4]:
def Stack_The_Years(_images_properties_table):
  images_properties_table = _images_properties_table.sort_values(['year', 'cloud cover'])
  all_the_years = np.array(images_properties_table.get('year'))

  stack_years = []
  year = -1

  for image_year in all_the_years:
    if image_year != year:
      stack_years.append(image_year)
    year = image_year
  return stack_years

In [5]:
# get- ee.collection
# return- table (pd.DataFrame) of the images properties (year, month, day, week of the year and cloud cover)
# note- the function takes time, so it prints the amount of images in the collocation and on which image it is working

def Create_Images_Properties_Table(_images_collection):

  number_of_images = _images_collection.size().getInfo()
  images_list = _images_collection.toList(number_of_images)

  columns = ['year', 'month', 'day', 'week of the year', 'cloud cover']
  images_properties_table = pd.DataFrame(index=range(number_of_images), columns=columns)

  print('Number of images: ' + str(number_of_images))

  for i in range(number_of_images):

    print('Working on image number: ' + str(i + 1))

    image = ee.Image(images_list.get(i))
    date = ee.Date(image.get('system:time_start'))

    year = date.get('year').getInfo()
    month = date.get('month').getInfo()
    week = date.get('week').getInfo()
    day = date.get('day').getInfo()
    cloud_cover = image.get('CLOUD_COVER').getInfo()

    images_properties_table.at[i, 'year'] = year
    images_properties_table.at[i, 'month'] = month
    images_properties_table.at[i, 'day'] = day
    images_properties_table.at[i, 'week of the year'] = week
    images_properties_table.at[i, 'cloud cover'] = cloud_cover

  return images_properties_table

In [6]:
# Takes from the collection one image from each year with minimal cloud cover
# get- ee.collection
#      images properties table (pd.DataFrame) if not provided the function creates this using Create_Images_Properties_Table
# return- list of images from each year with minimal cloud cover (List)

def Get_One_Image_Per_Year_With_Minimum_Cloud_Cover(_images_collection, _images_properties_table = pd.DataFrame()):

  if _images_properties_table.empty == True:
    print('Create images properties table')
    _images_properties_table = Create_Images_Properties_Table(_images_collection)

  images_properties_table = _images_properties_table.sort_values(['year', 'cloud cover'])
  years = Stack_The_Years(images_properties_table)
  images_list = _images_collection.toList(len(images_properties_table))
  my_images_list = []

  for year in years:
    image_index = images_properties_table[images_properties_table['year'] == year].first_valid_index()
    my_images_list.append(ee.Image(images_list.get(image_index)))
  return my_images_list

### NOT WORKING

In [7]:
def Get_One_Image_Per_Year_With_Minimum_Cloud_Cover_And_Save_To_Draive(_images_collection,  _roi, _images_properties_table = pd.DataFrame(), _image_bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'], _maximum_pixels = 10**13):
  if _images_properties_table.empty == True:
    print('Create images properties table')
    _images_properties_table = Create_Images_Properties_Table(_images_collection)

  images_properties_table = _images_properties_table.sort_values(['year', 'cloud cover'])
  years = Stack_The_Years(images_properties_table)
  images_list = _images_collection.toList(len(images_properties_table))

  for i in range(len(years)):
    image_year = years[i]
    image_index = images_properties_table[images_properties_table['year'] == image_year].first_valid_index()

    for band in _image_bands:
      image = ee.Image(images_list.get(image_index)).select(band)
      name = str(image_year) + band
      folder_name = 'image of ' + str(image_year)

      task = ee.batch.Export.image.toDrive(**{
        'image': image,
        'description': name,
        'folder': folder_name,
        'scale': 30,
        'region': _roi,
        'maxPixels': _maximum_pixels
        })
      task.start()

In [8]:
# get- ee.collection (_)

# return- table (pd.DataFrame) of the images properties (year, month, day, week of the year and cloud cover)
# note- the function takes time, so it prints the amount of images in the collocation and on which image it is working

def Create_Images_Properties_Table_2(_images_collection, _properties):

  number_of_images = _images_collection.size().getInfo()
  images_list = _images_collection.toList(number_of_images)

  columns = ['year', 'month', 'day', 'week of the year', 'cloud cover']
  images_properties_table = pd.DataFrame(index=range(number_of_images), columns=columns)

  print('Number of images: ' + str(number_of_images))

  for i in range(number_of_images):

    print('Working on image number: ' + str(i + 1))

    image = ee.Image(images_list.get(i))
    date = ee.Date(image.get('system:time_start'))

    year = date.get('year').getInfo()
    month = date.get('month').getInfo()
    week = date.get('week').getInfo()
    day = date.get('day').getInfo()
    cloud_cover = image.get('CLOUD_COVER').getInfo()

    images_properties_table.at[i, 'year'] = year
    images_properties_table.at[i, 'month'] = month
    images_properties_table.at[i, 'day'] = day
    images_properties_table.at[i, 'week of the year'] = week
    images_properties_table.at[i, 'cloud cover'] = cloud_cover

  return images_properties_table

In [9]:
##########  NOT WORKING  ##########

# like the function above (Create_Images_Properties_Table) just gets a list of ee.image.Image

def Create_Images_Properties_Table_For_Images_List(_images_list):

  number_of_images = len(_images_list)
  columns = ['year', 'month', 'day', 'week of the year', 'cloud cover']
  images_properties_table = pd.DataFrame(index=range(number_of_images), columns=columns)

  print('Number of images: ' + str(number_of_images))

  for i in range(number_of_images):

    print('Working on image number: ' + str(i + 1))

    image = _images_list[i]
    date = ee.Date(image.get('system:time_start'))

    year = date.get('year').getInfo()
    month = date.get('month').getInfo()
    week = date.get('week').getInfo()
    day = date.get('day').getInfo()
    cloud_cover = image.get('CLOUD_COVER').getInfo()

    images_properties_table.at[i, 'year'] = year
    images_properties_table.at[i, 'month'] = month
    images_properties_table.at[i, 'day'] = day
    images_properties_table.at[i, 'week of the year'] = week
    images_properties_table.at[i, 'cloud cover'] = cloud_cover

  return images_properties_table

## Finding images

### Collections

In [10]:
# Filter by collection and bands
# explanations of the collocations on the website of Google Engine

# USGS Landsat 5 Level 2, Collection 2, Tier 1 (16.3.1984 - 5.5.2012)
# This dataset contains atmospherically corrected surface reflectance
landsat_5_sr_t1 = "LANDSAT/LT05/C02/T1_L2"

# USGS Landsat 8 Level 2, Collection 2, Tier 1 (18.3.2013 - 29.12.2022)
# This dataset contains atmospherically corrected surface reflectance
landsat_8_sr_t1 = "LANDSAT/LC08/C02/T1_L2"


# USGS Landsat 5 TM Collection 2 Tier 1 DN values, representing scaled, calibrated at-sensor radiance.
landsat_5 = "LANDSAT/LT05/C02/T1"

### Setting the region of interest (roi)

In [11]:
# Coordinates can be determined with Google Engine

duck = ee.Geometry.Polygon(
        [[[60.56563678074698, 42.482434693528965],
          [60.56563678074698, 42.454450370527695],
          [60.60477557469229, 42.454450370527695],
          [60.60477557469229, 42.482434693528965]]]);

north_center = ee.Geometry.Polygon(
        [[[60.69708606189233, 43.390349938544766],
          [60.69708606189233, 43.373195119121426],
          [60.71699878161889, 43.373195119121426],
          [60.71699878161889, 43.390349938544766]]]);

geometry = ee.Geometry.Polygon(
        [[[66.30478921553372, 42.40867483090145],
          [66.30478921553372, 42.35846304490728],
          [66.36727395674465, 42.35846304490728],
          [66.36727395674465, 42.40867483090145]]]);

round_southeast_to_geometry = ee.Geometry.Polygon(
        [[[66.54801535967952, 42.2970632891509],
          [66.54801535967952, 42.27636345005415],
          [66.5784852540887, 42.27636345005415],
          [66.5784852540887, 42.2970632891509]]])

north_of_geometry = ee.Geometry.Polygon(
        [[[66.53957253115146, 42.641280278484906],
          [66.53957253115146, 42.60603938609627],
          [66.57579308168857, 42.60603938609627],
          [66.57579308168857, 42.641280278484906]]])

west_and_south = ee.Geometry.Polygon(
        [[[66.13225268318429, 42.14590945043169],
          [66.13225268318429, 42.12636951803808],
          [66.15886019661203, 42.12636951803808],
          [66.15886019661203, 42.14590945043169]]])

big_field_nourth_west = ee.Geometry.Polygon(
        [[[65.03133877084339, 44.21873826092204],
          [65.03133877084339, 44.18840436579028],
          [65.07099254891956, 44.18840436579028],
          [65.07099254891956, 44.21873826092204]]])

big_field_east = ee.Geometry.Polygon(
        [[[65.77774695249072, 44.049297352785146],
          [65.77774695249072, 44.03041717163485],
          [65.8012645611333, 44.03041717163485],
          [65.8012645611333, 44.049297352785146]]])

north_east = ee.Geometry.Polygon(
        [[[60.48099572591193, 43.41688485297308],
          [60.48099572591193, 43.4046328168259],
          [60.494385313314275, 43.4046328168259],
          [60.494385313314275, 43.41688485297308]]])

east_to_duck  = ee.Geometry.Polygon(
        [[[60.83250808298165, 42.43267915783082],
          [60.83250808298165, 42.39719361664269],
          [60.86546706735665, 42.39719361664269],
          [60.86546706735665, 42.43267915783082]]])

west_west_to_duck = ee.Geometry.Polygon(
        [[[60.242235352464725, 42.543972307724246],
          [60.242235352464725, 42.497159760684326],
          [60.280687500902225, 42.497159760684326],
          [60.280687500902225, 42.543972307724246]]])

north_really_east = ee.Geometry.Polygon(
        [[[60.77966253028565, 43.38594628785249],
          [60.77966253028565, 43.371629106416755],
          [60.79438249335938, 43.371629106416755],
          [60.79438249335938, 43.38594628785249]]]);

long_field_north = ee.Geometry.Polygon(
        [[[63.05538765244788, 41.512191036042026],
          [63.05538765244788, 41.481204865570675],
          [63.082166827252564, 41.481204865570675],
          [63.082166827252564, 41.512191036042026]]])

long_field_center = ee.Geometry.Polygon(
        [[[62.74542635327075, 41.02800958660507],
          [62.74542635327075, 40.97878142209725],
          [62.78559511547778, 40.97878142209725],
          [62.78559511547778, 41.02800958660507]]])

long_field_south = ee.Geometry.Polygon(
        [[[62.64568972961815, 40.3656717400198],
          [62.64568972961815, 40.313334098450525],
          [62.6978747882119, 40.313334098450525],
          [62.6978747882119, 40.3656717400198]]])

long_field_small_round = ee.Geometry.Polygon(
        [[[63.05878328856112, 41.45076730199397],
          [63.05878328856112, 41.43172216695157],
          [63.08075594481112, 41.43172216695157],
          [63.08075594481112, 41.45076730199397]]])

big_field_south_center = ee.Geometry.Polygon(
        [[[65.2000750112745, 43.89262720416123],
          [65.2000750112745, 43.844609891224046],
          [65.2440203237745, 43.844609891224046],
          [65.2440203237745, 43.89262720416123]]])

big_field_west = ee.Geometry.Polygon(
        [[[64.87000945492088, 43.98541617736425],
          [64.87000945492088, 43.94847424036691],
          [64.90760329647361, 43.94847424036691],
          [64.90760329647361, 43.98541617736425]]])

west_to_big_field = ee.Geometry.Polygon(
        [[[64.30821019002406, 44.0988445969242],
          [64.30821019002406, 44.06049303321364],
          [64.35181217977015, 44.06049303321364],
          [64.35181217977015, 44.0988445969242]]])

sout_in_geometry_arae = ee.Geometry.Polygon(
        [[[66.59036802658952, 42.17657207071661],
          [66.59036802658952, 42.14386944829474],
          [66.63002180466569, 42.14386944829474],
          [66.63002180466569, 42.17657207071661]]])

In [12]:
roi = north_of_geometry
name_for_the_folder_to_save = 'north_of_geometry'

In [13]:
# Seting the collection
collection = landsat_8_sr_t1

# Seting the bands
image_bands = Get_Bands_For_Collection(collection)

# Filter by months
month_start = 6
month_end = 6

# Filter by maximum cloud cover
maximum_cloud_cover = 5   # percentage

## Print coordinates

In [14]:
coordinates = roi.coordinates().getInfo()
coordinates = np.array(coordinates[0])
longitude = (coordinates[:,0]).mean()
latitude = (coordinates[:,1]).mean()

print(name_for_the_folder_to_save)
print('longitude- ' + str(longitude))
print('latitude- ' + str(latitude))

north_of_geometry
longitude- 66.55406075136631
latitude- 42.62013574305173


###  Find with Google Engine

In [15]:
collection = ee.ImageCollection(collection)\
.filter(ee.Filter.calendarRange(month_start, month_end, 'month'))\
.filter(ee.Filter.bounds(roi))\
.filterMetadata('CLOUD_COVER', 'less_than', maximum_cloud_cover)

## Produces a list with one image (ee.image) per year with minimum cloud cover

In [16]:
images_properties_table = Create_Images_Properties_Table(collection)
images_properties_table = images_properties_table.sort_values(['year', 'cloud cover'])
images_properties_table

Number of images: 12
Working on image number: 1
Working on image number: 2
Working on image number: 3
Working on image number: 4
Working on image number: 5
Working on image number: 6
Working on image number: 7
Working on image number: 8
Working on image number: 9
Working on image number: 10
Working on image number: 11
Working on image number: 12


Unnamed: 0,year,month,day,week of the year,cloud cover
0,2013,6,20,25,0.21
2,2014,6,23,26,0.0
1,2014,6,7,23,2.79
3,2015,6,10,24,0.01
5,2016,6,28,26,0.1
4,2016,6,12,23,4.43
6,2017,6,15,24,0.32
7,2020,6,7,23,1.06
8,2020,6,23,26,3.32
9,2021,6,26,25,0.0


## Download to drive

In [17]:
# Saves the images when the names of the images is their year

# Set folder to save the images

idx = np.array(images_properties_table.index)
number_of_images = collection.size().getInfo()
images_list = collection.toList(number_of_images)

maximum_pixels = 10**13 # Can be changed but not recommended

for i in idx:
  year = str(images_properties_table['year'][i])
  for band in image_bands:
    image = ee.Image(images_list.get(int(i))).select(band)
    task = ee.batch.Export.image.toDrive(**{
            'image': image,
            'description': year + '_' + band,
            'folder': name_for_the_folder_to_save + '_' +  year + '_' + str(i),
            'scale': 30,
            'region': roi,
            'maxPixels': maximum_pixels
          })
    task.start()

## Download as geotif

In [None]:
# Saves the images when the names of the images is their year

# Set folder to save the images
folder_name = 'geometry4'

idx = np.array(images_properties_table.index)
number_of_images = collection.size().getInfo()
images_list = collection.toList(number_of_images)

maximum_pixels = 10**13 # Can be changed but not recommended

for i in idx:
  image = ee.Image(images_list.get(int(i))).select(image_bands)
  task = ee.batch.Export.image.toDrive(**{
          'image': image,
          'description': str(images_properties_table['year'][i]),
          'folder': folder_name,
          'scale': 30,
          'fileFormat': 'GeoTIFF',
          'region': roi.getInfo()['coordinates'],
          'maxPixels': maximum_pixels
          })
  task.start()