In [None]:
##Import required Google Earth Engine python packages and check if they work in python environment
import ee
ee.Initialize()
import geetools
import geemap
import os
from geemap import cartoee
import matplotlib.pyplot as plt
import pandas as pd 

In [None]:
#Create the interactive geemap map object that layers are attached to later in the notebook
Map = geemap.Map()

In [None]:
#Import the river boundary from the Google Earth Engine server
#The river is stored as a vector asset (feature collection) in Google Earth Engine
TN_River = ee.FeatureCollection("projects/pjf927/assets/TN_River_Main_5000m_Divide_4")
#Some functions require a geometry (rather than a feature collection) to clip or reduce over
TN_RiverGeom = TN_River.geometry() #Geometry of the river feature collection
#Generate a rectangular bounding box around the river study area
RiverBounds = TN_RiverGeom.bounds()
#Add the bounding box to the interactive map
Map.addLayer(RiverBounds)

In [None]:
#Call in all Landsat image collections that have Top of Atmosphere (TOA) reflectance pre-calculated
def load_toa_collection(collection_id, wrs_row=36):
    """Return one Landsat TOA collection restricted to the TN River study area.

    Every sensor is filtered the same way, so the shared pipeline lives here
    instead of being copy-pasted once per sensor.

    Args:
        collection_id: Earth Engine dataset id, e.g. "LANDSAT/LC08/C02/T1_TOA".
        wrs_row: WRS row to keep. Row 36 is the swath row that covers the
            largest portion of the TN River boundary.

    Returns:
        ee.ImageCollection filtered to images that intersect TN_River, limited
        to the requested WRS row and sorted by acquisition time. No date
        filter is applied, so the full archive of each sensor is used.
    """
    return (
        ee.ImageCollection(collection_id)
        .filterBounds(TN_River) #Keep only swath grids that cover the TN River boundary
        .filter(ee.Filter.eq('WRS_ROW', wrs_row)) #Keep only the requested swath row
        .sort('system:time_start') #Sort collection by acquisition time
    )

#Landsat 4, 5, 7, 8 and 9 - Collection 2, Tier 1 TOA Reflectance
LS4_TOA = load_toa_collection("LANDSAT/LT04/C02/T1_TOA")
LS5_TOA = load_toa_collection("LANDSAT/LT05/C02/T1_TOA")
LS7_TOA = load_toa_collection("LANDSAT/LE07/C02/T1_TOA")
LS8_TOA = load_toa_collection("LANDSAT/LC08/C02/T1_TOA")
LS9_TOA = load_toa_collection("LANDSAT/LC09/C02/T1_TOA")

#Merge all image collections into a newly created image collection called All_TOA
All_TOA = LS4_TOA.merge(LS5_TOA.merge(LS7_TOA.merge(LS8_TOA.merge(LS9_TOA))))

#Get a count of all images in the Landsat Top of Atmosphere collection
pre_count = All_TOA.size().getInfo()
print("Images: ", pre_count)

In [None]:
#Function that scores how cloudy an image is over the river geometry
def func_rdd(image):
    """Attach the mean simple-cloud-score over the river to ``image``.

    Runs ee.Algorithms.Landsat.simpleCloudScore on the image, keeps its
    'cloud' band, and averages that band over TN_RiverGeom at a 30 m scale.
    The dictionary returned by reduceRegion is then set on the image as
    properties, which is what lets later cells filter on 'cloud'.

    Args:
        image: a Landsat TOA ee.Image.

    Returns:
        The same image with the reduction result added to its properties.
    """
    cloud_band = ee.Algorithms.Landsat.simpleCloudScore(image).select('cloud')
    river_mean = cloud_band.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=TN_RiverGeom,
        scale=30,
    )
    return image.set(river_mean)

In [None]:
#Attach the mean river cloud score (func_rdd) to every image of each sensor-specific collection
LS4_Cloudy, LS5_Cloudy, LS7_Cloudy, LS8_Cloudy, LS9_Cloudy = [
    toa_collection.map(func_rdd)
    for toa_collection in (LS4_TOA, LS5_TOA, LS7_TOA, LS8_TOA, LS9_TOA)
]

In [None]:
#Keep only the images whose mean cloud score over the TN River boundary is below 1
low_cloud = ee.Filter.lt('cloud', 1)

LS4_Filt = LS4_Cloudy.filter(low_cloud)
LS5_Filt = LS5_Cloudy.filter(low_cloud)
LS7_Filt = LS7_Cloudy.filter(low_cloud)
LS8_Filt = LS8_Cloudy.filter(low_cloud)
LS9_Filt = LS9_Cloudy.filter(low_cloud)

#Merge the filtered collections into All_Filt and order the result by acquisition time
All_Filt = (
    LS4_Filt
    .merge(LS5_Filt.merge(LS7_Filt.merge(LS8_Filt.merge(LS9_Filt))))
    .sort('system:time_start')
)

In [None]:
#Report the image count from before the cloud filter (pre_count was computed when the TOA collections were merged)
print("Images Before Cloud Filter: ", pre_count)

#Count the images that remain after the cloud filter and report it
filt_count = All_Filt.size().getInfo()
print("Images After Cloud Filter: ", filt_count)

In [None]:
#Tag every image with a 'DATE' property holding its 'system:time_start' formatted as "YYYY-MM-dd"
All_Filt_Dates = All_Filt.map(
    lambda img: img.set({"DATE": ee.Date(img.get("system:time_start")).format("YYYY-MM-dd")})
)

#Pull the dates to the client as a list and print them out
Dates = All_Filt_Dates.aggregate_array("DATE").getInfo()
#The list is named Dates (capital D); printing the undefined name `dates` raised a NameError
print("Dates in Imagecollection: ", Dates)

In [None]:
#Collect the SPACECRAFT_ID of every cloud-filtered image, fetch the list and print it
sensor_ids = All_Filt.aggregate_array("SPACECRAFT_ID")
Sensor = sensor_ids.getInfo()
print(Sensor)

In [None]:
#Collect the WRS_ROW (swath row) of every cloud-filtered image, fetch the list and print it
swath_rows = All_Filt.aggregate_array("WRS_ROW")
Swath = swath_rows.getInfo()
print(Swath)

In [None]:
#Collect the 'cloud' property (mean cloud score over the river) of every cloud-filtered image, fetch the list and print it
cloud_scores = All_Filt.aggregate_array("cloud")
CLD_Percent = cloud_scores.getInfo()
print(CLD_Percent)

In [None]:
#Combine the per-image lists into one pandas dataframe, one row per image
acquisition_rows = list(zip(Dates, Sensor, Swath, CLD_Percent))
acquisition_columns = ['Date', 'Sensor', 'Swath_Row', 'Cloud_Percent']
Acquisition_Data = pd.DataFrame(acquisition_rows, columns=acquisition_columns)

print(Acquisition_Data)

In [None]:
#Save dataframe as a csv to a folder directory
#NOTE: this is an absolute, machine-specific path; edit OUTPUT_CSV to export somewhere else
OUTPUT_CSV = r"D:\School\Adv_Data_Analytics\Project\csv\Cloud_Score_Acquisition_Data.csv"

#Create the destination folder when it is missing so to_csv does not fail on it
output_dir = os.path.dirname(OUTPUT_CSV)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

#Acquisition_Data is already a DataFrame, so it is written directly without re-wrapping it
Acquisition_Data.to_csv(OUTPUT_CSV)