## Import Python Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import ee
import geemap
# Create the geemap map object.
Map = geemap.Map()

## Preparing Latitude and Longitude of Sample Lakes

In [2]:
import pandas as pd

# Location of the lake coordinate table.
# NOTE(review): this is a hard-coded absolute Windows path; consider a path
# relative to a configurable data directory so the notebook runs elsewhere.
csv_file_path = r"C:\Users\aisha\Downloads\182_lakes_updated_coordinates.csv"

# Load the table, discard the six columns that are not needed
# ('Lat', 'Lon', 'Lattitude', 'wkt_geom', 'W', 'L'), and give the remaining
# columns their final names. Built as one chain so no frame is mutated in place.
lakes_182 = (
    pd.read_csv(csv_file_path)
    .drop(columns=['Lat', 'Lon', 'Lattitude', 'wkt_geom', 'W', 'L'])
    .rename(columns={
        'PERMANENT_': 'PERMANENT_ID',
        'Lattitude_': 'Lat',
        'Longitude_': 'Lon',
        'lake.name': 'SITE_NAME',
    })
)

# Display the resulting DataFrame
lakes_182

Unnamed: 0,PERMANENT_ID,SITE_NAME,Lat,Lon
0,47725041,Silver Lake,43.294298,-74.429060
1,131844984,Limekiln Lake,43.712164,-74.798791
2,53542293,Canada Lake,43.166322,-74.530215
3,120023153,Piseco Lake,43.420732,-74.535786
4,53541207,Hinckley Reservoir,43.336330,-75.075211
...,...,...,...,...
177,132436448,Clear Pond,44.040612,-74.763285
178,131841654,Shingle Shanty Pond,43.916438,-74.780530
179,131846637,Chub Pond,43.527324,-75.045971
180,53540721,The Floe,43.400910,-74.703971


## Landsat 7 Image Processing

In [3]:
# Initialise the Earth Engine client library before any ee.* object is built.
ee.Initialize()

# Landsat 7 source band that is extracted from every image ...
LC7_BANDS = ['ST_B6']
# ... and the name that band is given when it is selected.
STD_NAMES = ['temp_satellite']

# all_lakes=ee.FeatureCollection('projects/ee-mazarderakhsh/assets/ALAP-ALTM-Lakes-195')

def maskL457sr(image):
    """Rescale the optical/thermal bands and mask to clear, unsaturated water pixels.

    Args:
        image: Image exposing the 'QA_PIXEL', 'QA_RADSAT', 'SR_B.' and 'ST_B6'
            bands (used as an ``ee.Image``).

    Returns:
        ``image`` with the 'SR_B.' bands replaced by ``DN * 0.0000275 - 0.2``
        and 'ST_B6' replaced by ``DN * 0.00341802 - 124.15``, masked so that
        only pixels passing all three checks below remain.
    """
    qa_pixel = image.select('QA_PIXEL')

    # Bits 0-4 of QA_PIXEL must all be zero.
    # NOTE(review): in the Landsat Collection 2 layout these appear to be the
    # fill / dilated cloud / cloud / cloud-shadow flags - confirm against USGS docs.
    clear_pixels = qa_pixel.bitwiseAnd(0b11111).eq(0)

    # No band may be flagged as saturated.
    unsaturated_pixels = image.select('QA_RADSAT').eq(0)

    # Exact QA_PIXEL values accepted as "water".
    # NOTE(review): 5504 looks like the water code, but 5440 looks like the
    # clear-land code - verify that including 5440 is intended.
    water_pixels = qa_pixel.eq(5440).Or(qa_pixel.eq(5504))

    scaled_optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    # NOTE(review): -124.15 presumably folds the Kelvin offset and the
    # Kelvin->Celsius shift into one constant (output in degrees C) - confirm.
    scaled_thermal = image.select('ST_B6').multiply(0.00341802).add(-124.15)

    result = image.addBands(scaled_optical, None, True).addBands(scaled_thermal, None, True)
    for pixel_mask in (clear_pixels, unsaturated_pixels, water_pixels):
        result = result.updateMask(pixel_mask)
    return result

def maskL457sr_SUMMER(image, apply_water_mask=False, min_thermal_dn=None):
    """Rescale the optical/thermal bands and mask cloudy or saturated pixels.

    With the default arguments only the QA and saturation masks are applied.
    The water mask and the raw-thermal threshold used to be computed and then
    discarded; they are now opt-in so the defaults cost nothing extra.

    Args:
        image: Image exposing the 'QA_PIXEL', 'QA_RADSAT', 'SR_B.' and 'ST_B6'
            bands (used as an ``ee.Image``).
        apply_water_mask: When true, additionally keep only pixels whose
            QA_PIXEL value is exactly 5440 or 5504.
        min_thermal_dn: When given, additionally keep only pixels whose *raw*
            'ST_B6' digital number is greater than this value. With the scaling
            used below, 40000 corresponds to about 12.6 and 39000 to about 9.2
            in the rescaled band.

    Returns:
        ``image`` with the 'SR_B.' bands replaced by ``DN * 0.0000275 - 0.2``
        and 'ST_B6' replaced by ``DN * 0.00341802 - 124.15``, with the
        requested masks applied.
    """
    qa_pixel = image.select('QA_PIXEL')

    # Bits 0-4 of QA_PIXEL must all be zero for a pixel to be kept.
    qaMask = qa_pixel.bitwiseAnd(0b11111).eq(0)
    saturationMask = image.select('QA_RADSAT').eq(0)

    opticalBands = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    # NOTE(review): -124.15 presumably yields degrees Celsius - confirm.
    thermalBand = image.select('ST_B6').multiply(0.00341802).add(-124.15)

    masked = (
        image.addBands(opticalBands, None, True)
        .addBands(thermalBand, None, True)
        .updateMask(qaMask)
        .updateMask(saturationMask)
    )

    if apply_water_mask:
        waterMask = qa_pixel.eq(5440).Or(qa_pixel.eq(5504))
        masked = masked.updateMask(waterMask)

    if min_thermal_dn is not None:
        # Threshold is evaluated on the unscaled input band, not the rescaled one.
        masked = masked.updateMask(image.select('ST_B6').gt(min_thermal_dn))

    return masked



# Define a function to compute the mean reflectance values for the specified bands within the region of interest (lake)
def reflectance(img, lake):
    """Tag an image with its date and its mean band values over a region.

    Args:
        img: Image to summarise (used as an ``ee.Image``).
        lake: Geometry over which the mean is taken.

    Returns:
        ``img`` with two properties set: 'DATE_SMP' (the formatted image date)
        and 'reflectance' (the mean of each band named in ``STD_NAMES`` over
        ``lake``, reduced at a scale of 30).
    """
    regional_means = img.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=lake,
        scale=30,
    )
    selected_means = regional_means.select(STD_NAMES)

    dated_img = img.set('DATE_SMP', img.date().format())
    return dated_img.set('reflectance', selected_means)

# Per-lake result frames; concatenated into one table after the loop
dfs = []

# A point geometry needs both coordinates, so skip lakes missing either one
lakes_182 = lakes_182[lakes_182['Lat'].notnull() & lakes_182['Lon'].notnull()]

# Loop through each lake and retrieve the Landsat 7 record for that lake
for index, row in lakes_182.iterrows():
    # Extract lake information
    pondId = row['PERMANENT_ID']
    pondname = row['SITE_NAME']
    Lon = row['Lon']
    Lat = row['Lat']

    # 100-meter buffer around the lake point; used both to find scenes and to average over
    lake_point = ee.Geometry.Point(Lon, Lat)
    lake_polygon = lake_point.buffer(100)

    # Landsat 7 scenes intersecting the buffer with CLOUD_COVER < 15, masked by
    # maskL457sr_SUMMER and reduced to the single renamed band in STD_NAMES
    l7 = (
        ee.ImageCollection('LANDSAT/LE07/C02/T1_L2')
        .filter(ee.Filter.calendarRange(1, 12, 'month'))  # all months; narrow the range for a seasonal subset
        .filterBounds(lake_polygon)
        .filter(ee.Filter.lt('CLOUD_COVER', 15))
        .map(maskL457sr_SUMMER)
        .select(LC7_BANDS, STD_NAMES)
    )

    # Attach the date and the buffer-mean band values to every Landsat 7 image
    map_reflectance = l7.map(lambda img: reflectance(img, lake_polygon))

    # Collect [date, {band: mean}] pairs for the whole collection
    list_reflectance = map_reflectance.reduceColumns(ee.Reducer.toList(2), ['DATE_SMP', 'reflectance']).values().get(0)

    # One record per image. Bands whose mean came back as None are left out so
    # they turn into NaN; fixing the columns keeps the frame well-formed even
    # when the lake has no images at all.
    records = [
        {'DATE_SMP': date_str, **{band: value for band, value in band_means.items() if value is not None}}
        for date_str, band_means in list_reflectance.getInfo()
    ]
    df_reflectance = pd.DataFrame(records, columns=['DATE_SMP'] + STD_NAMES)
    df_reflectance['DATE_SMP'] = pd.to_datetime(df_reflectance['DATE_SMP']).dt.date
    df_reflectance[STD_NAMES] = df_reflectance[STD_NAMES].astype('float64')

    df_reflectance['PERMANENT_ID'] = pondId
    df_reflectance['SITE_NAME'] = pondname
    df_reflectance['Lon'] = Lon
    df_reflectance['Lat'] = Lat

    # Add the DataFrame to the list
    dfs.append(df_reflectance)


# Concatenate all lakes into a single DataFrame, ordered by 'DATE_SMP' ascending
df_all_lakes_l7 = pd.concat(dfs, ignore_index=True).sort_values(by='DATE_SMP')

df_all_lakes_l7

Unnamed: 0,DATE_SMP,temp_satellite,PERMANENT_ID,SITE_NAME,Lon,Lat
5345,1999-07-12,,132434562,Eagle Crag Lake,-74.607299,44.176303
36484,1999-07-12,22.583435,115353781,Connery Pond,-73.934543,44.311625
15656,1999-07-12,,132436509,Little Tupper Lake,-74.625509,44.032402
30894,1999-07-12,,131841333,Clear Lake,-74.920201,43.988143
15978,1999-07-12,22.289709,132434404,Gull Pond,-74.528169,44.210139
...,...,...,...,...,...,...
32969,2023-10-03,17.378286,132437546,Shallow Lake,-74.739886,43.819320
7110,2023-10-03,17.382583,131844637,Seventh Lake,-74.732752,43.746527
15009,2023-10-03,17.540526,131842322,Shallow Pond,-75.031684,43.930207
37562,2023-10-03,15.829418,131846637,Chub Pond,-75.045971,43.527324


In [4]:
# Clean the Landsat 7 table in one non-mutating chain:
#   1. strip leading/trailing whitespace from the column names,
#   2. drop rows that have a missing value in any column,
#   3. keep the first row for each date/lake combination.
df_all_lakes_l7 = (
    df_all_lakes_l7
    .set_axis(df_all_lakes_l7.columns.str.strip(), axis=1)
    .dropna()
    .drop_duplicates(subset=['DATE_SMP', 'PERMANENT_ID', 'SITE_NAME', 'Lon', 'Lat'])
)

df_all_lakes_l7

Unnamed: 0,DATE_SMP,temp_satellite,PERMANENT_ID,SITE_NAME,Lon,Lat
36484,1999-07-12,22.583435,115353781,Connery Pond,-73.934543,44.311625
15978,1999-07-12,22.289709,132434404,Gull Pond,-74.528169,44.210139
30697,1999-07-12,22.341759,129690817,Lake Kushaqua,-74.111981,44.520620
16387,1999-07-12,21.916373,132876250,Indian Lake,-74.134163,44.716591
1151,1999-07-12,21.971668,131843856,Big Moose Lake,-74.859253,43.825698
...,...,...,...,...,...,...
32969,2023-10-03,17.378286,132437546,Shallow Lake,-74.739886,43.819320
7110,2023-10-03,17.382583,131844637,Seventh Lake,-74.732752,43.746527
15009,2023-10-03,17.540526,131842322,Shallow Pond,-75.031684,43.930207
37562,2023-10-03,15.829418,131846637,Chub Pond,-75.045971,43.527324


### Calculating Landsat 7 Temperature Slope per Decade

In [5]:
import pandas as pd
from scipy import stats
import numpy as np

# One record per (lake, month) regression
results = []

# Regress against real datetimes. Re-bind the name rather than assigning a
# column into the frame returned by drop_duplicates.
df_all_lakes_l7 = df_all_lakes_l7.assign(DATE_SMP=pd.to_datetime(df_all_lakes_l7['DATE_SMP']))

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_l7.groupby('PERMANENT_ID'):
    # Every row in the group belongs to this lake, so its first name is enough
    pond_n = lake_data['SITE_NAME'].iloc[0]

    # Loop over each calendar month
    for month in range(1, 13):
        month_data = lake_data[lake_data['DATE_SMP'].dt.month == month]

        # Dates at one-second resolution, independent of how pandas stores them
        dates = month_data['DATE_SMP'].values.astype('datetime64[s]')
        y_values = month_data['temp_satellite'].values

        # Keep only pairs where both the date and the temperature are present
        mask = ~np.isnat(dates) & ~np.isnan(y_values)
        x_values = dates[mask].astype(np.int64)  # seconds since the epoch
        y_values = y_values[mask]

        # A slope is only defined when there is more than one distinct date
        if len(np.unique(x_values)) > 1:
            fit = stats.linregress(x_values, y_values)

            # Slope is in degrees per second; scale to a 365-day-year decade.
            # No significance filter is applied here - the p-value is stored so
            # that it can be filtered on later.
            slope_per_decade = fit.slope * 10 * 365 * 24 * 3600
            result = {
                'Lake_ID': lake_id,
                'Lake_name': pond_n,
                'Month': month,
                'Slope_per_Decade': slope_per_decade,
                'P-value': fit.pvalue,
                'R-value': fit.rvalue,
                'Temp_satellite': y_values.mean()  # Mean temperature of the points used in the fit
            }
            results.append(result)

# Create a DataFrame from the results
results_df_l7 = pd.DataFrame(results)

results_df_l7

Unnamed: 0,Lake_ID,Lake_name,Month,Slope_per_Decade,P-value,R-value,Temp_satellite
0,47719887,Siamese Ponds,1,6.499539,0.100605,0.548546,-18.862303
1,47719887,Siamese Ponds,3,-0.779584,0.489343,-0.510657,0.461453
2,47719887,Siamese Ponds,4,2.335942,0.185237,0.392017,5.347152
3,47719887,Siamese Ponds,5,2.436875,0.047872,0.486119,13.206454
4,47719887,Siamese Ponds,6,2.003920,0.123744,0.589900,19.902579
...,...,...,...,...,...,...,...
2125,167248139,Chaumont Pond,8,0.832137,0.573641,0.191042,23.015771
2126,167248139,Chaumont Pond,9,-0.097163,0.901260,-0.028835,18.899904
2127,167248139,Chaumont Pond,10,0.424280,0.800779,0.086314,11.255207
2128,167248139,Chaumont Pond,11,2.386663,0.505179,0.399531,4.267515


## Landsat 5 Image Processing

In [6]:
# Initialise the Earth Engine client library before any ee.* object is built.
ee.Initialize()

# Source band extracted from every Landsat 5 image. The constant keeps the
# name LC7_BANDS because the selection code in this cell refers to it by that name.
LC7_BANDS = ['ST_B6']
# Name that band is given when it is selected.
STD_NAMES = ['temp_satellite']

# all_lakes=ee.FeatureCollection('projects/ee-mazarderakhsh/assets/ALAP-ALTM-Lakes-195')

def maskL457sr(image):
    """Rescale the optical/thermal bands and mask to clear, unsaturated water pixels.

    Args:
        image: Image exposing the 'QA_PIXEL', 'QA_RADSAT', 'SR_B.' and 'ST_B6'
            bands (used as an ``ee.Image``).

    Returns:
        ``image`` with the 'SR_B.' bands replaced by ``DN * 0.0000275 - 0.2``
        and 'ST_B6' replaced by ``DN * 0.00341802 - 124.15``, masked so that
        only pixels passing all three checks below remain.
    """
    qa_pixel = image.select('QA_PIXEL')

    # Bits 0-4 of QA_PIXEL must all be zero.
    # NOTE(review): in the Landsat Collection 2 layout these appear to be the
    # fill / dilated cloud / cloud / cloud-shadow flags - confirm against USGS docs.
    clear_pixels = qa_pixel.bitwiseAnd(0b11111).eq(0)

    # No band may be flagged as saturated.
    unsaturated_pixels = image.select('QA_RADSAT').eq(0)

    # Exact QA_PIXEL values accepted as "water".
    # NOTE(review): 5504 looks like the water code, but 5440 looks like the
    # clear-land code - verify that including 5440 is intended.
    water_pixels = qa_pixel.eq(5440).Or(qa_pixel.eq(5504))

    scaled_optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    # NOTE(review): -124.15 presumably folds the Kelvin offset and the
    # Kelvin->Celsius shift into one constant (output in degrees C) - confirm.
    scaled_thermal = image.select('ST_B6').multiply(0.00341802).add(-124.15)

    result = image.addBands(scaled_optical, None, True).addBands(scaled_thermal, None, True)
    for pixel_mask in (clear_pixels, unsaturated_pixels, water_pixels):
        result = result.updateMask(pixel_mask)
    return result

def maskL457sr_SUMMER(image, apply_water_mask=False, min_thermal_dn=None):
    """Rescale the optical/thermal bands and mask cloudy or saturated pixels.

    With the default arguments only the QA and saturation masks are applied.
    The water mask and the raw-thermal threshold used to be computed and then
    discarded; they are now opt-in so the defaults cost nothing extra.

    Args:
        image: Image exposing the 'QA_PIXEL', 'QA_RADSAT', 'SR_B.' and 'ST_B6'
            bands (used as an ``ee.Image``).
        apply_water_mask: When true, additionally keep only pixels whose
            QA_PIXEL value is exactly 5440 or 5504.
        min_thermal_dn: When given, additionally keep only pixels whose *raw*
            'ST_B6' digital number is greater than this value. With the scaling
            used below, 40000 corresponds to about 12.6 and 39000 to about 9.2
            in the rescaled band.

    Returns:
        ``image`` with the 'SR_B.' bands replaced by ``DN * 0.0000275 - 0.2``
        and 'ST_B6' replaced by ``DN * 0.00341802 - 124.15``, with the
        requested masks applied.
    """
    qa_pixel = image.select('QA_PIXEL')

    # Bits 0-4 of QA_PIXEL must all be zero for a pixel to be kept.
    qaMask = qa_pixel.bitwiseAnd(0b11111).eq(0)
    saturationMask = image.select('QA_RADSAT').eq(0)

    opticalBands = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    # NOTE(review): -124.15 presumably yields degrees Celsius - confirm.
    thermalBand = image.select('ST_B6').multiply(0.00341802).add(-124.15)

    masked = (
        image.addBands(opticalBands, None, True)
        .addBands(thermalBand, None, True)
        .updateMask(qaMask)
        .updateMask(saturationMask)
    )

    if apply_water_mask:
        waterMask = qa_pixel.eq(5440).Or(qa_pixel.eq(5504))
        masked = masked.updateMask(waterMask)

    if min_thermal_dn is not None:
        # Threshold is evaluated on the unscaled input band, not the rescaled one.
        masked = masked.updateMask(image.select('ST_B6').gt(min_thermal_dn))

    return masked


# Define a function to compute the mean reflectance values for the specified bands within the region of interest (lake)
def reflectance(img, lake):
    """Tag an image with its date and its mean band values over a region.

    Args:
        img: Image to summarise (used as an ``ee.Image``).
        lake: Geometry over which the mean is taken.

    Returns:
        ``img`` with two properties set: 'DATE_SMP' (the formatted image date)
        and 'reflectance' (the mean of each band named in ``STD_NAMES`` over
        ``lake``, reduced at a scale of 30).
    """
    regional_means = img.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=lake,
        scale=30,
    )
    selected_means = regional_means.select(STD_NAMES)

    dated_img = img.set('DATE_SMP', img.date().format())
    return dated_img.set('reflectance', selected_means)

# Per-lake result frames; concatenated into one table after the loop
dfs = []

# A point geometry needs both coordinates, so skip lakes missing either one
lakes_182 = lakes_182[lakes_182['Lat'].notnull() & lakes_182['Lon'].notnull()]

# Loop through each lake and retrieve the Landsat 5 record for that lake
for index, row in lakes_182.iterrows():
    # Extract lake information
    pondId = row['PERMANENT_ID']
    pondname = row['SITE_NAME']
    Lon = row['Lon']
    Lat = row['Lat']

    # 100-meter buffer around the lake point; used both to find scenes and to average over
    lake_point = ee.Geometry.Point(Lon, Lat)
    lake_polygon = lake_point.buffer(100)

    # Landsat 5 scenes intersecting the buffer with CLOUD_COVER < 15, masked by
    # maskL457sr_SUMMER and reduced to the single renamed band in STD_NAMES
    l5 = (
        ee.ImageCollection('LANDSAT/LT05/C02/T1_L2')
        .filter(ee.Filter.calendarRange(1, 12, 'month'))  # all months; narrow the range for a seasonal subset
        .filterBounds(lake_polygon)
        .filter(ee.Filter.lt('CLOUD_COVER', 15))
        .map(maskL457sr_SUMMER)
        .select(LC7_BANDS, STD_NAMES)
    )

    # Attach the date and the buffer-mean band values to every Landsat 5 image
    map_reflectance = l5.map(lambda img: reflectance(img, lake_polygon))

    # Collect [date, {band: mean}] pairs for the whole collection
    list_reflectance = map_reflectance.reduceColumns(ee.Reducer.toList(2), ['DATE_SMP', 'reflectance']).values().get(0)

    # One record per image. Bands whose mean came back as None are left out so
    # they turn into NaN; fixing the columns keeps the frame well-formed even
    # when the lake has no images at all.
    records = [
        {'DATE_SMP': date_str, **{band: value for band, value in band_means.items() if value is not None}}
        for date_str, band_means in list_reflectance.getInfo()
    ]
    df_reflectance = pd.DataFrame(records, columns=['DATE_SMP'] + STD_NAMES)
    df_reflectance['DATE_SMP'] = pd.to_datetime(df_reflectance['DATE_SMP']).dt.date
    df_reflectance[STD_NAMES] = df_reflectance[STD_NAMES].astype('float64')

    df_reflectance['PERMANENT_ID'] = pondId
    df_reflectance['SITE_NAME'] = pondname
    df_reflectance['Lon'] = Lon
    df_reflectance['Lat'] = Lat

    # Add the DataFrame to the list
    dfs.append(df_reflectance)


# Concatenate all lakes into a single DataFrame, ordered by 'DATE_SMP' ascending
df_all_lakes_l5 = pd.concat(dfs, ignore_index=True).sort_values(by='DATE_SMP')

df_all_lakes_l5

Unnamed: 0,DATE_SMP,temp_satellite,PERMANENT_ID,SITE_NAME,Lon,Lat
39556,1984-03-24,,131843989,Eighth Lake,-74.702732,43.779283
28972,1984-03-24,,131846798,Black Creek Lake,-74.900339,43.452793
13796,1984-03-24,,131845402,Big Otter Lake,-75.105657,43.731364
33290,1984-03-24,-9.143881,132437583,Lower Pond,-74.701499,43.808682
20696,1984-03-24,,47722327,Hamilton Lake,-74.386472,43.435571
...,...,...,...,...,...,...
24166,2011-10-09,14.419731,132433574,Amber Lake,-74.619575,44.398996
30255,2011-10-09,14.931016,131841253,Negro Lake,-74.875446,43.993328
28323,2011-10-09,15.354476,132435441,Handsome Pond,-74.454262,44.056690
35529,2011-10-09,14.511497,131845523,Beaver Lake,-74.742622,43.649207


In [7]:
# Discard rows that have a missing value in any column (non-mutating form)
df_all_lakes_l5 = df_all_lakes_l5.dropna()
df_all_lakes_l5

Unnamed: 0,DATE_SMP,temp_satellite,PERMANENT_ID,SITE_NAME,Lon,Lat
33290,1984-03-24,-9.143881,132437583,Lower Pond,-74.701499,43.808682
10973,1984-03-24,-16.504661,47725937,Woods Lake,-74.313819,43.253777
15450,1984-03-24,-16.889114,53540109,Spruce Lake,-74.611838,43.533795
19309,1984-03-24,-15.560122,89365829,Garnet Lake,-74.024109,43.527467
15683,1984-03-24,-18.183169,47727445,Jackson Summit Reservoir,-74.281605,43.144261
...,...,...,...,...,...,...
24166,2011-10-09,14.419731,132433574,Amber Lake,-74.619575,44.398996
30255,2011-10-09,14.931016,131841253,Negro Lake,-74.875446,43.993328
28323,2011-10-09,15.354476,132435441,Handsome Pond,-74.454262,44.056690
35529,2011-10-09,14.511497,131845523,Beaver Lake,-74.742622,43.649207


In [8]:
# Clean the Landsat 5 table in one non-mutating chain:
#   1. strip leading/trailing whitespace from the column names,
#   2. drop rows that have a missing value in any column,
#   3. keep the first row for each date/lake combination.
df_all_lakes_l5 = (
    df_all_lakes_l5
    .set_axis(df_all_lakes_l5.columns.str.strip(), axis=1)
    .dropna()
    .drop_duplicates(subset=['DATE_SMP', 'PERMANENT_ID', 'SITE_NAME', 'Lon', 'Lat'])
)

df_all_lakes_l5

Unnamed: 0,DATE_SMP,temp_satellite,PERMANENT_ID,SITE_NAME,Lon,Lat
33290,1984-03-24,-9.143881,132437583,Lower Pond,-74.701499,43.808682
10973,1984-03-24,-16.504661,47725937,Woods Lake,-74.313819,43.253777
15450,1984-03-24,-16.889114,53540109,Spruce Lake,-74.611838,43.533795
19309,1984-03-24,-15.560122,89365829,Garnet Lake,-74.024109,43.527467
15683,1984-03-24,-18.183169,47727445,Jackson Summit Reservoir,-74.281605,43.144261
...,...,...,...,...,...,...
35296,2011-10-09,15.950951,131841333,Clear Lake,-74.920201,43.988143
24166,2011-10-09,14.419731,132433574,Amber Lake,-74.619575,44.398996
30255,2011-10-09,14.931016,131841253,Negro Lake,-74.875446,43.993328
28323,2011-10-09,15.354476,132435441,Handsome Pond,-74.454262,44.056690


### Calculating Landsat 5 Temperature Slope per Decade

In [9]:
import pandas as pd
from scipy import stats
import numpy as np

# One record per (lake, month) regression
results = []

# Regress against real datetimes. Re-bind the name rather than assigning a
# column into the frame returned by drop_duplicates.
df_all_lakes_l5 = df_all_lakes_l5.assign(DATE_SMP=pd.to_datetime(df_all_lakes_l5['DATE_SMP']))

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_l5.groupby('PERMANENT_ID'):
    # Every row in the group belongs to this lake, so its first name is enough
    pond_n = lake_data['SITE_NAME'].iloc[0]

    # Loop over each calendar month
    for month in range(1, 13):
        month_data = lake_data[lake_data['DATE_SMP'].dt.month == month]

        # Dates at one-second resolution, independent of how pandas stores them
        dates = month_data['DATE_SMP'].values.astype('datetime64[s]')
        y_values = month_data['temp_satellite'].values

        # Keep only pairs where both the date and the temperature are present
        mask = ~np.isnat(dates) & ~np.isnan(y_values)
        x_values = dates[mask].astype(np.int64)  # seconds since the epoch
        y_values = y_values[mask]

        # A slope is only defined when there is more than one distinct date
        if len(np.unique(x_values)) > 1:
            fit = stats.linregress(x_values, y_values)

            # Slope is in degrees per second; scale to a 365-day-year decade.
            # No significance filter is applied here - the p-value is stored so
            # that it can be filtered on later.
            slope_per_decade = fit.slope * 10 * 365 * 24 * 3600
            result = {
                'Lake_ID': lake_id,
                'Lake_name': pond_n,
                'Month': month,
                'Slope_per_Decade': slope_per_decade,
                'P-value': fit.pvalue,
                'R-value': fit.rvalue,
                'Temp_satellite': y_values.mean()  # Mean temperature of the points used in the fit
            }
            results.append(result)

# Create a DataFrame from the results
results_df_l5 = pd.DataFrame(results)

results_df_l5

Unnamed: 0,Lake_ID,Lake_name,Month,Slope_per_Decade,P-value,R-value,Temp_satellite
0,47719887,Siamese Ponds,1,2.173006,0.343327,0.358575,-14.555904
1,47719887,Siamese Ponds,2,-2.435896,0.398425,-0.347890,-13.762352
2,47719887,Siamese Ponds,3,3.741756,0.397510,0.602490,-3.186486
3,47719887,Siamese Ponds,4,1.775775,0.167836,0.447228,2.167968
4,47719887,Siamese Ponds,5,2.806756,0.010230,0.573637,14.108672
...,...,...,...,...,...,...,...
2176,167248139,Chaumont Pond,8,1.406257,0.104058,0.663669,21.194552
2177,167248139,Chaumont Pond,9,2.793671,0.002594,0.783074,16.547347
2178,167248139,Chaumont Pond,10,-0.239632,0.884136,-0.053116,9.270602
2179,167248139,Chaumont Pond,11,-14.774398,0.089885,-0.990049,-1.449592
