Landsat 7

In [1]:
import ee
ee.Initialize()
# ee.Authenticate()

ALTM_50_LAKES = ee.FeatureCollection('projects/ee-mazarderakhsh/assets/ALAP-ALTM-195-centroids') \
    .filter(ee.Filter.gte('Field1', 150)) \
    .filter(ee.Filter.lte('Field1', 199))  # pick only ALTM lakes

# Convert the lakes FeatureCollection to a List
lakesList = ALTM_50_LAKES.toList(ALTM_50_LAKES.size())

In [2]:
import pandas as pd
import ee

ee.Initialize()

# Define the Landsat 5 bands and their corresponding standard names
LC5_BANDS = ['ST_B6']
LC7_BANDS = ['ST_B6']
STD_NAMES = ['temp_satellite']


# all_lakes=ee.FeatureCollection('projects/ee-mazarderakhsh/assets/ALAP-ALTM-Lakes-195')

def maskL457sr(image):
    qaMask = image.select('QA_PIXEL').bitwiseAnd(int('11111', 2)).eq(0)
    saturationMask = image.select('QA_RADSAT').eq(0)
    waterMask = image.select('QA_PIXEL').eq(5440).Or(image.select('QA_PIXEL').eq(5504))
    opticalBands = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    thermalBand = image.select('ST_B6').multiply(0.00341802).add(-124.15)
    return image.addBands(opticalBands, None, True).addBands(thermalBand, None, True).updateMask(qaMask).updateMask(saturationMask).updateMask(waterMask)



# Define a function to compute the mean reflectance values for the specified bands within the region of interest (lake)
def reflectance(img, lake):
    reflectance_values = img.reduceRegion(reducer=ee.Reducer.mean(), geometry=lake, scale=30).select(STD_NAMES)
    return img.set('DATE_SMP', img.date().format()).set('reflectance', reflectance_values)

# Initialize an empty list to store the dataframes for each lake
dfs = []



# Filter out rows with empty latitude or longitude values
# df_coord_filtered = df_coord[(df_coord['Lat'].notnull()) & (df_coord['Lon'].notnull())]

# Loop through each lake name and retrieve Landsat 5 imagery for that lake
for i in range(lakesList.size().getInfo()):
    lake = ee.Feature(lakesList.get(i))
    lat = ee.Number(lake.geometry().coordinates().get(1))
    lon = ee.Number(lake.geometry().coordinates().get(0))
    lake_point = ee.Geometry.Point([lon, lat]).buffer(90)
    lakeName = ee.String(lake.get('NAME')).getInfo()
    lakeID = ee.String(lake.get('Field1')).getInfo()

    lat_value = float(ee.Number(lake.geometry().coordinates().get(1)).getInfo())
    lon_value = float(ee.Number(lake.geometry().coordinates().get(0)).getInfo())

  
  

    # Retrieve Landsat 7 imagery for the specific lake
    l7 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
        .filter(ee.Filter.calendarRange(1, 12, 'month')) \
        .filterBounds(lake_point) \
        .filter(ee.Filter.lt('CLOUD_COVER', 50)) \
        .map(maskL457sr) \
        .select(LC7_BANDS, STD_NAMES)

  


    # Map the reflectance function over the Landsat 5 ImageCollection for the specific lake
    map_reflectance = l7.map(lambda img: reflectance(img, lake_point))

    # Reduce the mapped image collection to get reflectance values for the specific lake
    list_reflectance = map_reflectance.reduceColumns(ee.Reducer.toList(2), ['DATE_SMP', 'reflectance']).values().get(0)

    # Convert the results to a pandas DataFrame
    df_reflectance = pd.DataFrame(list_reflectance.getInfo(), columns=['DATE_SMP', 'reflectance'])
    df_reflectance['DATE_SMP'] = pd.to_datetime(df_reflectance['DATE_SMP'])
    df_reflectance['DATE_SMP'] = df_reflectance['DATE_SMP'].dt.date
    df_reflectance['reflectance'] = df_reflectance['reflectance'].apply(lambda x: {k: v for k, v in x.items() if v is not None})

    # Unpack the 'reflectance' dictionary and create separate columns for each band
    df_reflectance = pd.concat([df_reflectance.drop('reflectance', axis=1),
                                df_reflectance['reflectance'].apply(pd.Series).astype('float64', errors='ignore')], axis=1)
    
    df_reflectance['SITE_ID'] = lakeID
    df_reflectance['SITE_NAME'] = lakeName
    df_reflectance['Lon'] = lon_value
    df_reflectance['Lat'] = lat_value

    # Add the DataFrame to the list
    dfs.append(df_reflectance)


# Concatenate all DataFrames into a single DataFrame
df_all_lakes_Landsat_7 = pd.concat(dfs, ignore_index=True)

# Sort the DataFrame by 'DATE_SMP' in ascending order
df_all_lakes_Landsat_7.sort_values(by='DATE_SMP', inplace=True)

# df_all_lakes.dropna(inplace=True)
df_all_lakes_Landsat_7

Unnamed: 0,DATE_SMP,temp_satellite,SITE_ID,SITE_NAME,Lon,Lat
0,1999-07-05,,198,Windfall Pond,-74.828821,43.805388
6215,1999-07-05,,187,Queer Lake,-74.799690,43.811038
12773,1999-07-05,,195,West Pond,-74.881100,43.810806
22519,1999-07-05,,171,Jockeybush Lake,-74.594290,43.303249
2877,1999-07-05,,189,Sagamore Lake,-74.619377,43.767687
...,...,...,...,...,...,...
16240,2023-10-03,17.412764,199,Woods Lake,-74.952392,43.870121
15571,2023-10-15,,172,Lake Colden,-73.979612,44.122702
10873,2023-10-15,,159,Avalanche Lake,-73.966783,44.132869
20028,2023-10-15,,183,Nate Pond,-74.090528,43.857497


In [3]:
# Remove duplicates from specified columns
df_all_lakes_Landsat_7 = df_all_lakes_Landsat_7.drop_duplicates(subset=['DATE_SMP', 'SITE_ID', 'SITE_NAME', 'Lon', 'Lat'])
df_all_lakes_Landsat_7.dropna(inplace=True)

# Now df_all_lakes_Landsat8 has duplicates removed from the specified columns
df_all_lakes_Landsat_7

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7.dropna(inplace=True)


Unnamed: 0,DATE_SMP,temp_satellite,SITE_ID,SITE_NAME,Lon,Lat
20879,1999-07-05,20.801359,196,Willis Lake,-74.242343,43.369172
5136,1999-07-05,18.924134,156,"Rondaxe, Lake",-74.901830,43.766425
16891,1999-07-05,15.981304,167,G Lake,-74.632893,43.413879
16465,1999-07-05,17.839811,170,Indian Lake,-74.755483,43.617250
20452,1999-07-05,19.362261,185,Otter Lake,-74.499173,43.188512
...,...,...,...,...,...,...
1106,2023-10-03,16.863695,181,Middle Pond,-74.378613,44.339499
13878,2023-10-03,15.528493,154,Copperas Pond,-74.376174,44.313918
17317,2023-10-03,17.338340,167,G Lake,-74.632893,43.413879
18356,2023-10-03,17.938355,182,Middle Settlement Lake,-75.097268,43.685064


In [4]:
# Define the file path for the Excel file
excel_file_path = 'Landsat_7_50Lakes_4232024_WaterMask.xlsx'

# Export the DataFrame to Excel
df_all_lakes_Landsat_7.to_excel(excel_file_path, index=False)

print("DataFrame successfully exported to Excel file:", excel_file_path)

DataFrame successfully exported to Excel file: Landsat_7_50Lakes_4232024_WaterMask.xlsx


In [5]:
import pandas as pd
from scipy import stats
import numpy as np


# Create a list to store the results
results = []
df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = df_all_lakes_Landsat_7.loc[df_all_lakes_Landsat_7['SITE_ID'] == lake_id, 'SITE_NAME'].iloc[0]

    # Combine data for June, July, and August
    summer_data = lake_data[lake_data['DATE_SMP'].dt.month.isin([1,2,3,4,5,6, 7, 8,9,10,11,12])]
    
    # Remove NaN or blank values from x_values and corresponding y_values
    x_values = summer_data['DATE_SMP'].values.astype(np.int64) // (10 ** 9)  # Convert to seconds
    y_values = summer_data['temp_satellite'].values
    mask = ~np.isnan(x_values) & ~np.isnan(y_values)
    x_values = x_values[mask]
    y_values = y_values[mask]
    
    # Check if x and y contain more than one distinct value
    if len(np.unique(x_values)) > 1:
        slope, _, r_value, p_value, _ = stats.linregress(x_values, y_values)
        
        # Check if the trend is significant (p-value < 0.05)
        if p_value < 0.05:
            slope_per_decade = slope * 10 * 365 * 24 * 3600  # Convert to °C/decade
            result = {
                'Lake_ID': lake_id,
                'Lake_name': pond_n,
                'Month': 'Annual',
                'Slope_per_Decade': slope_per_decade,
                'P-value': p_value,
                'R-value': r_value,
                'Temp_satellite': y_values.mean()  # Calculate mean temperature for non-blank values
            }
            results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv('significant_slopes_L7_50_Annual_4232024_WaterMask.csv', index=False)

print("Results saved to significant_slopes_L5_1000_Annual_4122024_1.csv")


Results saved to significant_slopes_L5_1000_Annual_4122024_1.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


In [6]:
import pandas as pd
from scipy import stats
import numpy as np


# Create a list to store the results
results = []
df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = df_all_lakes_Landsat_7.loc[df_all_lakes_Landsat_7['SITE_ID'] == lake_id, 'SITE_NAME'].iloc[0]

    # Combine data for June, July, and August
    summer_data = lake_data[lake_data['DATE_SMP'].dt.month.isin([6, 7, 8])]
    
    # Remove NaN or blank values from x_values and corresponding y_values
    x_values = summer_data['DATE_SMP'].values.astype(np.int64) // (10 ** 9)  # Convert to seconds
    y_values = summer_data['temp_satellite'].values
    mask = ~np.isnan(x_values) & ~np.isnan(y_values)
    x_values = x_values[mask]
    y_values = y_values[mask]
    
    # Check if x and y contain more than one distinct value
    if len(np.unique(x_values)) > 1:
        slope, _, r_value, p_value, _ = stats.linregress(x_values, y_values)
        
        # Check if the trend is significant (p-value < 0.05)
        if p_value < 0.05:
            slope_per_decade = slope * 10 * 365 * 24 * 3600  # Convert to °C/decade
            result = {
                'Lake_ID': lake_id,
                'Lake_name': pond_n,
                'Month': 'June-August',
                'Slope_per_Decade': slope_per_decade,
                'P-value': p_value,
                'R-value': r_value,
                'Temp_satellite': y_values.mean()  # Calculate mean temperature for non-blank values
            }
            results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv('significant_slopes_L7_50_SUMMER_4232024_WaterMask.csv', index=False)

print("Results saved to significant_slopes_L5_1000_SUMMER_4122024_1.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


Results saved to significant_slopes_L5_1000_SUMMER_4122024_1.csv


In [7]:
import pandas as pd
from scipy import stats
import numpy as np

# Create a list to store the results
results = []
df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = df_all_lakes_Landsat_7.loc[df_all_lakes_Landsat_7['SITE_ID'] == lake_id, 'SITE_NAME'].iloc[0]

    # Combine data for June, July, and August
    summer_data = lake_data[lake_data['DATE_SMP'].dt.month.isin([5,6, 7, 8,9,10,11])]
    
    # Filter out rows with NaN values
    summer_data = summer_data.dropna(subset=['temp_satellite'])
    
    # Check if the data is not empty
    if not summer_data.empty:
        # Compute the linear regression
        x_values = summer_data['DATE_SMP'].values.astype(np.int64) // (10 ** 9)  # Convert to seconds
        y_values = summer_data['temp_satellite'].values
        
        # Check if x and y contain more than one value
        if len(x_values) > 1:
            slope, _, r_value, p_value, _ = stats.linregress(x_values, y_values)
            
            # Check if the trend is significant (p-value < 0.05)
            if p_value < 0.05:
                slope_per_decade = slope * 10 * 365 * 24 * 3600  # Convert to °C/decade
                result = {
                    'Lake_ID': lake_id,
                    'Lake_name': pond_n,
                    'Month': 'May-November',
                    'Slope_per_Decade': slope_per_decade,
                    'P-value': p_value,
                    'R-value': r_value,
                    'Temp_satellite': y_values.mean()  # Calculate mean temperature
                }
                results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv('significant_slopes_L7_50_May_November_4232024_WaterMask.csv', index=False)

print("Results saved to significant_slopes_L5_1000_May_November_4122024_1.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


Results saved to significant_slopes_L5_1000_May_November_4122024_1.csv


In [8]:
import pandas as pd

# Assuming df_all_lakes_Landsat5 is your DataFrame containing Landsat 5 data

# Create an empty list to store the results
results = []

df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = lake_data['SITE_NAME'].iloc[0]  # Get the lake name
    
    # Filter data for the specified years and summer months
    summer_data_2009_2011 = lake_data[
        (lake_data['DATE_SMP'].dt.year >= 2009) & 
        (lake_data['DATE_SMP'].dt.year <= 2011) & 
        (lake_data['DATE_SMP'].dt.month.isin([6, 7, 8]))
    ]
    
    # Calculate the average mean temperature for summer
    average_mean_temp = summer_data_2009_2011['temp_satellite'].mean()
    
    # Append the result to the results list
    results.append({
        'Lake_ID': lake_id,
        'Lake_name': pond_n,
        'Average_mean_temp': average_mean_temp
    })

# Create a DataFrame from the results
average_temps_df = pd.DataFrame(results)

# Save the results to a CSV file
average_temps_df.to_csv('average_mean_temps_landsat7_summer_2009_2011_50LAKES_WaterMask.csv', index=False)

print("Average mean temperatures saved to average_mean_temps_landsat7_summer_2009_2011.csv")


Average mean temperatures saved to average_mean_temps_landsat7_summer_2009_2011.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


In [9]:
import pandas as pd
from scipy import stats
import numpy as np

# Create a list to store the results
results = []
df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = df_all_lakes_Landsat_7.loc[df_all_lakes_Landsat_7['SITE_ID'] == lake_id, 'SITE_NAME'].iloc[0]

    # Loop over each month
    for month in range(1, 13):
        month_data = lake_data[lake_data['DATE_SMP'].dt.month == month]

        # Remove NaN or blank values from x_values and corresponding y_values
        x_values = month_data['DATE_SMP'].values.astype(np.int64) // (10 ** 9)  # Convert to seconds
        y_values = month_data['temp_satellite'].values
        mask = ~np.isnan(x_values) & ~np.isnan(y_values)
        x_values = x_values[mask]
        y_values = y_values[mask]

        # Check if x and y contain more than one distinct value
        if len(np.unique(x_values)) > 1:
            slope, _, r_value, p_value, _ = stats.linregress(x_values, y_values)

            # Check if the trend is significant (p-value < 0.05)
            if p_value < 0.05:
                slope_per_decade = slope * 10 * 365 * 24 * 3600  # Convert to °C/decade
                result = {
                    'Lake_ID': lake_id,
                    'Lake_name': pond_n,
                    'Month': month,
                    'Slope_per_Decade': slope_per_decade,
                    'P-value': p_value,
                    'R-value': r_value,
                    'Temp_satellite': y_values.mean()  # Calculate mean temperature for non-blank values
                }
                results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv('significant_slopes_L7_50_Monthly_4232024_WaterMask.csv', index=False)

print("Results saved to significant_slopes_L5_1000_Monthly_4122024_1.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


Results saved to significant_slopes_L5_1000_Monthly_4122024_1.csv


In [10]:
import pandas as pd
from scipy import stats
import numpy as np

# Create a list to store the results
results = []
df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])

# Loop over each lake in the DataFrame
for lake_id, lake_data in df_all_lakes_Landsat_7.groupby('SITE_ID'):
    pond_n = df_all_lakes_Landsat_7.loc[df_all_lakes_Landsat_7['SITE_ID'] == lake_id, 'SITE_NAME'].iloc[0]

    # Loop over each month
    for month in range(1, 13):
        month_data = lake_data[lake_data['DATE_SMP'].dt.month == month]

        # Remove NaN or blank values from x_values and corresponding y_values
        x_values = month_data['DATE_SMP'].values.astype(np.int64) // (10 ** 9)  # Convert to seconds
        y_values = month_data['temp_satellite'].values
        mask = ~np.isnan(x_values) & ~np.isnan(y_values)
        x_values = x_values[mask]
        y_values = y_values[mask]

        # Check if x and y contain more than one distinct value
        if len(np.unique(x_values)) > 1:
            slope, _, r_value, p_value, _ = stats.linregress(x_values, y_values)

            slope_per_decade = slope * 10 * 365 * 24 * 3600  # Convert to °C/decade
            result = {
                'Lake_ID': lake_id,
                'Lake_name': pond_n,
                'Month': month,
                'Slope_per_Decade': slope_per_decade,
                'P-value': p_value,
                'R-value': r_value,
                'Temp_satellite': y_values.mean()  # Calculate mean temperature for non-blank values
            }
            results.append(result)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv('significant_slopes_L7_50_Monthly_4232024_RemovePValue_WaterMask.csv', index=False)

print("Results saved to significant_slopes_L7_50_Monthly_4232024.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_lakes_Landsat_7['DATE_SMP'] = pd.to_datetime(df_all_lakes_Landsat_7['DATE_SMP'])


Results saved to significant_slopes_L7_50_Monthly_4232024.csv
