# NDSI Ice/Water Classification

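Each pixel is classified as ice (1) if its NDSI is at least 0.5 **and** at least one of its NIR values (`NIR_S` or `NIR_L`) is greater than 0.11; otherwise it is classified as water (0).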


In [1]:
# imports and directories
import os
import pandas as pd
from tqdm.notebook import tqdm

#### DIRECTORIES ###
# Replace the 'USER_INPUT' placeholders with real paths before running.
# hls_dir: folder of pixel-level CSV files; the classification step reads each
#          file and overwrites it in place with an added 'ice_class' column.
hls_dir = r'USER_INPUT'
# icp_dir: folder that receives the per-lake ICP result files (lake_<lake_id>.csv).
icp_dir = r'USER_INPUT'

## CLASSIFICATION

In [2]:
# Classify every pixel of every CSV in hls_dir as ice (1) or water (0) and
# write the result back to the same file in a new 'ice_class' column.

# Thresholds of the classification rule:
#   ice  <=>  NDSI >= NDSI_ICE_THRESHOLD  and  (NIR_S > NIR_ICE_THRESHOLD or NIR_L > NIR_ICE_THRESHOLD)
NDSI_ICE_THRESHOLD = 0.5
NIR_ICE_THRESHOLD = 0.11

# Calculate the total number of data rows across all files so the progress
# bar can report overall progress instead of per-file progress.
total_rows = 0
ndsi_files = [f for f in os.listdir(hls_dir) if f.endswith(".csv")]
for f in ndsi_files:
    path = os.path.join(hls_dir, f)
    try:
        # Context manager closes the handle immediately; with thousands of
        # files, leaving them to the garbage collector can exhaust descriptors.
        with open(path) as fh:
            n_lines = sum(1 for _ in fh)
        # Exclude the header line; a zero-length file must not subtract from the total.
        total_rows += max(n_lines - 1, 0)
    except Exception as e:
        print(f"Error calculating rows for file {f}: {e}")

# Initialize tqdm progress bar with the total row count
with tqdm(total=total_rows, desc='Classifying all pixels', unit='rows') as pbar:
    for f in ndsi_files:
        path = os.path.join(hls_dir, f)
        try:
            df = pd.read_csv(path)
            # Vectorised form of the per-row rule. Comparisons against NaN
            # evaluate to False, so pixels with missing values become water (0),
            # exactly as a row-by-row `1 if ... else 0` would produce.
            is_ice = (df['NDSI'] >= NDSI_ICE_THRESHOLD) & (
                (df['NIR_S'] > NIR_ICE_THRESHOLD) | (df['NIR_L'] > NIR_ICE_THRESHOLD)
            )
            df['ice_class'] = is_ice.astype(int)  # 1 = ice, 0 = water
            pbar.update(len(df))  # Advance the bar by the rows just classified
            df.to_csv(path, index=False)  # Overwrite existing file with classification
        except Exception as e:
            # Report and continue so one bad file does not abort the whole run.
            print(f"Failed to process file {f}: {e}")

Classifying all pixels:   0%|          | 0/27289840 [00:00<?, ?rows/s]

## ICP (Ice Coverage Percentage) Calculation
Note: this method was developed for an earlier version of this project. While it is not used directly in the final analysis, it is retained here for reference and potential future use.

In [3]:
# Column layout of the ICP result table. Passed explicitly to the DataFrame
# constructor so that an empty result still exposes these columns.
_ICP_COLUMNS = ['lake_id', 'date', 'hls2_icp_%', 'ice_class']


def _classify_icp(icp):
    """Map an ice coverage percentage to 'open' (<= 25), 'partial' (25-75, exclusive) or 'full'."""
    if icp <= 25:
        return 'open'
    if icp < 75:
        return 'partial'
    return 'full'


def calculate_icp(df):
    """Compute the ice coverage percentage (ICP) per lake and date.

    Parameters
    ----------
    df : pandas.DataFrame
        Classified pixels. Must contain the columns 'lake_id', 'date' and
        'ice_class' (1 for ice, 0 for water).

    Returns
    -------
    pandas.DataFrame
        One row per (lake_id, date) group with the columns 'lake_id', 'date',
        'hls2_icp_%' (ice pixels / total pixels * 100) and 'ice_class'
        ('open', 'partial' or 'full'). The columns are present even when the
        input produces no groups, so callers can always index them.
    """
    results = []
    # Group by lake_id and date
    for (lake_id, date), g in df.groupby(['lake_id', 'date']):
        total_pixels = len(g)
        ice_pixels = g['ice_class'].sum()  # 1 for ice, 0 for water
        # Guard against division by zero should a group ever be empty.
        icp = (ice_pixels / total_pixels) * 100 if total_pixels > 0 else 0.0
        results.append({
            'lake_id': lake_id,
            'date': date,
            'hls2_icp_%': icp,
            'ice_class': _classify_icp(icp),
        })
    # Without explicit columns an empty `results` list would yield a frame
    # with no columns at all, and `result['lake_id']` would raise KeyError.
    return pd.DataFrame(results, columns=_ICP_COLUMNS)
print('Beginning ICP classification process...')
# Make sure the output directory exists before the first per-lake file is written.
os.makedirs(icp_dir, exist_ok=True)
# Process each classified CSV file; skip anything in the folder that is not a CSV.
classified_files = [f for f in os.listdir(hls_dir) if f.endswith('.csv')]
# Lakes whose output file has already been created during this run. The first
# write for a lake truncates any file left over from a previous run; later
# writes append, so a lake that appears in several input files keeps all of
# its rows instead of only those from the last file processed.
written_lakes = set()
for f in tqdm(classified_files, desc='Calculating ICP...', unit='file'):
    classified_df = pd.read_csv(os.path.join(hls_dir, f))
    icp_df = calculate_icp(classified_df) # calc ICP
    if icp_df.empty:
        # Nothing to write for this file (e.g. it contained no pixel rows).
        continue
    # Save the ICP results to a CSV file for each lake
    for lake_id, lake_df in icp_df.groupby('lake_id'):
        output_file = os.path.join(icp_dir, f'lake_{lake_id}.csv')
        first_write = lake_id not in written_lakes
        lake_df.to_csv(
            output_file,
            mode='w' if first_write else 'a',
            header=first_write,  # write the header only once per file
            index=False,
        )
        written_lakes.add(lake_id)
print("ICP classification complete.")


Beginning ICP classification process...


Calculating ICP...:   0%|          | 0/2872 [00:00<?, ?file/s]

ICP classification complete.
