<a href="https://colab.research.google.com/github/emilymacris/DS-4002/blob/main/project_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! git clone https://github.com/emilymacris/ds4002-project3

In [None]:
!pip install earthengine-api
!pip install geemap

In [None]:
import ee
import geemap

In [None]:
# Authenticate with Earth Engine, then initialize the client against the
# Cloud project named below.
EE_PROJECT = 'ee-bdf7bz'

ee.Authenticate()
ee.Initialize(project=EE_PROJECT)


In [None]:
# Area of interest: rectangle around the Columbia Glacier area.
aoi_bounds = [-147.5, 60.7, -146.5, 61.3]
aoi = ee.Geometry.Rectangle(aoi_bounds)

# Landsat 8 Collection 2 Level-2 scenes that intersect the AOI within the
# date window, reduced to a per-pixel median and clipped to the AOI.
landsat_sr = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
summer_scenes = landsat_sr.filterBounds(aoi).filterDate('2021-06-01', '2021-08-31')
image = summer_scenes.median().clip(aoi)

# Red/green/blue bands for a true-color rendering of the composite.
true_color_vis = {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 30000}

Map = geemap.Map(center=[60.9, -147.0], zoom=9)
Map.addLayer(image, true_color_vis, 'True Color')
Map


In [None]:
# Landsat Collection 2 Level-2 surface-reflectance bands are stored as scaled
# integers: reflectance = DN * 0.0000275 - 0.2. Because of the additive
# offset, a normalized difference computed directly on the raw DNs is biased,
# so the bands are converted to reflectance before the indices are derived.
SR_SCALE = 0.0000275
SR_OFFSET = -0.2

reflectance = (
    image.select(['SR_B3', 'SR_B4', 'SR_B5', 'SR_B6'])
    .multiply(SR_SCALE)
    .add(SR_OFFSET)
)

# NOTE: normalizedDifference masks any pixel where either input is negative,
# which can happen for very dark surfaces once the offset is applied.

# NDSI (snow/ice index): (green - SWIR1) / (green + SWIR1)
ndsi = reflectance.normalizedDifference(['SR_B3', 'SR_B6']).rename('NDSI')

# NDVI (vegetation index): (NIR - red) / (NIR + red)
ndvi = reflectance.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')

# Attach both indices to the composite. overwrite=True keeps the cell
# idempotent: re-running it replaces NDSI/NDVI instead of appending
# duplicate, suffixed bands.
image = image.addBands(srcImg=ee.Image.cat(ndsi, ndvi), overwrite=True)

In [None]:
# Per-band mean, min and max over the AOI, computed in a single pass by
# combining the reducers on shared inputs.
summary_reducer = ee.Reducer.mean().combine(ee.Reducer.minMax(), sharedInputs=True)

stats = image.reduceRegion(
    reducer=summary_reducer,
    geometry=aoi,
    scale=30,
    maxPixels=1e9,  # upper bound on the number of pixels the reduction may use
)

# getInfo() pulls the computed dictionary back to the client for printing.
print(stats.getInfo())

In [None]:
# Earth Engine asset holding the training points.
TRAINING_ASSET_ID = 'users/bdf7bz/glacier_training'
training_points = ee.FeatureCollection(TRAINING_ASSET_ID)


In [None]:
# Sample the image's band values at the training points, carrying over the
# 'class' property as the label column.
sampling_args = {
    'collection': training_points,
    'properties': ['class'],
    'scale': 30,
}
training_data = image.sampleRegions(**sampling_args)

In [None]:
import json
import pandas as pd

# Convert the sampled FeatureCollection into a GeoJSON dictionary.
geojson = geemap.ee_to_geojson(training_data)

# Flatten each GeoJSON feature into one DataFrame row; nested keys become
# dotted column names.
feature_records = geojson['features']
df = pd.json_normalize(feature_records)

# Show the first rows
df.head()

In [None]:
# Strip the 'properties.' prefix that flattening the GeoJSON left on the
# column names.
df.columns = [column.replace('properties.', '') for column in df.columns]

# Remove the GeoJSON bookkeeping columns, keeping the remaining ones.
drop_cols = ['type', 'geometry', 'id']
df_clean = df.drop(labels=drop_cols, axis='columns')

# Show the first rows of the cleaned frame
df_clean.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One boxplot per spectral index, split by class.
for index_name in ('NDSI', 'NDVI'):
    fig, ax = plt.subplots()
    sns.boxplot(x='class', y=index_name, data=df_clean, ax=ax)
    ax.set_title(f'{index_name} by Class')
    plt.show()

# Pairwise correlations between all columns of the cleaned frame.
correlations = df_clean.corr()
fig, ax = plt.subplots()
sns.heatmap(correlations, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Human-readable names for the class codes.
# Assumes 'class' is coded 0 = non-glacier, 1 = glacier (as the legend text
# implies); rows with any other code are left out of these plots.
CLASS_LABELS = {0: 'Non-Glacier (0)', 1: 'Glacier (1)'}

# Label the classes in the data and let seaborn build the legend. Calling
# plt.legend(labels=[...]) after histplot replaces seaborn's legend and pairs
# the hand-typed labels with whichever artists come first, so the class names
# can end up swapped or attached to the KDE lines.
hist_df = df_clean.assign(Class=df_clean['class'].map(CLASS_LABELS))
class_order = list(CLASS_LABELS.values())

# Plot histograms for NDSI and NDVI
features = ['NDSI', 'NDVI']
for feature in features:
    fig, ax = plt.subplots(figsize=(7, 4))
    sns.histplot(
        data=hist_df,
        x=feature,
        hue='Class',
        hue_order=class_order,  # fixed order keeps colors consistent across plots
        kde=True,
        palette='coolwarm',
        bins=30,
        ax=ax,
    )
    ax.set_title(f'Distribution of {feature} by Class')
    ax.set_xlabel(feature)
    ax.set_ylabel('Count')
    fig.tight_layout()
    plt.show()

In [None]:
# Restrict the pairplot to the two indices plus the label column.
plot_features = ['NDSI', 'NDVI', 'class']
pairplot_data = df_clean.loc[:, plot_features]

sns.pairplot(
    pairplot_data,
    hue='class',
    diag_kind='kde',
    palette='coolwarm',
)
# y > 1 lifts the title above the top row of panels.
plt.suptitle("Pairplot of NDSI/NDVI by Class", y=1.02)
plt.show()


In [None]:
# Reshape to long format (one row per sample and feature) so both indices
# can share a single boxplot axis.
melted = pd.melt(df_clean, id_vars='class', value_vars=['NDSI', 'NDVI'])

fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=melted, x='variable', y='value', hue='class', palette='Set2', ax=ax)
ax.set_title("Boxplot of Features by Class")
ax.set_xlabel("Feature")
ax.set_ylabel("Value")
fig.tight_layout()
plt.show()


In [None]:
# Large annotated heatmap of the pairwise correlations between all columns
# of the cleaned frame.
correlation_matrix = df_clean.corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix of All Features")
fig.tight_layout()
plt.show()
