In [None]:
"""
Along-Track Analyses Script for Operation IceBridge Airborne Topographic Mapper L2 Icessn Elevation, Slope, and Roughness, v.2 Data

This script examines the spatial and temporal trends of surface roughness in the along-track roughness data. 
The ablation and accumulation zones may be analyzed separately, here assuming an equilibrium line altitude of 1500 m. 
This script computes how roughness in 100-meter elevation bands changes over time, along with the changes in
the skewness of the roughness distributions. 
"""

In [None]:
# Import necessary libraries 

import os
import numpy as np
import pandas as pd
from scipy.stats import skew, linregress
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Inclusive range of years to process
first_year, last_year = 2009, 2019
years = np.arange(first_year, 1 + last_year)

In [1]:
def calculate_rms(year, low=100, high=3200, range_step=100, data_dir='./yearly'):
    """
    Calculate the average root-mean-square fit for elevation bands within a specified range.

    The file `<data_dir>/<year>Filtered.csv` is read, restricted to rows whose
    'WGS84_Ellipsoid_Height(m)' lies in [low, high], and split into half-open bands
    [start, start + range_step) for start in range(low, high, range_step).

    Parameters:
    - year (int): Year of the dataset to be processed
    - low (int): Lower bound of the elevation range
    - high (int): Upper bound of the elevation range
    - range_step (int): The step size of the elevation bands
    - data_dir (str): Directory holding the yearly CSV files (defaults to './yearly')

    Returns:
    - rms_data (list): A list of lists. Each inner list contains:
      [year, upper bound of elevation band (as a string), mean of 'RMS_Fit(cm)' in the band].
      The mean is NaN for a band without any rows.
    """
    print(f'Processing {year}')

    dataset = pd.read_csv(os.path.join(data_dir, f'{year}Filtered.csv'))  # Load the filtered dataset for the year

    # Keep only points inside the requested elevation range
    height = dataset['WGS84_Ellipsoid_Height(m)']
    dataset = dataset[(height >= low) & (height <= high)]

    # Pull the two columns out once so each band only needs a mask on the heights
    height = dataset['WGS84_Ellipsoid_Height(m)']
    rms = dataset['RMS_Fit(cm)']

    rms_data = []  # One [year, band label, mean RMS] entry per band
    for start in range(low, high, range_step):
        end = start + range_step  # Upper (exclusive) bound of the current band
        in_band = rms[(height >= start) & (height < end)]

        avg_rms = in_band.mean() if len(in_band) > 0 else float('nan')
        # The band label is kept as a string; callers convert it with pd.to_numeric
        rms_data.append([year, f"{end}", avg_rms])

    return rms_data

In [None]:
def calculate_skew(year, low=100, high=3200, range_step=100, data_dir='./yearly'):
    """
    Calculate the skewness of the normalized RMS fit histogram for elevation bands.

    For every half-open band [start, start + range_step) the 'RMS_Fit(cm)' values are
    binned with the edges np.linspace(0, 1000, 100), the counts are normalized to sum
    to 1, and scipy.stats.skew is applied to the normalized bin heights.

    NOTE(review): the statistic is the skewness of the histogram bin heights, not the
    skewness of the raw RMS values - confirm that this is the intended measure.

    Parameters:
    - year (int): Year of the dataset to be processed
    - low (int): Lower bound of the elevation range
    - high (int): Upper bound of the elevation range
    - range_step (int): The step size of the elevation bands
    - data_dir (str): Directory holding the yearly CSV files (defaults to './yearly')

    Returns:
    - skew_data (list): A list of lists, where each inner list contains:
      [year, upper bound, skewness]. The skewness is NaN when the band has no rows or
      when none of its values fall inside the histogram range.
    """
    print(f'Calculating skew for {year}')

    dataset = pd.read_csv(os.path.join(data_dir, f'{year}Filtered.csv'))  # Load filtered data for specified year

    # Keep only points inside the requested elevation range
    height = dataset['WGS84_Ellipsoid_Height(m)']
    dataset = dataset[(height >= low) & (height <= high)]

    roughness_bins = np.linspace(0, 1000, 100)  # Same bin edges for every band, so build them once

    skew_data = []  # One [year, upper bound, skewness] entry per band
    for start in range(low, high, range_step):
        end = start + range_step  # Upper (exclusive) bound of the current band
        data = dataset[
            (dataset['WGS84_Ellipsoid_Height(m)'] >= start) &
            (dataset['WGS84_Ellipsoid_Height(m)'] < end)
        ]

        skew_in_range = float('nan')
        if len(data) > 0:
            hist, _ = np.histogram(data['RMS_Fit(cm)'], bins=roughness_bins)
            total = np.sum(hist)
            # A band whose values all lie outside the bins would otherwise divide 0 by 0
            if total > 0:
                skew_in_range = skew(hist / total)

        skew_data.append([year, end, skew_in_range])

    return skew_data

In [None]:
# Mean RMS per elevation band for every year, flattened into one table
RMS = [row for yr in years for row in calculate_rms(yr)]
df_rms = pd.DataFrame(RMS, columns=['year', 'elevation', 'rms']).assign(
    elevation=lambda frame: pd.to_numeric(frame['elevation'])
)

# Skewness per elevation band for every year, in the same layout
RMS_skew = [row for yr in years for row in calculate_skew(yr)]
df_skew = pd.DataFrame(RMS_skew, columns=['year', 'elevation', 'skew']).assign(
    elevation=lambda frame: pd.to_numeric(frame['elevation'])
)

In [None]:
# Visualize average RMS above/below ELA of 1500 m and all elevations over time.

# 'elevation' holds the UPPER bound of each band, so the band labelled 1500 covers
# the 1400-1500 m band and belongs to the ablation zone (hence <= rather than <).
average_rms_below_1500 = df_rms[df_rms['elevation'] <= 1500].groupby('year')['rms'].mean().reset_index()
average_rms_above_1500 = df_rms[df_rms['elevation'] > 1500].groupby('year')['rms'].mean().reset_index()
total_rms = df_rms.groupby('year')['rms'].mean().reset_index()

plt.figure(figsize=(10, 6))

# (yearly means, colour, legend label, axes-fraction height of the annotation)
zone_series = [
    (average_rms_below_1500, 'red', 'Ablation Zone', 0.79),
    (average_rms_above_1500, 'blue', 'Accumulation Zone', 0.22),
    (total_rms, 'green', 'All Elevations', 0.48),
]

# Scatter plot, least-squares trend line and slope/p-value annotation for each group
for zone_means, zone_color, zone_label, text_height in zone_series:
    plt.scatter(zone_means['year'], zone_means['rms'], color=zone_color, label=zone_label)

    zone_slope, zone_intercept, _, zone_p, _ = linregress(zone_means['year'], zone_means['rms'])
    plt.plot(
        zone_means['year'],
        zone_intercept + zone_slope * zone_means['year'],
        color=zone_color,
        linestyle='--'
    )

    plt.text(
        0.05, text_height, f'\nslope={zone_slope:.2f}, p={zone_p:.2f}',
        transform=plt.gca().transAxes, color=zone_color, verticalalignment='top'
    )

# Formatting, labels, etc...
plt.ylim(0,120)
plt.xlabel('Year', fontsize=14)
plt.ylabel('Average Surface Roughness [cm]', fontsize=14)
plt.title('Average Surface Roughness Over Time', fontsize=16)
plt.legend(loc='upper right', fontsize=11)
plt.grid(True)
plt.show()

In [None]:
# Visualize rate of change in roughness for 100 meter bands

elevations = sorted(df_rms['elevation'].unique()) # Sorted list of unique band labels
elevation_vals, slope_vals = [], [] # Band labels and their fitted rates of change

plt.figure(figsize=(10, 6))

for elev in elevations: # One linear fit of roughness against year per band
    # Years without data carry a NaN mean; a single NaN would turn the whole fit into NaN
    data_in = df_rms[df_rms['elevation'] == elev].dropna(subset=['rms'])

    if data_in['year'].nunique() < 2:
        slope = float('nan') # A trend needs at least two distinct years
    else:
        slope, *_ = linregress(data_in['year'], data_in['rms'])

    elevation_vals.append(elev)
    slope_vals.append(slope)

# Plot rates of change against elevation
plt.plot(elevation_vals, slope_vals, linestyle='--', marker='o', color='b')
plt.xlabel('Elevation [m]', fontsize=14)
plt.ylabel('Rate of Change in Roughness [cm/yr]', fontsize=14)
plt.title('Rate of Change in Surface Roughness with Elevation', fontsize=16)
plt.grid(True)
plt.show()

In [None]:
# Mean roughness against elevation (100-meter bands), one dashed line per year

# Line colour assigned to each year
year_colors = {
    2009: 'red',
    2010: 'orange',
    2011: 'yellow',
    2012: 'lightgreen',
    2013: 'green',
    2014: 'lightblue',
    2015: 'blue',
    2016: 'indigo',
    2017: 'violet',
    2018: 'gray',
    2019: 'black',
}

plt.figure(figsize=(10, 6))

# Draw the elevation profile of every year; years missing from the map fall back to black
for yr, yearly_rows in df_rms.groupby('year'):
    line_color = year_colors.get(yr, 'black')
    plt.plot(
        yearly_rows['elevation'],
        yearly_rows['rms'],
        linestyle='--',
        color=line_color,
        label=str(yr),
    )

# Axis labels, title, legend and grid
plt.xlabel('Elevation [m]', fontsize=16)
plt.ylabel('Average Roughness [cm]', fontsize=16)
plt.title('Yearly Roughness vs. Elevation', fontsize=16)
plt.legend(fontsize=13)
plt.grid(True)
plt.show()

In [None]:
# Calculate yearly histogram skewness of the RMS fit for three groups:
# all elevations (100-3200 m), at or below 1500 m, and above 1500 m


def _histogram_skew(values, bins):
    """Return the skewness of the normalized histogram of `values`, or NaN if no value falls inside `bins`."""
    counts, _ = np.histogram(values, bins=bins)
    total = np.sum(counts)
    if total == 0:
        return float('nan')  # Avoids dividing 0 by 0 for an empty group
    return skew(counts / total)


roughness_bins = np.linspace(0, 1000, 100)  # Bin edges for the roughness values; identical for every year

# Yearly [year, skewness] rows for the 3 groups.
# Note: RMS_skew is rebound here; it previously held the per-band rows used to build df_skew.
RMS_skew = []
RMS_skew_sub1500 = []
RMS_skew_above1500 = []

for year in years:
    dataset = pd.read_csv(f'./yearly/{year}Filtered.csv')  # Load the filtered dataset for the current year

    # Keep only points with elevations between 100 m and 3200 m
    height = dataset['WGS84_Ellipsoid_Height(m)']
    dataset = dataset[(height >= 100) & (height <= 3200)]

    height = dataset['WGS84_Ellipsoid_Height(m)']
    rms = dataset['RMS_Fit(cm)']

    # The frame is already limited to 100-3200 m, so one comparison splits the zones
    RMS_skew.append([year, _histogram_skew(rms, roughness_bins)])
    RMS_skew_sub1500.append([year, _histogram_skew(rms[height <= 1500], roughness_bins)])
    RMS_skew_above1500.append([year, _histogram_skew(rms[height > 1500], roughness_bins)])


In [None]:
# Skewness through time for the three elevation groups, each with a linear trend

# One [year, skew] table per group
df_total = pd.DataFrame(RMS_skew, columns=['year', 'skew'])
df_sub1500 = pd.DataFrame(RMS_skew_sub1500, columns=['year', 'skew'])
df_above1500 = pd.DataFrame(RMS_skew_above1500, columns=['year', 'skew'])

plt.figure(figsize=(10, 6))

# Ablation zone (below 1500 m): yearly points and least-squares line
years_sub, skew_sub = df_sub1500['year'], df_sub1500['skew']
plt.scatter(years_sub, skew_sub, color='red', marker='o', label='Ablation Zone')
slope_sub, intercept_sub, _, p_sub, _ = linregress(years_sub, skew_sub)
plt.plot(years_sub, intercept_sub + years_sub * slope_sub, color='red', linestyle='--')

# Accumulation zone (above 1500 m): yearly points and least-squares line
years_above, skew_above = df_above1500['year'], df_above1500['skew']
plt.scatter(years_above, skew_above, color='blue', marker='o', label='Accumulation Zone')
slope_above, intercept_above, _, p_above, _ = linregress(years_above, skew_above)
plt.plot(years_above, intercept_above + years_above * slope_above, color='blue', linestyle='--')

# All elevations: yearly points and least-squares line
years_all, skew_all = df_total['year'], df_total['skew']
plt.scatter(years_all, skew_all, color='green', marker='o', label='All Elevations')
slope, intercept, _, p, _ = linregress(years_all, skew_all)
plt.plot(years_all, intercept + years_all * slope, color='green', linestyle='--')

# Slope / p-value annotations: (axes-fraction height, slope, p-value, colour)
trend_notes = [
    (0.88, slope_above, p_above, 'blue'),
    (0.73, slope, p, 'green'),
    (0.3, slope_sub, p_sub, 'red'),
]
for note_height, note_slope, note_p, note_color in trend_notes:
    plt.text(
        0.05, note_height, f'\nslope={note_slope:.2f}, p={note_p:.2f}',
        transform=plt.gca().transAxes, color=note_color, verticalalignment='top'
    )

# Axis labels, title, legend, ticks and layout
plt.xlabel('Year', fontsize=14)
plt.ylabel('Skew', fontsize=14)
plt.title('Normalized Skew Over Time', fontsize=16)
plt.legend(fontsize=12)
plt.xticks(years, rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()

# Per-band skewness in the ablation zone through time, colour-coded by elevation band
plt.figure(figsize=(10, 6))

# 'elevation' is each band's upper bound, so labels up to and including 1500
# select the bands that lie below 1500 m.
below_1500 = df_skew[df_skew['elevation'] <= 1500]
elevation_colors = { # Colour for each band, keyed by the band's upper bound
    200: 'brown', 300: 'red', 400: 'orangered', 500: 'orange', 600: 'gold',
    700: 'lime', 800: 'green', 900: 'lightseagreen', 1000: 'blue', 1100: 'navy',
    1200: 'mediumorchid', 1300: 'deeppink', 1400: 'gray', 1500: 'black',
}
# (lower, upper) bounds of each 100 m band mapped to the same colour, used for the legend text
elevation_ranges = {
    (upper - 100, upper): shade for upper, shade in elevation_colors.items()
}

legend_labels = set()  # Legend entries already created

# Iterate over the sorted band labels below 1500 m
sorted_elevations = sorted(below_1500['elevation'].unique())
for elevation in sorted_elevations:
    data_at_elevation = below_1500[below_1500['elevation'] == elevation].dropna()
    if data_at_elevation.empty:
        continue

    # Yearly skew values of this band; bands missing from the colour map are drawn gray
    color = elevation_colors.get(elevation, 'gray')
    plt.scatter(data_at_elevation['year'], data_at_elevation['skew'], alpha=0.7, color=color)

    # An empty scatter carries the legend entry so the legend marker is drawn without alpha
    for (low, high), col in elevation_ranges.items():
        if low < elevation <= high:
            legend_label = f'{low}-{high} m'
            if legend_label not in legend_labels:
                plt.scatter([], [], color=col, label=legend_label)
                legend_labels.add(legend_label)

    # linregress cannot fit a line through fewer than two distinct years
    if data_at_elevation['year'].nunique() < 2:
        continue

    # Linear regression of this band's skew values against year
    slope, intercept, *_ = linregress(data_at_elevation['year'], data_at_elevation['skew'])
    regress_line = slope * data_at_elevation['year'] + intercept
    plt.plot(data_at_elevation['year'], regress_line, linestyle='--', color=color)

# Formatting, labels, etc...
plt.xlabel('Year', fontsize=14)
plt.ylabel('Skew', fontsize=14)
plt.title('Normalized Skew of Ablation Zone Roughness Over Time', fontsize=16)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=11)
plt.xticks(rotation=0)
plt.tight_layout()
plt.grid(True)
plt.show()