In [None]:
"""
Acquisition and Pre-Processing Script for Operation IceBridge Airborne Topographic Mapper L2 Icessn Elevation, Slope, and Roughness, v.2 Data

This script automates the data requests, compiles the downloaded files, and filters the data to the Greenland Ice Sheet (GrIS) ice extent.
The extent used is the GrIS 2004 ice extent determined by the USGS EROS National Atlas project (http://cinergi.sdsc.edu/geoportal/rest/metadata/item/6b9cbb294e8d49268c17f396d476113b/html).
"""

In [None]:
"""
Download command -- enter it as a single line (it is wrapped below for readability) and run it in the directory where you want the data saved!

wget --http-user=<username> --http-password=<password> --load-cookies
~/.urs_cookies --save-cookies ~/.urs_cookies --keep-session-cookies
--no-check-certificate --auth-no-challenge=on -r --reject "index.html*"
--reject "*.xml" -q -np -e robots=off
https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/ILATM2.002/
"""

In [1]:
# Import necessary libraries

import os 
import glob 
import numpy as np 
import pandas as pd 
import geopandas as gpd 

In [None]:
def day(fname, output_root='../../oib_ATM_test'):
    """
    Combine one day's downloaded .csv files into a single dated output file.

    The date is taken from the folder name, the column-header line of the first
    file is copied once (with ', Year, Month, Day' appended), and every data row
    of every file is written with the year, month and day appended.

    Parameters:
    - fname (str): Path to folder containing .csv files for a specific day.
      The folder name is split on '.', so it must look like 'YYYY.MM.DD'.
    - output_root (str): Directory under which '{year}/{month}/{day}.csv' is
      written. Defaults to '../../oib_ATM_test' (relative to the working directory).

    Returns:
    - None. Creates '{output_root}/{year}/{month}/' if needed and writes the
      combined file '{day}.csv' there. Nothing is written when the folder holds
      no .csv files.

    Raises:
    - ValueError: If the folder contains .csv files but its name does not have
      at least three '.'-separated parts.
    """
    # Sorted so that the row order of the combined file is reproducible.
    files = sorted(glob.glob(os.path.join(fname, '*.csv')))
    if not files:  # Nothing to combine for this folder
        return

    # normpath drops a trailing separator so basename is the folder name itself.
    date = os.path.basename(os.path.normpath(fname)).split('.')
    if len(date) < 3:
        raise ValueError(
            "Folder name must look like 'YYYY.MM.DD', got: {!r}".format(fname)
        )
    year, month, day_of_month = date[0], date[1], date[2]

    # Files from 2019 are treated as having one extra preamble line before the header.
    format_dif = 1 if year == '2019' else 0

    out_dir = os.path.join(output_root, year, month)
    os.makedirs(out_dir, exist_ok=True)  # Make output directory for the year and month
    out_path = os.path.join(out_dir, '{}.csv'.format(day_of_month))
    date_suffix = ',{},{},{}\n'.format(year, month, day_of_month)

    with open(out_path, 'w') as f_new:
        for position, path in enumerate(files):
            with open(path, 'r') as f:
                for _ in range(9 + format_dif):  # Skip the preamble lines
                    f.readline()
                header = f.readline()
                if position == 0:
                    # Keep the header once; [2:] drops its first two characters
                    # (the comment marker in front of the column names).
                    f_new.write(header[2:].rstrip('\n') + ', Year, Month, Day\n')

                for row in f:  # Add rows and append year, month, and day
                    row = row.rstrip('\n')
                    if row:  # Ignore blank lines instead of emitting date-only rows
                        f_new.write(row + date_suffix)

In [None]:
# Hand every entry of the current directory to day(); entries without .csv files are skipped by day() itself
for day_folder in glob.glob('./*'):
    day(day_folder)

In [None]:
# Rename each processed daily file from '{day}.csv' to '{month}{day}.csv' (MMDD).
# The file stays inside its './{year}/{month}/' folder.
for csv_path in glob.glob('./*/*/*.csv'): # Get processed .csvs
    month_dir, file_name = os.path.split(csv_path)
    month = os.path.basename(month_dir) # Month comes from the parent folder name
    stem, ext = os.path.splitext(file_name)
    if stem.startswith(month) and len(stem) > len(month):
        continue # Already prefixed with the month: re-running the cell must not prefix it again
    os.rename(csv_path, os.path.join(month_dir, '{}{}{}'.format(month, stem, ext)))

In [2]:
# Function to concatenate each month and filter to ice extent 

def make_years(base_path, output_file, ice_extent_shp):
    """
    Combine the daily .csv files of each year into one dataset and keep only
    the points that fall within the ice extent polygons.

    Parameters:
    - base_path (str): Path to the base directory containing the data organized in
      '<year>/<month>' folders. Only folders whose year and month names are all
      digits are read, so other sub-folders (e.g. the shapefile or output
      folders) are ignored.
    - output_file (str): Path to the output *directory* (the name is kept for
      compatibility); one sub-folder per year is created inside it.
    - ice_extent_shp (str): Path to the shapefile defining the ice extent.

    Returns:
    - None. Writes '<output_file>/<year>/<year>.csv' for every year that has data
      and prints a confirmation line for each file.
    """
    # Load the ice extent and reproject it to EPSG:3413 so it matches the points below.
    ice_extent = gpd.read_file(ice_extent_shp).to_crs(epsg=3413)

    # Gather all daily frames per year first and concatenate once per year;
    # growing a DataFrame with concat inside the loop is quadratic.
    frames_by_year = {}
    for month_folder in sorted(glob.glob(os.path.join(base_path, '*', '*'))):
        if not os.path.isdir(month_folder):  # The pattern also matches plain files
            continue
        folder = os.path.normpath(month_folder)
        month = os.path.basename(folder)
        year = os.path.basename(os.path.dirname(folder))
        if not (year.isdigit() and month.isdigit()):  # Not a <year>/<month> data folder
            continue

        for csv_path in sorted(glob.glob(os.path.join(month_folder, '*.csv'))):
            # skipinitialspace strips the blank that follows each comma, so header
            # names written as ', Year, Month, Day' load as 'Year', 'Month', 'Day'.
            frames_by_year.setdefault(year, []).append(
                pd.read_csv(csv_path, skipinitialspace=True)
            )

    for year, frames in frames_by_year.items():  # Process each year's data
        data = pd.concat(frames, ignore_index=True)
        if data.empty:  # Header-only files: nothing to filter or save
            continue

        gdf = gpd.GeoDataFrame(  # Convert data to GeoDataFrame
            data,
            geometry=gpd.points_from_xy(data['Longitude(deg)'], data['Latitude(deg)']),
            crs='epsg:4326'
        ).to_crs(epsg=3413)  # Reproject to EPSG:3413
        gdf['Easting'] = gdf.geometry.x  # Add Easting and Northing columns
        gdf['Northing'] = gdf.geometry.y

        # Spatial join keeps only the points within the ice extent.
        # `predicate` replaces the deprecated `op` keyword of sjoin.
        gdf_in = gpd.sjoin(gdf, ice_extent, predicate='within')

        # Use the function argument, not a notebook-level global, for the output location.
        year_output_dir = os.path.join(output_file, year)
        os.makedirs(year_output_dir, exist_ok=True)
        year_csv = os.path.join(year_output_dir, f'{year}.csv')

        # drop() returns a new object; use its result so the geometry column is really removed.
        pd.DataFrame(gdf_in.drop(columns='geometry')).to_csv(year_csv, index=False)
        print(f"Saved data for {year} to {year_csv}")  # Print message confirming the save

In [None]:
# Inputs and output location for the yearly aggregation
base_path = './' # ILATM2 base directory
ice_extent_shp = './greenland_ice_extent/greenland_ice_extent.shp' # Ice extent shapefile
output_dir = '/yearly_data' # Output directory (note: an absolute path at the filesystem root)

os.makedirs(output_dir, exist_ok=True) # Create the output directory when it is missing
make_years( # Build and filter the yearly files
    base_path=base_path,
    output_file=output_dir,
    ice_extent_shp=ice_extent_shp,
)