In [1]:
import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import numpy as np
import scipy
import scipy.signal
import pandas as pd
import datetime
from datetime import timedelta

import os
from scipy.stats import pearsonr

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
from matplotlib import gridspec
from matplotlib.ticker import MaxNLocator
import matplotlib as mpl

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.img_tiles import GoogleTiles

import imageio

In [2]:
data_folder = '/Users/dlugardo/Desktop/data/ENU_v2' # path to the folder with the data 
# Loads the Metadata for all the station (latitude, longitude, and elevation)
StationMetaData = df = pd.read_csv('/Users/dlugardo/Documents/GitHub/signal-processing-enu/GreenlandStations.csv')

def get_data(location):
    file_name = str(location) + '.ENU.txt'
    path = os.path.join(data_folder, file_name)

    if os.path.isfile(path):
        data = np.loadtxt(path, skiprows=2)
    else:
        file_name = str(location) + '_ENU.txt'
        path = os.path.join(data_folder, file_name)

        if os.path.isfile(path):
            data = np.loadtxt(path, skiprows=2)
        else:
            raise FileNotFoundError(f"Neither '{location}.ENU.txt' nor '{location}_ENU.txt' found in {data_folder}")
    return data

def decimal_year_to_date(decimal_year):
    """
    Converts a decimal year to a datetime.date object.
    """
    year = int(decimal_year)
    fractional_part = decimal_year - year

    # Determine if it's a leap year for accurate day calculation
    is_leap = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)
    days_in_year = 366 if is_leap else 365

    # Calculate the number of days from the start of the year
    days_offset = fractional_part * days_in_year

    # Create a datetime object for January 1st of that year
    start_of_year = datetime.date(year, 1, 1)

    # Add the calculated offset in days
    result_date = start_of_year + datetime.timedelta(days=days_offset)

    return result_date

def doy_to_angle(doy):
    radians = doy * 2 * np.pi / 365
    mean_angle = np.arctan2(np.mean(np.sin(radians)), np.mean(np.cos(radians)))
    cos = np.cos(mean_angle)
    return(cos)

def circular_mean(degrees):
    radians = np.deg2rad(np.array(degrees) * 360/365)
    mean_angle = np.arctan2(np.mean(np.sin(radians)), np.mean(np.cos(radians)))
    mean_angle = np.rad2deg(mean_angle) * 365/360
    if mean_angle < 0:
        mean_angle += 365
    return mean_angle

MAX_GAP_DAYS = 30 

def find_longest_continuous_segment(dates, max_gap_days=30):
    # Ensure dates are sorted
    dates = np.sort(dates)
    segments = []
    current_segment = [dates[0]]

    for i in range(1, len(dates)):
        if (dates[i] - dates[i-1]).days <= max_gap_days:
            current_segment.append(dates[i])
        else:
            segments.append(current_segment)
            current_segment = [dates[i]]
    segments.append(current_segment)

    # Return the longest continuous segment
    return max(segments, key=len)

def InterpRA(data, detrend, INTERP_LIMIT, wdays):   
    time = data[:, 0]

    if detrend:   
        data[:, 1:4] = scipy.signal.detrend(data[:, 1:4], axis=0)
        
    # Convert decimal year to datetime
    converted_dates = np.array([decimal_year_to_date(dy) for dy in time])
    df = pd.DataFrame({'Date': pd.to_datetime(converted_dates)})
    df.set_index('Date', inplace=True)

    df['East'] = data[:, 1]
    df['North'] = data[:, 2]
    df['Vertical'] = data[:, 3]
    df['Horizontal'] =  np.sqrt(data[:, 1] ** 2 + data[:, 2] **2)
    df['3DDisp'] = np.cbrt(data[:, 1] ** 2 + data[:, 2] **2 + data[:, 3] **2)

    df['year'] = df.index.year
    df['doy'] = df.index.dayofyear

    filled_list = []
    for year, group in df.groupby('year'):
        # Determine if leap year
        is_leap = (pd.Timestamp(f'{year}-12-31').is_leap_year)
        days_in_year = 366 if is_leap else 365

        # Create full day-of-year range
        full_range = pd.DataFrame({'doy': np.arange(1, days_in_year + 1)})
        full_range['year'] = year

        # Merge to include missing days as NaN
        group = full_range.merge(group, on=['year', 'doy'], how='left')

        # Remove duplicates by averaging
        group = group.groupby(['year', 'doy'], as_index=False).mean()

        # Restore datetime index
        group['Date'] = pd.to_datetime(
            group['year'].astype(str) + '-' + group['doy'].astype(str),
            format='%Y-%j',
            errors='coerce'
        )
        group = group.set_index('Date').sort_index()

        # 🔹 Interpolate only small gaps
        for col in ['East', 'North', 'Vertical', 'Horizontal', '3DDisp']:
            group[col] = group[col].interpolate(
                method="time", limit=INTERP_LIMIT, limit_direction="both"
            )

        filled_list.append(group)

        
    # Concatenate all years
    df = pd.concat(filled_list).sort_index()
    
    # Apply rolling mean smoothing
    df_rolling = df.rolling(
        window=wdays, 
        center=True, 
        min_periods=wdays - int(wdays / 5)
    ).mean()
    df_rolling = df_rolling.dropna()

    # Remove Feb 29 for consistency
    df_rolling = df_rolling[df_rolling.index.strftime('%m-%d') != '02-29']
    
    # Reattach year, day of year, and month_day columns
    df_rolling['year'] = df_rolling.index.year
    df_rolling['month_day'] = df_rolling.index.strftime('%m-%d')
    df_rolling['doy'] = df_rolling.index.dayofyear

    leap_mask = df_rolling.index.is_leap_year & (df_rolling.index.month > 2)
    df_rolling.loc[leap_mask, 'doy'] -= 1
    
    return df_rolling

In [4]:
stations_names_with_data, metadata_records = [], []

MAX_GAP_DAYS = 60        # Maximum gap acceptable between data
INTERP_LIMIT = 8         # Maximum of days to be interpolated
wdays = 30               # Rolling Average window (number of days considered in the average)
MIN_DAYS_PER_YEAR = 350  # Minimum days required to be considered as a full year. 
min_years_of_data = 3    # Minimum length of the continuous segment

Save = True
Detrend = False

for station_name in StationMetaData.station:
    try:
        raw_data = get_data(station_name)
        time = raw_data[:, 0]
        converted_dates = np.array([decimal_year_to_date(dy) for dy in time])

        # Find longest continuous segment of data
        continuous_segment = find_longest_continuous_segment(
            converted_dates, max_gap_days=MAX_GAP_DAYS
        )

        if len(continuous_segment) < 365 * min_years_of_data:  # optional: skip if segment is too short
            print(f"  → Skipping {station_name}: continuous segment too short.")
            continue

        # Filter both data and error to this segment
        mask = np.isin(converted_dates, continuous_segment)
        data = raw_data[mask, :]

        df_rolling = InterpRA(data, Detrend, INTERP_LIMIT, wdays)

        valid_years = df_rolling.groupby('year').filter(
            lambda x: len(x) >= MIN_DAYS_PER_YEAR
        )['year'].unique()
        df_rolling = df_rolling[df_rolling['year'].isin(valid_years)]
        
        LTM = df_rolling.groupby('month_day')[['East', 'North', 'Vertical','Horizontal', '3DDisp']].mean()
        stds = df_rolling.groupby('month_day')[['East', 'North', 'Vertical','Horizontal', '3DDisp']].std()
        
        stds.columns = ['East_sd', 'North_sd', 'Vertical_sd','Horizontal_sd', '3DDisp_sd']
        LTM = LTM.join(stds)

        LTM[['East','North','Vertical', 'Horizontal', '3DDisp' ]] = (
            LTM[['East','North','Vertical', 'Horizontal', '3DDisp']].rolling(3, center=True, min_periods=1).mean()
        )

        # Save LTM for this station
        if Save is True:
            LTM.to_csv(f'/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM/{station_name}_Daily{wdays}RollingLTM.csv', index=True)

        stations_names_with_data.append(station_name)

    except FileNotFoundError:
        print(f"  → File not found for station {station_name}. Skipping.")
        continue
    except Exception as e:
        print(f"  → Error processing station {station_name}: {e}")
        continue

  → Skipping NGFJ: continuous segment too short.
  → Error processing station JWLF: Cannot save file into a non-existent directory: '/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM'
  → File not found for station THU4. Skipping.
  → Error processing station JGBL: Cannot save file into a non-existent directory: '/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM'
  → Error processing station THU2: Cannot save file into a non-existent directory: '/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM'
  → Error processing station NNVN: Cannot save file into a non-existent directory: '/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM'
  → File not found for station QENU. Skipping.
  → Error processing station YMER: Cannot save file into a non-existent directory: '/Users/dlugardo/Documents/GitHub/signal-processing-enu/NotDetrended/LTM'
  → File not found for station AAS2. Skipping.
  → Skipping EQNU: contin

KeyboardInterrupt: 