# Interpolate observation data to a regular grid

In [1]:
# import libraries
from datetime import timedelta

import pandas as pd
import numpy as np

In [2]:
# Read the raw observation table from CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='data/line_208.csv')

## Interpolate onto standard depth levels

In [3]:
# Restrict the observations to the analysis window.
# Both bounds are exclusive and are compared against the Julian-date column.
start = pd.Timestamp('1982-01-01').to_julian_date()
end = pd.Timestamp('2022-01-01').to_julian_date()

after_start = data['julian'] > start
before_end = data['julian'] < end
df = data.loc[after_start & before_end]

In [4]:
# Setup: station list, target depth levels, and the sorted set of sampling times
stations = df['station'].unique()
standard_depths = np.array([
    0, 10, 20, 30, 50, 75, 100,
    125, 150, 200, 250, 300, 400, 500,
])
julian_seen = df['julian'].unique()
time_index = np.sort(julian_seen)

result_dict = dict()  # interpolated DataFrame per station, keyed by station

In [5]:
# Vertical interpolation: for every station, map each cast (all observations
# sharing one Julian time) onto the standard depth levels.
for station in stations:
    df_station = df[df['station'] == station].drop(columns=['cruiseline', 'date', 'station'])
    # One row per sampling time seen anywhere in `df`, one column per standard
    # depth. Rows for times at which this station has no data stay NaN.
    depth_interp = pd.DataFrame(index=time_index, columns=standard_depths, dtype=float)

    # Group once by time instead of re-scanning the station's rows for every
    # entry of `time_index`; times without rows for this station never show up.
    for time, cast in df_station.groupby('julian'):
        # Keep only complete (depth, temperature) pairs
        t_obs = cast[['depth', 'temperature']].dropna()

        # Skip if nothing usable remains for this time
        if t_obs.empty:
            continue

        # np.interp needs increasing x-coordinates and does not verify it;
        # unsorted depths would silently give wrong values, so sort first.
        t_obs = t_obs.sort_values('depth')

        # Interpolate onto standard depths. Levels outside the sampled depth
        # range become NaN instead of being extrapolated.
        t_interp = np.interp(
            standard_depths,
            t_obs['depth'].to_numpy(),
            t_obs['temperature'].to_numpy(),
            left=np.nan,
            right=np.nan,
        )

        # Store result in the DataFrame
        depth_interp.loc[time] = t_interp

    result_dict[station] = depth_interp

In [None]:
# Combine the per-station tables into one frame indexed by (station, time),
# with one column per standard depth.
depth_interp = pd.concat(result_dict, names=['station', 'time'])

# drop rows that have no interpolated value at any depth
depth_interp = depth_interp.dropna(axis=0, how='all')

# Reshape wide -> long: index (station, depth), columns ['time', 'temperature'].
# NaNs are dropped explicitly rather than relying on stack() to discard them:
# the legacy stack implementation drops them, the newer one keeps them, and
# leftover NaN temperatures would propagate through any later np.interp call.
# Levels and columns are named by label so the result does not depend on
# positional renaming.
depth_interp = (
    depth_interp
    .rename_axis(columns='depth')
    .stack()
    .dropna()
    .rename('temperature')
    .reset_index('time')
)

## Interpolate onto a regular monthly time grid

In [12]:
# Monthly target times: 14 days after the first of each month,
# from 1982-01 through 2021-12
start = pd.Timestamp('1982-01')
end = pd.Timestamp('2021-12')
month_starts = pd.date_range(start, end, freq='1MS')
time_range = month_starts + timedelta(days=14)

# Julian date of every target time, kept as a plain list
julian_range = [stamp.to_julian_date() for stamp in time_range]
julian_range[:5]

[np.float64(2444984.5),
 np.float64(2445015.5),
 np.float64(2445043.5),
 np.float64(2445074.5),
 np.float64(2445104.5)]

In [37]:
result_dict = {}  # store each station's interpolated dataframe

# Stations that actually have rows in `depth_interp`
stations_with_data = depth_interp.index.get_level_values(0).unique()

# Temporal interpolation: for every station and standard depth, map the
# available (time, temperature) pairs onto the monthly Julian dates.
for station in stations:
    # Rows: target Julian dates, columns: standard depths. Filled in place
    # below; anything that cannot be interpolated stays NaN.
    time_interp = pd.DataFrame(index=julian_range, columns=standard_depths, dtype=float)
    result_dict[station] = time_interp

    # A station listed in `stations` may have no rows in `depth_interp`;
    # looking it up would raise KeyError, so keep its all-NaN frame instead.
    if station not in stations_with_data:
        continue

    selected = depth_interp.loc[station, :]

    for depth in standard_depths:
        if depth not in selected.index:
            continue

        # A list indexer always yields a DataFrame, even for a single match
        # (a scalar label would return a Series in that case).
        t_obs = selected.loc[[depth]].dropna(subset=['time', 'temperature'])

        # np.interp needs increasing x-coordinates and does not verify it.
        # The index here is the (constant) depth, so sort on the time column.
        t_obs = t_obs.sort_values('time')

        # At least 2 points are required for interpolation
        if len(t_obs) < 2:
            continue

        # NOTE(review): this interpolates linearly across gaps of any length
        # between two observations - confirm that is intended.
        t_interp = np.interp(
            julian_range,
            t_obs['time'].to_numpy(),
            t_obs['temperature'].to_numpy(),
            left=np.nan,
            right=np.nan,
        )
        time_interp[depth] = t_interp

In [None]:
# Combine the per-station monthly tables and reshape wide -> long:
# index (station, depth), columns ['julian', 'temperature'].
# NaNs are dropped explicitly rather than relying on stack() to discard them:
# the legacy stack implementation drops them, the newer one keeps them.
# Levels and columns are named by label so the result does not depend on
# positional renaming.
result_df = (
    pd.concat(result_dict, names=['station', 'julian'])
    .rename_axis(columns='depth')
    .stack()
    .dropna()
    .rename('temperature')
    .reset_index('julian')
)

In [None]:
# Write the interpolated table to disk as CSV
result_df.to_csv(path_or_buf='data/line_208_interp.csv')