In [1]:
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline

In [2]:
def interpolate_dataset(file_path, sat_type):
    """
    Reads a time-series CSV, interpolates it to a 15-minute frequency using a 
    cubic spline, adds noise to new points, and restores original data points.
    """
    # 1. Load and prepare the data
    df = pd.read_csv(file_path, parse_dates=['utc_time'])
    df = df.sort_values('utc_time').reset_index(drop=True)
    df = df.drop_duplicates(subset='utc_time')

    # 2. Convert timestamps to a numeric format for interpolation
    time_sec = df['utc_time'].astype(np.int64) / 1e9
    
    # 3. Create the new, complete 15-minute time range
    full_time = pd.date_range(start=df['utc_time'].min(), end=df['utc_time'].max(), freq='15T')
    full_time_sec = full_time.astype(np.int64) / 1e9
    
    # 4. Initialize the output DataFrame
    interpolated = pd.DataFrame({'utc_time': full_time})
    error_cols = [col for col in df.columns if col != 'utc_time']
    
    # 5. Interpolate each data column and add noise to new points
    for col in error_cols:
        cs = CubicSpline(time_sec, df[col])
        interpolated[col] = cs(full_time_sec)
        
        mask_new_points = ~full_time.isin(df['utc_time'])
        if mask_new_points.any(): # Only add noise if there are new points
            residual_std = df[col].diff().std()
            if pd.notna(residual_std) and residual_std > 0:
                noise = np.random.normal(0, residual_std * 0.5, len(full_time))
                interpolated.loc[mask_new_points, col] += noise[mask_new_points]
        
    # 6. Restore original values efficiently using .update()
    df_indexed = df.set_index('utc_time')
    interpolated.set_index('utc_time', inplace=True)
    interpolated.update(df_indexed)
    interpolated.reset_index(inplace=True)
    
    # 7. Add satellite type and return the final DataFrame
    interpolated['sat_type'] = sat_type
    return interpolated

In [3]:
meo_exp = interpolate_dataset("DATA_MEO_Train.csv", "MEO")
geo_exp = interpolate_dataset("DATA_GEO_Train.csv", "GEO")
meo2_exp = interpolate_dataset("DATA_MEO_Train2.csv", "MEO")

  full_time = pd.date_range(start=df['utc_time'].min(), end=df['utc_time'].max(), freq='15T')
  full_time = pd.date_range(start=df['utc_time'].min(), end=df['utc_time'].max(), freq='15T')
  full_time = pd.date_range(start=df['utc_time'].min(), end=df['utc_time'].max(), freq='15T')


In [4]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(meo2_exp)

               utc_time  x_error (m)  y_error (m)  z_error (m)  \
0   2025-09-03 10:11:00    -0.026800     0.073632     0.078433   
1   2025-09-03 10:26:00    -0.213611     0.115639     0.325873   
2   2025-09-03 10:41:00    -0.107813     0.054688     0.132517   
3   2025-09-03 10:56:00     0.021051     0.177236    -0.225768   
4   2025-09-03 11:11:00    -0.580578     0.102358     0.498177   
5   2025-09-03 11:26:00    -0.965328     0.240847     1.635098   
6   2025-09-03 11:41:00    -1.196745     0.253389     1.959078   
7   2025-09-03 11:56:00    -0.533112     0.014116     1.033974   
8   2025-09-03 12:11:00    -0.047000    -0.011955     0.122663   
9   2025-09-03 12:26:00    -0.498442    -0.080856     0.296704   
10  2025-09-03 12:41:00    -1.875518    -0.063284     1.210342   
11  2025-09-03 12:56:00    -3.183913    -0.027081     2.256807   
12  2025-09-03 13:11:00    -2.413913    -0.324679     1.964500   
13  2025-09-03 13:26:00     0.234787     0.056745    -0.040591   
14  2025-0