In [4]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt


In [2]:
# Read both input tables in one pass; the relative paths are resolved against
# the current working directory. Order matters: true.csv first, test.csv second.
true_df, experiment_df = (
    pd.read_csv(csv_name) for csv_name in ("true.csv", "test.csv")
)

In [10]:
# Normalise both tables to numeric 'x'/'y' columns, resample the experimental
# curve at the x positions of the true data, and report the RMSE normalised by
# the y-range of the true data.


def _clean_xy(df, label):
    """Return a cleaned copy of ``df`` with numeric ``x`` and ``y`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table as loaded from CSV. Column labels are stripped of surrounding
        whitespace; a table with exactly two columns has them renamed to
        ``'x'`` and ``'y'`` (in that order).
    label : str
        Human-readable name of the table, used only in error messages.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which ``x`` and ``y`` are
        numeric and rows where either failed to parse have been removed.

    Raises
    ------
    ValueError
        If, after the optional rename, the table has no ``x`` or ``y`` column.
    """
    cleaned = df.copy()
    cleaned.columns = cleaned.columns.str.strip()

    # Only a two-column table can be renamed unambiguously.
    if len(cleaned.columns) == 2:
        cleaned.columns = ['x', 'y']

    missing = [col for col in ('x', 'y') if col not in cleaned.columns]
    if missing:
        # Fail here with context instead of a bare KeyError further down.
        raise ValueError(
            f"{label} data needs 'x' and 'y' columns (or exactly two columns); "
            f"missing {missing}, found {list(cleaned.columns)}"
        )

    # Unparseable entries become NaN so they can be dropped below.
    cleaned['x'] = pd.to_numeric(cleaned['x'], errors='coerce')
    cleaned['y'] = pd.to_numeric(cleaned['y'], errors='coerce')

    # Restrict the NaN check to x/y so gaps in unrelated columns do not
    # discard otherwise valid points.
    return cleaned.dropna(subset=['x', 'y'])


# Rebind the same names so any later cell that uses them keeps working.
experiment_df = _clean_xy(experiment_df, "experiment")
true_df = _clean_xy(true_df, "true")

if len(experiment_df) < 2:
    raise ValueError(
        "experiment data needs at least two valid (x, y) rows for linear "
        f"interpolation; got {len(experiment_df)}"
    )
if true_df.empty:
    raise ValueError("true data has no valid (x, y) rows after cleaning")

# Interpolate experiment y-values to true x-points.
# NOTE(review): fill_value='extrapolate' means true x-points outside the
# experimental x-range are linearly extrapolated and still count towards the
# error — confirm that this is intended for this comparison.
interp_func = interp1d(experiment_df['x'], experiment_df['y'], kind='linear', fill_value='extrapolate')
interpolated_y = interp_func(true_df['x'])

# Calculate RMSE (positional comparison against the true y-values).
residuals = interpolated_y - true_df['y'].to_numpy()
rmse = np.sqrt(np.mean(residuals**2))

# Normalise by the spread of the true y-values; undefined for a flat curve.
y_range = true_df['y'].max() - true_df['y'].min()
if y_range == 0:
    raise ValueError(
        "true y-values are all identical; relative RMSE (RMSE / y-range) is undefined"
    )
rel_rmse = rmse / y_range

print(f"Relative RMSE between experimental and true data: {rel_rmse:.6f}")


Relative RMSE between experimental and true data: 0.189520
