In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide presentation settings.
# A max_columns of None tells pandas not to elide columns when rendering wide frames.
pd.set_option("display.max_columns", None)
# Use seaborn's "whitegrid" look for every figure drawn below.
sns.set_style(style="whitegrid")

In [None]:
# Raw input and cleaned output locations (relative paths, resolved against the
# kernel's current working directory).
input_path, output_path = (
    Path("../data/raw/gravity_speed_data.csv"),
    Path("../data/processed/cleaned_gravity_speed_data.csv"),
)

# Create the destination folder (and any missing parents) now; exist_ok makes
# re-running this cell harmless when the folder is already there.
output_path.parent.mkdir(parents=True, exist_ok=True)

print(f"Input file: {input_path}")

In [None]:
# Read the raw CSV into the working frame used by every later cell.
df = pd.read_csv(filepath_or_buffer=input_path)
print(f"Data loaded with {df.shape[0]} rows and {df.shape[1]} columns.")
# Keep the shape as the cell's last expression so it is also shown as cell output.
df.shape

In [None]:
# Treat +/-infinity as missing, then drop every row that has a missing value
# in any column.
# Written as one chained expression that rebinds `df` instead of mutating it
# with inplace=True: the result is the same, but the frame's lineage is
# explicit and the cell does not depend on in-place side effects.
df = df.replace([np.inf, -np.inf], np.nan).dropna()
print(f"Data cleaned: {df.shape[0]} rows and {df.shape[1]} columns after removing NaN values.")

In [None]:
c = 299_792_458  # Speed of light in m/s

# Row-wise validity mask. A row is kept only when every condition holds:
#   * the velocity magnitude is strictly below c -- the comparison uses the
#     absolute value so a large negative velocity cannot slip through a
#     plain `< c` test;
#   * mass, radius and all three dilation columns are strictly positive.
conditions = (
    (df["velocity_m_s"].abs() < c) &
    (df["mass_kg"] > 0) &
    (df["radius_m"] > 0) &
    (df["gravitational_dilation"] > 0) &
    (df["velocity_dilation"] > 0) &
    (df["combined_dilation"] > 0)
)

# Boolean-mask selection keeps the surviving rows with their original index labels.
df = df[conditions]

In [None]:
# Columns whose distributions are plotted, in display order.
cols = [
    "mass_kg",
    "radius_m",
    "velocity_m_s",
    "gravitational_dilation",
    "velocity_dilation",
    "combined_dilation",
    "time_difference_s",
]

# One histogram with a KDE overlay per column, placed on a 3x3 grid.
# Uses the explicit Figure/Axes interface rather than the implicit
# "current axes" of pyplot, so each call names the axes it draws on.
fig = plt.figure(figsize=(15, 12))
for i, col in enumerate(cols, start=1):
    ax = fig.add_subplot(3, 3, i)
    sns.histplot(df[col], bins=40, kde=True, color="steelblue", ax=ax)
    ax.set_title(f"{col}", fontsize=10)
    # Axis labels are blanked; the panel title already names the column.
    ax.set_xlabel("")
    ax.set_ylabel("")

fig.tight_layout()
plt.show()


In [None]:
# Renumber the rows 0..n-1 now that rows have been removed; drop=True
# discards the old index instead of keeping it as a column.
# Rebinding `df` (rather than inplace=True) keeps the step explicit.
df = df.reset_index(drop=True)

In [None]:
# Write the cleaned frame to disk; index=False leaves the row index out of the file.
df.to_csv(path_or_buf=output_path, index=False)
# Show the final shape as the cell's output.
df.shape