# 1. Import Required Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Directory that receives every figure, CSV and animation written by this notebook.
# NOTE: later cells write to the literal path "rf_output/..." rather than to this
# variable, so keep the two in sync if the directory name is ever changed.
output_result = 'rf_output'

# Create the directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_result, exist_ok=True)

# 2. Load & Clean Data

In [None]:
# Read the positioning log, keep only the columns used downstream, drop rows
# with any missing value among them, and order the samples chronologically
# with a fresh 0..n-1 index.
df = (
    pd.read_csv("21.5.2025.csv")
    .loc[:, ['timestamp', 'x_snap', 'y_snap']]
    .dropna()
    .sort_values(by='timestamp')
    .reset_index(drop=True)
)

In [None]:
# Notebook display: the frame after column selection, NaN removal and time sort.
df

# 3. IQR Outlier Removal + Kalman Filter

In [None]:
def iqr_filter(series):
    """Return a boolean mask marking the values that are not IQR outliers.

    A value is kept when it lies inside the closed interval
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25th and 75th
    percentiles of ``series`` and IQR = Q3 - Q1.

    Args:
        series: pandas Series of numeric values.

    Returns:
        Boolean Series aligned with ``series``; True means "keep".
    """
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    spread = third_quartile - first_quartile
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread
    # Both fences are inclusive, matching Series.between's default.
    return (series >= low_fence) & (series <= high_fence)

# A row survives only if BOTH coordinates pass the IQR test; the index is
# then renumbered from 0 so positional and label indexing agree.
x_inliers = iqr_filter(df['x_snap'])
y_inliers = iqr_filter(df['y_snap'])
mask = x_inliers & y_inliers
df = df.loc[mask].reset_index(drop=True)

def kalman_filter_1d(data, process_var=1e-3, meas_var=0.108**2):
    """Smooth a 1-D signal with a scalar, constant-state Kalman filter.

    The state model is "next value equals previous value", so each step
    only inflates the estimate variance by ``process_var`` and then blends
    in the new measurement using the Kalman gain.

    Args:
        data: 1-D sequence of measurements (list, tuple, ndarray or Series).
        process_var: Variance added to the estimate at every prediction step.
        meas_var: Variance assumed for each measurement.

    Returns:
        ``np.ndarray`` of floats with the same length as ``data`` holding the
        filtered estimates. An empty input yields an empty array.
    """
    # Alternative tuning tried earlier: process_var=1e-05, meas_var=1e-7.

    # Work on a positional float array so that lists and pandas Series with
    # arbitrary index labels behave the same as plain ndarrays.
    data = np.asarray(data, dtype=float)
    n = len(data)
    xhat = np.zeros(n)
    if n == 0:
        # Nothing to filter; the original code raised IndexError here.
        return xhat

    # Initialise with the first measurement and unit variance.
    xhat[0] = data[0]
    variance = 1.0
    for k in range(1, n):
        # Predict: state carries over unchanged, uncertainty grows.
        prior = xhat[k - 1]
        variance = variance + process_var
        # Update: blend prior and measurement by the Kalman gain.
        gain = variance / (variance + meas_var)
        xhat[k] = prior + gain * (data[k] - prior)
        variance = (1 - gain) * variance
    return xhat

# Filter each coordinate independently and store the result next to the raw
# columns; .values hands the filter a plain positional array.
x_measurements = df['x_snap'].values
y_measurements = df['y_snap'].values
df['x_kalman'] = kalman_filter_1d(x_measurements)
df['y_kalman'] = kalman_filter_1d(y_measurements)

In [None]:
# --- Optional (currently disabled): remove the last 20 points from the history trajectory ---
# df = df.iloc[:-20].reset_index(drop=True)
# Notebook display: the frame now also carries the x_kalman / y_kalman columns.
df

# 4. Create Sliding Window Dataset

In [None]:
# === Create sliding window input features ===
# Each sample is `window_size` consecutive filtered (x, y) points flattened to
# [x0, y0, x1, y1, ...]; the target is the point IMMEDIATELY after the window.
window_size = 11

# Pull the coordinates out once instead of slicing the DataFrame per sample.
coords = df[['x_kalman', 'y_kalman']].values

if len(coords) <= window_size:
    raise ValueError(
        f"Need more than {window_size} rows to build a sliding-window dataset, "
        f"got {len(coords)}."
    )

X, y = [], []
for i in range(window_size, len(coords)):
    # Window covers rows i-window_size .. i-1, so the next position is row i.
    # (Targeting row i+1 would skip a sample and make the model predict two
    # steps ahead, which contradicts the one-step shift used when predictions
    # are fed back into the window recursively.)
    X.append(coords[i - window_size:i].flatten())
    y.append(coords[i])

X = np.array(X)
y = np.array(y)


In [None]:
# Notebook display: (n_samples, 2 * window_size) features and (n_samples, 2) targets.
X.shape, y.shape

# 5. Train / Val / Test / GT Split

In [None]:
# Three chained splits, all seeded with random_state=42 for reproducibility:
#   1) 70% train+val vs. 30% held out (test + ground-truth pool)
#   2) train+val -> 87.5% train / 12.5% val  (= 61.25% / 8.75% of all samples)
#   3) held out  -> 50% test / 50% GT        (= 15% / 15% of all samples)
# NOTE(review): train_test_split shuffles by default, and neighbouring sliding
# windows share most of their points, so near-duplicate samples can land in
# different splits — the reported validation/test scores may be optimistic.
X_trainval, X_test_gt, y_trainval, y_test_gt = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.125, random_state=42)
X_test, X_gt, y_test, y_gt = train_test_split(X_test_gt, y_test_gt, test_size=0.5, random_state=42)

In [None]:
# Notebook display: sanity-check the sizes of the resulting partitions.
X_test.shape, X_gt.shape, X_trainval.shape, X_test_gt.shape

# 6. Train Random Forest

In [None]:
def _rmse(actual, predicted):
    """Return the root-mean-squared error between targets and predictions."""
    return np.sqrt(mean_squared_error(actual, predicted))


# 100 trees with no depth limit; the seed is fixed so runs are repeatable.
model = RandomForestRegressor(n_estimators=100, max_depth=None, random_state=42)
model.fit(X_train, y_train)

# Predictions on every partition that is scored below.
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_val)
y_test_pred = model.predict(X_test)

# One RMSE per partition (averaged over both output coordinates).
train_rmse = _rmse(y_train, y_train_pred)
val_rmse = _rmse(y_val, y_val_pred)
test_rmse = _rmse(y_test, y_test_pred)

print(f"Train RMSE: {train_rmse:.4f}")
print(f"Validation RMSE: {val_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")

# 7. Recursive 10-Step Prediction 

In [None]:
def predict_recursive(model, start_window, n_steps=20):
    """Roll a one-step predictor forward by feeding its outputs back in.

    Args:
        model: Object whose ``predict`` accepts a (1, n_features) array and
            returns an array whose first row is the next (x, y) point.
        start_window: 1-D array of flattened (x, y) pairs used as the initial
            input. It is copied, so the caller's array is left untouched.
        n_steps: Number of consecutive points to generate.

    Returns:
        ``np.ndarray`` with one predicted point per step, in order.
    """
    history = start_window.copy()
    trajectory = []
    for _step in range(n_steps):
        next_point = model.predict(history.reshape(1, -1))[0]
        trajectory.append(next_point)
        # Drop the oldest (x, y) pair and put the new prediction at the end.
        history = np.roll(history, -2)
        history[-2:] = next_point
    return np.array(trajectory)

# Forecast horizon, and the seed window: the first sample of the GT partition.
n_steps = 10
start_window = X_gt[0]
pred_path = predict_recursive(model, start_window, n_steps)

# 8. Extract True Ground Truth Path

In [None]:
# Find where the seed window ends inside the filtered trajectory: take the
# last (x, y) pair of the window and look up the closest filtered sample.
# NOTE(review): this is a nearest-match lookup, so a revisited position could
# resolve to a different row; and near the end of the data the slice below
# returns fewer than n_steps rows.
start_x, start_y = start_window[-2:]
dx = df['x_kalman'] - start_x
dy = df['y_kalman'] - start_y
dists = np.sqrt(dx**2 + dy**2)
start_idx = dists.idxmin()

# The ground truth is the run of up to n_steps samples right after that row.
gt_begin = start_idx + 1
true_gt_path = df[['x_kalman', 'y_kalman']].iloc[gt_begin:gt_begin + n_steps].values

# 9. Plot Reference, Trajectories & Save

In [None]:
# Make sure the output folder is there before any figure is saved.
os.makedirs("rf_output", exist_ok=True)

# Reference rectangle as four named edges; every edge is a pair of
# [x, y, z] end points encoded as JSON.
polygon_json = (
    '{"l1":[[27.05,12.7,0],[81.19,12.7,0]],'
    '"l2":[[81.19,12.7,0],[81.19,28.87,0]],'
    '"l3":[[81.19,28.87,0],[27.05,28.87,0]],'
    '"l4":[[27.05,28.87,0],[27.05,12.7,0]]}'
)
polygon = json.loads(polygon_json)

def draw_ref_lines():
    """Draw every edge of the reference polygon on the current pyplot axes.

    Reads the module-level ``polygon`` mapping and plots each edge as a black
    dashed line; only the first two coordinates of every point are used.
    """
    for segment in polygon.values():
        xs = [px for px, *_ in segment]
        ys = [py for _, py, *_ in segment]
        plt.plot(xs, ys, 'k--')

# --- Figure 1: raw trajectory over the reference polygon ---
plt.figure(figsize=(8, 6))
draw_ref_lines()
plt.plot(df['x_snap'], df['y_snap'], color='gray', label='Raw')
plt.title("Raw Trajectory + Reference")
plt.legend()
plt.savefig("rf_output/raw_reference.png")

# --- Figure 2: Kalman-filtered trajectory over the reference polygon ---
# This needs its own figure and an explicit plot of the filtered columns;
# without them the start/end markers are drawn on top of the raw figure and
# the "filtered" image never shows the filtered path.
plt.figure(figsize=(8, 6))
draw_ref_lines()
plt.plot(df['x_kalman'], df['y_kalman'], color='blue', label='Filtered')
plt.scatter(df['x_kalman'].iloc[0], df['y_kalman'].iloc[0], color='green', s=100, marker='s', label='Start')
plt.scatter(df['x_kalman'].iloc[-1], df['y_kalman'].iloc[-1], color='red', s=100, marker='*', label='End')
plt.title("Filtered Trajectory with Reference Polygon")
plt.legend()
plt.savefig("rf_output/filtered_with_polygon.png")

# --- Figure 3: recursive prediction against the ground-truth continuation ---
plt.figure(figsize=(8, 6))
draw_ref_lines()
plt.plot(true_gt_path[:, 0], true_gt_path[:, 1], 'g-o', label='True GT')
plt.plot(pred_path[:, 0], pred_path[:, 1], 'b--o', label='Predicted')
# Derive the step count from the data so the title follows the horizon used.
plt.title(f"{len(pred_path)}-Step Predicted vs True Ground Truth")
plt.legend()
plt.savefig("rf_output/true_gt_vs_predicted.png")

# 10. Plot Losses as Line Chart

In [None]:
# RMSE per partition, in the order they appear on the x-axis.
loss_labels = ['Train', 'Validation', 'Test']
loss_values = [train_rmse, val_rmse, test_rmse]

plt.figure(figsize=(6, 5))
plt.plot(loss_labels, loss_values, marker='o', linestyle='-', color='blue')
# Categorical labels sit at x = 0, 1, 2; write each value just above its marker.
for position, rmse in enumerate(loss_values):
    plt.text(position, rmse + 0.01, f"{rmse:.3f}", ha='center')
plt.title("RMSE Loss Curve")
plt.ylabel("RMSE")
plt.grid(True)
plt.tight_layout()
plt.savefig("rf_output/loss_line_plot.png")
plt.show()

# 11. Euclidean Distance + Plot + CSV file

In [None]:
def euclidean(p1, p2):
    """Return the straight-line distance between two points in the x-y plane.

    Only the first two components of ``p1`` and ``p2`` are used.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return np.sqrt(dx**2 + dy**2)

# Distance between prediction and ground truth at every step; zip stops at
# the shorter of the two paths.
step_errors = [
    euclidean(predicted_pt, truth_pt)
    for predicted_pt, truth_pt in zip(pred_path, true_gt_path)
]
avg_error = np.mean(step_errors)

# Plot: error per step against a fixed 4.0 reference line.
step_numbers = range(1, len(step_errors) + 1)
plt.figure(figsize=(7, 4))
plt.plot(step_numbers, step_errors, marker='o', color='purple')
plt.axhline(4.0, linestyle='--', color='red', label="4m Threshold")
plt.title(f"Step-wise Euclidean Average Distance Error: {avg_error:.4f} meters")
plt.xlabel("Step")
plt.ylabel("Distance Error")
plt.grid(True)
plt.savefig("rf_output/stepwise_error_plot.png")
plt.show()

# CSV: one row per step with both points and their distance.
rows = [
    {
        'step': step,
        'gt_x': truth_pt[0],
        'gt_y': truth_pt[1],
        'pred_x': predicted_pt[0],
        'pred_y': predicted_pt[1],
        'euclidean_distance': error,
    }
    for step, (truth_pt, predicted_pt, error) in enumerate(
        zip(true_gt_path, pred_path, step_errors), start=1
    )
]

df_error = pd.DataFrame(rows)
df_error.to_csv("rf_output/stepwise_prediction_vs_gt.csv", index=False)
print(f"✅ Exported: stepwise_prediction_vs_gt.csv - Average Error: {avg_error:.4f}")

In [None]:
# === Full Combined Plot: History, Predicted Path, Ground Truth ===
plt.figure(figsize=(8, 6))

# Reference polygon
draw_ref_lines()

# Whole recorded trajectory. The plotted columns are the Kalman-filtered
# ones, so the legend says "Filtered" rather than "Raw".
plt.plot(df['x_kalman'], df['y_kalman'], 'gray', alpha=0.5, label='Filtered History')

# Ground Truth path (green)
plt.plot(true_gt_path[:, 0], true_gt_path[:, 1], 'g-o', label='Ground Truth')

# Predicted path (blue dashed)
plt.plot(pred_path[:, 0], pred_path[:, 1], 'b--o', label='Predicted Path')

plt.title("Trajectory Overview: History, Ground Truth, and Prediction")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("rf_output/full_trajectory_overview.png")
plt.show()

In [None]:
import matplotlib.animation as animation

# The animation cell can be run on its own, so it creates the folder itself.
os.makedirs("rf_output", exist_ok=True)

# --- Reference polygon ---
# Four named edges, each a pair of [x, y, z] end points, encoded as JSON.
polygon_json = (
    '{"l1":[[27.05,12.7,0],[81.19,12.7,0]],'
    '"l2":[[81.19,12.7,0],[81.19,28.87,0]],'
    '"l3":[[81.19,28.87,0],[27.05,28.87,0]],'
    '"l4":[[27.05,28.87,0],[27.05,12.7,0]]}'
)
polygon = json.loads(polygon_json)

def draw_ref_lines(ax=None):
    """Draw every edge of the reference polygon as a thin black dashed line.

    This definition replaces the earlier zero-argument ``draw_ref_lines``.
    ``ax`` is therefore optional: calls written as ``draw_ref_lines()`` in
    earlier cells keep working when those cells are re-run after this one.
    Lines are drawn with ``linewidth=1`` in both cases.

    Args:
        ax: Matplotlib axes to draw on. Defaults to the current axes
            (``plt.gca()``) when omitted.
    """
    if ax is None:
        ax = plt.gca()
    for line in polygon.values():
        # Each edge is a list of [x, y, z] points; z is ignored.
        x = [p[0] for p in line]
        y = [p[1] for p in line]
        ax.plot(x, y, 'k--', linewidth=1)

# --- Data for animation ---
# The animation replays the snapped (unfiltered) coordinates.
x_hist = df['x_snap'].values
y_hist = df['y_snap'].values

# --- Setup figure ---
# Artists start out empty; init() and update() fill them in frame by frame.
fig, ax = plt.subplots(figsize=(10, 6))
draw_ref_lines(ax)
(line_hist,) = ax.plot([], [], 'gray', linewidth=2, label='History Trajectory')
start_point = ax.scatter([], [], s=100, color='blue', marker='s', label='Start')
end_point = ax.scatter([], [], s=100, color='red', marker='s', label='End')

# Fixed view limits keep the frame steady while the path grows.
ax.set(
    xlim=(20, 90),
    ylim=(10, 35),
    xlabel='X_snap Coordinate',
    ylabel='Y_snap Coordinate',
    title='Animated History Trajectory',
)
ax.legend()
ax.grid(True)

def init():
    """Blank out all animated artists before the first frame is drawn.

    Returns:
        The tuple of artists that the animation redraws.
    """
    line_hist.set_data([], [])
    for marker in (start_point, end_point):
        marker.set_offsets(np.empty((0, 2)))
    return line_hist, start_point, end_point

def update(frame):
    """Draw the trajectory up to and including sample ``frame``.

    Args:
        frame: Index of the most recent sample to show.

    Returns:
        The tuple of artists that the animation redraws.
    """
    shown = frame + 1
    line_hist.set_data(x_hist[:shown], y_hist[:shown])
    # The start marker stays on the first sample; the end marker follows the
    # newest one.
    first_xy = np.array([[x_hist[0], y_hist[0]]])
    latest_xy = np.array([[x_hist[frame], y_hist[frame]]])
    start_point.set_offsets(first_xy)
    end_point.set_offsets(latest_xy)
    return line_hist, start_point, end_point


# --- Create animation ---
# One frame per recorded sample, 100 ms apart, played once.
ani = animation.FuncAnimation(
    fig,
    update,
    frames=len(x_hist),
    init_func=init,
    interval=100,
    blit=True,
    repeat=False,
)

# --- Save MP4, or fall back to GIF when the ffmpeg executable is missing ---
video_path = "rf_output/history_trajectory.mp4"
try:
    ani.save(video_path, writer=animation.FFMpegWriter(fps=10))
except FileNotFoundError:
    gif_path = "rf_output/history_trajectory.gif"
    ani.save(gif_path, writer="pillow", fps=10)
    print(f"⚠️ FFmpeg not found. Saved as GIF: {gif_path}")
else:
    print(f"✅ Video saved at {video_path}")
