In [3]:
# If running locally or in new environment:
!pip install pykalman

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from pykalman import KalmanFilter

# ===============================
# üîß Configuration
# ===============================
CSV_PATH = "/kaggle/input/hehehe123/newdataset.csv"
FEATURES = ["Accel_X", "Accel_Y", "Accel_Z", "Gyro_X", "Gyro_Y", "Gyro_Z",
            "Mag_X", "Mag_Y", "Mag_Z", "Heading_deg", "WiFi_RSSI_dBm"]
TARGETS = ["Latitude", "Longitude"]

# ===============================
# üì¶ Helper Functions
# ===============================
def evaluate_model(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return rmse, mae, r2

def plot_results(y_true, y_pred, title, filename):
    plt.figure(figsize=(6, 6))
    plt.scatter(y_true, y_pred, alpha=0.6, edgecolors='k', color='navy')
    plt.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], 'r--', label='Ideal Fit')
    plt.xlabel("True Values")
    plt.ylabel("Predicted Values")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(filename, dpi=300)
    plt.close()

def get_model_size(model, filename):
    joblib.dump(model, filename)
    size_kb = os.path.getsize(filename) / 1024
    os.remove(filename)
    return size_kb

def get_object_size(obj, filename="temp_model.pkl"):
    joblib.dump(obj, filename)
    size_kb = os.path.getsize(filename) / 1024
    os.remove(filename)
    return size_kb

# ===============================
# üì• Load and preprocess data
# ===============================
df = pd.read_csv(CSV_PATH, usecols=FEATURES + TARGETS)
X = df[FEATURES].values
y = df[TARGETS].values

scaler_X = StandardScaler().fit(X)
scaler_y = StandardScaler().fit(y)

X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# ===============================
# üìà Models to Evaluate
# ===============================
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "MLP Regressor": MLPRegressor(random_state=42, max_iter=1000)
}

results = []

# ===============================
# üîÑ Train and Evaluate ML Models
# ===============================
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    for i, target in enumerate(["Latitude", "Longitude"]):
        y_true_inv = scaler_y.inverse_transform(np.column_stack((
            y_test[:, 0] if i == 0 else np.zeros_like(y_test[:, 0]),
            y_test[:, 1] if i == 1 else np.zeros_like(y_test[:, 1])
        )))[:, i]

        y_pred_inv = scaler_y.inverse_transform(np.column_stack((
            y_pred[:, 0] if i == 0 else np.zeros_like(y_pred[:, 0]),
            y_pred[:, 1] if i == 1 else np.zeros_like(y_pred[:, 1])
        )))[:, i]

        rmse, mae, r2 = evaluate_model(y_true_inv, y_pred_inv)
        size_kb = get_model_size(model, f"{name.replace(' ', '_')}_{target}.pkl")
        results.append([name, target, rmse, mae, r2, size_kb])

        plot_results(y_true_inv, y_pred_inv, f"{name} - {target}", f"{name.replace(' ', '_')}_{target}_plot.png")

# ===============================
# üå™Ô∏è Kalman Filter Evaluation
# ===============================
kf_lat = KalmanFilter(initial_state_mean=0, n_dim_obs=1)
kf_lon = KalmanFilter(initial_state_mean=0, n_dim_obs=1)

lat_smoothed, _ = kf_lat.smooth(y[:, 0].reshape(-1, 1))
lon_smoothed, _ = kf_lon.smooth(y[:, 1].reshape(-1, 1))

idx = np.arange(len(y))
_, test_idx = train_test_split(idx, test_size=0.2, random_state=42)

lat_true, lat_pred = y[test_idx, 0], lat_smoothed[test_idx].flatten()
lon_true, lon_pred = y[test_idx, 1], lon_smoothed[test_idx].flatten()

rmse_lat, mae_lat, r2_lat = evaluate_model(lat_true, lat_pred)
rmse_lon, mae_lon, r2_lon = evaluate_model(lon_true, lon_pred)

kalman_lat_size = get_object_size(kf_lat, "kalman_lat.pkl")
kalman_lon_size = get_object_size(kf_lon, "kalman_lon.pkl")

results.append(["Kalman Filter", "Latitude", rmse_lat, mae_lat, r2_lat, kalman_lat_size])
results.append(["Kalman Filter", "Longitude", rmse_lon, mae_lon, r2_lon, kalman_lon_size])

plot_results(lat_true, lat_pred, "Kalman Filter - Latitude", "Kalman_Latitude_plot.png")
plot_results(lon_true, lon_pred, "Kalman Filter - Longitude", "Kalman_Longitude_plot.png")

# ===============================
# üî£ Symbolic Regression Model
# ===============================
# Symbolic: Latitude = x10 * ((x2 - x5) * 0.023231797)
lat_symb = X_scaled[:, 10] * (X_scaled[:, 2] - X_scaled[:, 5]) * 0.023231797

# Symbolic: Longitude = sin((x3 * 0.013614469) * x2)
lon_symb = np.sin(X_scaled[:, 3] * 0.013614469 * X_scaled[:, 2])

lat_symb_inv = scaler_y.inverse_transform(np.column_stack([lat_symb, np.zeros_like(lat_symb)]))[:, 0]
lon_symb_inv = scaler_y.inverse_transform(np.column_stack([np.zeros_like(lon_symb), lon_symb]))[:, 1]

rmse_lat, mae_lat, r2_lat = evaluate_model(y[:, 0], lat_symb_inv)
rmse_lon, mae_lon, r2_lon = evaluate_model(y[:, 1], lon_symb_inv)

results.append(["Symbolic Model", "Latitude", rmse_lat, mae_lat, r2_lat, 0.0])
results.append(["Symbolic Model", "Longitude", rmse_lon, mae_lon, r2_lon, 0.0])

plot_results(y[:, 0], lat_symb_inv, "Symbolic Model - Latitude", "latitude_symbolic_300dpi.png")
plot_results(y[:, 1], lon_symb_inv, "Symbolic Model - Longitude", "longitude_symbolic_300dpi.png")

# ===============================
# üìä Final Summary
# ===============================
print("\nüìç Model Evaluation Summary\n")
print(f"{'Model':<20} {'Target':<10} {'RMSE':<10} {'MAE':<10} {'R¬≤ Score':<12} {'Size (KB)':<10}")
print("-" * 75)
for r in results:
    print(f"{r[0]:<20} {r[1]:<10} {r[2]:<10.6f} {r[3]:<10.6f} {r[4]:<12.6f} {r[5]:<10.2f}")



üìç Model Evaluation Summary

Model                Target     RMSE       MAE        R¬≤ Score     Size (KB) 
---------------------------------------------------------------------------
Linear Regression    Latitude   0.000113   0.000098   -0.002990    0.82      
Linear Regression    Longitude  0.000117   0.000101   -0.002340    0.82      
Random Forest        Latitude   0.000114   0.000098   -0.021433    59717.85  
Random Forest        Longitude  0.000119   0.000103   -0.030679    59717.85  
MLP Regressor        Latitude   0.000114   0.000098   -0.013474    38.71     
MLP Regressor        Longitude  0.000117   0.000102   -0.003785    38.71     
Kalman Filter        Latitude   0.000089   0.000060   0.384779     0.32      
Kalman Filter        Longitude  0.000346   0.000070   -7.734472    0.32      
Symbolic Model       Latitude   0.000115   0.000099   0.001347     0.00      
Symbolic Model       Longitude  0.000116   0.000101   0.000683     0.00      
