In [20]:
import pandas as pd

# Read the validated trajectory recording into a DataFrame.
df = pd.read_csv(
    r"C:\Users\debas\AIRTRAJ\dataset-research\validated_data\Oslo6.csv"
)

# Columns included in the summary table.
features = ['lat', 'lon', 'alt', 'time']
feature_frame = df[features]

# describe() supplies count, mean, std, min, quartiles and max per column.
stats = feature_frame.describe()

# Per-column figures that describe() does not report; each entry becomes one
# row of the final table once the frame is transposed below.
extra_rows = {
    'missing_values': feature_frame.isna().sum(),
    'unique_values': feature_frame.nunique(),
    'variance': feature_frame.var(),
    'range': feature_frame.max() - feature_frame.min(),
}
custom_stats = pd.DataFrame(extra_rows)

# Stack the extra rows underneath the describe() rows and show the result.
stats_combined = pd.concat([stats, custom_stats.transpose()])
print(stats_combined)


                       lat         lon           alt          time
count           660.000000  660.000000  6.600000e+02    660.000000
mean             60.145638   11.310012  8.937193e+03    484.290943
std               0.262533    0.105435  5.852830e+03    281.452261
min              59.508120   11.097570  6.783339e+02      0.000000
25%              59.965543   11.192533  4.068830e+03    239.632657
50%              60.250047   11.373324  7.639354e+03    478.981315
75%              60.352030   11.390717  1.327862e+04    733.508310
max              60.434265   11.413091  2.233682e+04    990.259340
missing_values    0.000000    0.000000  0.000000e+00      0.000000
unique_values   659.000000  659.000000  6.600000e+02    660.000000
variance          0.068923    0.011116  3.425562e+07  79215.375230
range             0.926145    0.315521  2.165849e+04    990.259340


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import joblib
import json
import os

def _create_sequences(data, seq_length):
    """Build sliding-window inputs and next-step targets from a 2-D array.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the row that
    immediately follows it, ``data[i + seq_length]``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and the targets.
    """
    windows, targets = [], []
    for start in range(len(data) - seq_length):
        stop = start + seq_length
        windows.append(data[start:stop])
        targets.append(data[stop])
    return np.array(windows), np.array(targets)


def _haversine(lat1, lon1, lat2, lon2):
    """Return the haversine distance in metres between points given in degrees.

    Accepts scalars or NumPy arrays; arrays are combined element-wise.
    """
    R = 6371000  # Earth radius in metres
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    dphi = np.radians(lat2 - lat1)
    dlambda = np.radians(lon2 - lon1)
    a = np.sin(dphi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(dlambda / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1], which would make
    # sqrt/arcsin return NaN; clamp it to the valid domain first.
    a = np.clip(a, 0.0, 1.0)
    return 2 * R * np.arcsin(np.sqrt(a))


def predict_from_csv(csv_path, model_path=None, scaler_path=None):
    """Run the coordinate predictor over a trajectory CSV and score it.

    The CSV's ``lat``, ``lon``, ``alt`` and ``time`` columns are scaled with
    the saved scaler and cut into windows of 10 consecutive rows; the model
    predicts the row following each window. Predictions are un-scaled and
    compared with the true rows.

    Side effects:
        * writes ``prediction_results.json`` to the current working directory;
        * writes ``static/prediction_plot.png`` next to this module's file, or
          under the current working directory when ``__file__`` is not
          defined (for example inside a notebook kernel).

    Args:
        csv_path: Path of the CSV file to evaluate.
        model_path: Optional path of the Keras model; defaults to the
            project's ``coord_predictor.keras``.
        scaler_path: Optional path of the joblib-saved scaler; defaults to
            the project's ``coord_scaler.pkl``.

    Returns:
        dict with two keys: ``"sample_predictions"`` (first five true and
        predicted lat/lon/alt values) and ``"metrics"`` (path length and
        average errors). ``"Prediction Error (% of Path)"`` is ``None`` when
        the true path has zero length, so the JSON file stays standard.

    Raises:
        KeyError: if a required column is missing from the CSV.
        ValueError: if the CSV has too few rows to form a single window.
    """
    SEQ_LENGTH = 10
    features = ['lat', 'lon', 'alt', 'time']

    if model_path is None:
        model_path = r"C:\Users\debas\AIRTRAJ\dataset-research\Data Annotation UI\coord_predictor.keras"
    if scaler_path is None:
        scaler_path = r"C:\Users\debas\AIRTRAJ\dataset-research\Data Annotation UI\coord_scaler.pkl"

    # ----------------------
    # Load data
    # ----------------------
    # Validate the input before loading the model so bad files fail fast.
    df = pd.read_csv(csv_path)
    data = df[features]
    if len(data) <= SEQ_LENGTH:
        raise ValueError(
            f"Need more than {SEQ_LENGTH} rows to build a prediction sequence, "
            f"got {len(data)}"
        )

    # ----------------------
    # Load model & scaler
    # ----------------------
    model = load_model(model_path, compile=False)
    # NOTE: joblib.load unpickles the file; only point this at trusted files.
    scaler = joblib.load(scaler_path)
    data_scaled = scaler.transform(data)

    # ----------------------
    # Sequence creation
    # ----------------------
    X, y = _create_sequences(data_scaled, SEQ_LENGTH)

    # ----------------------
    # Prediction
    # ----------------------
    y_pred_scaled = model.predict(X)
    y_pred = scaler.inverse_transform(y_pred_scaled)
    y_true = scaler.inverse_transform(y)

    # Columns come back in the order given by `features`.
    true_lat, true_lon, true_alt, true_time = y_true.T
    pred_lat, pred_lon, pred_alt, pred_time = y_pred.T

    # ----------------------
    # Accuracy calculations
    # ----------------------
    horizontal_error = _haversine(true_lat, true_lon, pred_lat, pred_lon)
    alt_error = np.abs(true_alt - pred_alt)
    error_3d = np.sqrt(horizontal_error ** 2 + alt_error ** 2)

    # Length of the true path: sum of distances between consecutive points.
    segment_distances = _haversine(true_lat[:-1], true_lon[:-1], true_lat[1:], true_lon[1:])
    total_flight_length_m = float(np.sum(segment_distances))

    avg_3d_error_m = float(np.mean(error_3d))
    if total_flight_length_m > 0:
        percentage_error = round(float((avg_3d_error_m / total_flight_length_m) * 100), 4)
    else:
        # A zero-length path would give inf/nan, which json.dump writes as
        # non-standard Infinity/NaN tokens; report "not available" instead.
        percentage_error = None

    def _head(values, digits):
        """Return the first five entries as rounded Python floats."""
        return [round(x, digits) for x in values[:5].tolist()]

    # ----------------------
    # Save results
    # ----------------------
    results = {
        "sample_predictions": {
            "true_latitude": _head(true_lat, 6),
            "true_longitude": _head(true_lon, 6),
            "true_altitude_m": _head(true_alt, 2),
            "predicted_latitude": _head(pred_lat, 6),
            "predicted_longitude": _head(pred_lon, 6),
            "predicted_altitude_m": _head(pred_alt, 2),
        },
        "metrics": {
            "Total Flight Path Length (km)": round(total_flight_length_m / 1000, 3),
            "Average Horizontal Error (m)": round(float(np.mean(horizontal_error)), 2),
            "Average Altitude Error (m)": round(float(np.mean(alt_error)), 2),
            "Average 3D Error (m)": round(avg_3d_error_m, 2),
            "Prediction Error (% of Path)": percentage_error,
        }
    }

    with open("prediction_results.json", "w") as f:
        json.dump(results, f)

    # ----------------------
    # Save plot
    # ----------------------
    # __file__ is not defined when this code runs in a notebook kernel; fall
    # back to the working directory instead of failing with NameError.
    try:
        base_dir = os.path.dirname(__file__)
    except NameError:
        base_dir = os.getcwd()
    static_dir = os.path.join(base_dir, "static")
    os.makedirs(static_dir, exist_ok=True)

    fig = plt.figure(figsize=(14, 6))
    try:
        ax = fig.add_subplot(121, projection='3d')
        ax.plot(true_lat, true_lon, true_alt, label='Actual', alpha=0.7)
        ax.plot(pred_lat, pred_lon, pred_alt, label='Predicted', linestyle='--', alpha=0.7)
        ax.set_xlabel('Latitude')
        ax.set_ylabel('Longitude')
        ax.set_zlabel('Altitude')
        ax.set_title('3D Trajectory: Actual vs Predicted')
        ax.legend()

        ax2 = fig.add_subplot(122)
        ax2.plot(true_time, label='Actual Time', alpha=0.7)
        ax2.plot(pred_time, label='Predicted Time', linestyle='--', alpha=0.7)
        ax2.set_title('Time Prediction')
        ax2.set_xlabel('Sequence Step')
        ax2.set_ylabel('Time')
        ax2.legend()

        # Save this specific figure rather than whichever one is "current".
        fig.savefig(os.path.join(static_dir, "prediction_plot.png"))
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return results


21/21 ━━━━━━━━━━━━━━━━━━━━ 1s 18ms/step
Total actual flight path length: 162.89 km
Average horizontal error: 312.69 m
Average altitude error: 53.11 m
Average total 3D error: 321.04 m
Prediction error as % of total path length: 0.1971%
