In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib
import json

# Load the trajectory samples, discard any row that has a missing value in
# any column, then renumber the index so it is contiguous again.
df = pd.read_csv("balloon_trajectory_dataset.csv")
df = df.dropna().reset_index(drop=True)

def haversine_delta(lat1, lon1, lat2, lon2):
    """Return the (north, east) displacement in km from point 1 to point 2.

    Despite the name, this is not the haversine great-circle formula: it is
    an equirectangular (local flat-earth) approximation. The east-west
    component is scaled by the cosine of the mean latitude of the two points.

    Args:
        lat1, lon1: Start latitude/longitude in degrees (scalars or arrays).
        lat2, lon2: End latitude/longitude in degrees (scalars or arrays).

    Returns:
        A ``(delta_lat_km, delta_lon_km)`` tuple: north-south displacement
        first, east-west displacement second.
    """
    earth_radius_km = 6371

    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    d_phi = phi2 - phi1
    d_lambda = np.radians(lon2) - np.radians(lon1)

    # Wrap the longitude difference into [-pi, pi) so a hop across the
    # antimeridian is measured the short way round.
    d_lambda = (d_lambda + np.pi) % (2 * np.pi) - np.pi

    mean_phi = (phi1 + phi2) / 2
    east_km = d_lambda * np.cos(mean_phi) * earth_radius_km
    north_km = d_phi * earth_radius_km
    return north_km, east_km

# Sliding-window dataset construction.
#
# Each sample is a window of `sequence_length` consecutive rows of a single
# balloon. 21 rows give 20 consecutive steps; every step contributes 7 values
# (delta_lat_km, delta_lon_km, delta_alt, balloon_speed, balloon_dir,
# windspeed, winddir), so a feature row has 20 * 7 = 140 entries. The target
# is the step that immediately follows the window:
# (delta_lat_km, delta_lon_km, delta_alt).
sequence_length = 21  # rows per window
features_list = []
targets_list = []

# Kept as a module-level name; the loop below iterates the groups directly.
balloon_ids = df["balloon_id"].unique()

# groupby(sort=False) visits balloons in order of first appearance (the same
# order as unique()) without re-scanning the whole frame once per balloon.
for _, b_rows in df.groupby("balloon_id", sort=False):
    # NOTE(review): rows are ordered by *descending* hour_index and then
    # treated as a forward sequence. That is chronological only if hour_index
    # counts hours into the past - confirm against the dataset.
    b_df = b_rows.sort_values("hour_index", ascending=False).reset_index(drop=True)

    # A balloon needs at least sequence_length + 1 rows to yield one sample.
    if len(b_df) <= sequence_length:
        continue

    lat = b_df["lat"].to_numpy()
    lon = b_df["lon"].to_numpy()
    alt = b_df["alt"].to_numpy()

    # Compute every row-to-row step once per balloon instead of once per
    # window that contains it. Row j of `steps` describes the move from
    # row j to row j + 1, paired with the speed/direction readings at row j.
    step_lat_km, step_lon_km = haversine_delta(lat[:-1], lon[:-1], lat[1:], lon[1:])
    steps = np.column_stack([
        step_lat_km,
        step_lon_km,
        alt[1:] - alt[:-1],
        b_df["balloon_speed"].to_numpy()[:-1],
        b_df["balloon_dir"].to_numpy()[:-1],
        b_df["windspeed"].to_numpy()[:-1],
        b_df["winddir"].to_numpy()[:-1],
    ])

    steps_per_window = sequence_length - 1
    for i in range(len(b_df) - sequence_length):
        # Feature and target are appended together so the two lists can
        # never get out of step with each other.
        features_list.append(steps[i:i + steps_per_window].ravel())
        # Target: the step leaving the last row of the window.
        targets_list.append(steps[i + steps_per_window, :3])

# Stack the per-window lists into 2-D arrays, one row per window.
X = np.array(features_list)
y = np.array(targets_list)
print("Features shape:", X.shape)
print("Targets shape:", y.shape)

# Hold out 20% of the windows for evaluation, with a fixed seed.
# NOTE(review): windows slide by one row, so neighbouring samples share most
# of their content; a random row-wise split places near-duplicates on both
# sides and the RMSE below is likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Ordinary least squares, fitted jointly for the three target columns.
# NOTE(review): the recorded run has 100 samples x 140 features, i.e. fewer
# training rows than features - the fit is underdetermined there.
model = LinearRegression().fit(X_train, y_train)

# Evaluate model: root-mean-square error per target column.
y_pred = model.predict(X_test)
squared_error = np.square(y_test - y_pred)
rmse = np.sqrt(squared_error.mean(axis=0))
print(f"RMSE (delta_lat_km, delta_lon_km, delta_alt_km): {rmse}")

# Persist the fitted estimator with joblib so it can be reloaded from Python.
joblib.dump(model, "balloon_model_deltas_haversine.pkl")
print("Model saved as 'balloon_model_deltas_haversine.pkl'")

# Also export the raw coefficients and intercepts as plain JSON so they can
# be read without Python or scikit-learn.
coef_data = {
    "coef": model.coef_.tolist(),
    "intercept": model.intercept_.tolist(),
}
with open("balloon_model.json", "w") as f:
    f.write(json.dumps(coef_data))
print("Model saved as JSON for JS")

Features shape: (100, 140)
Targets shape: (100, 3)
RMSE (delta_lat_km, delta_lon_km, delta_alt_km): [23.19718275 21.88354861  5.73775285]
Model saved as 'balloon_model_deltas_haversine.pkl'
Model saved as JSON for JS
