In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Load datasets
vendee_2020 = pd.read_csv('data/ranking_history_withwind_2020_once_per_day.csv')
vendee_2024 = pd.read_csv('data/ranking_history_withwind_2024.csv')

# Parse dates
vendee_2020['date'] = pd.to_datetime(vendee_2020['date'])
vendee_2024['date'] = pd.to_datetime(vendee_2024['date'])

# Calculate displacement (Δlatitude, Δlongitude)
vendee_2020['delta_lat'] = vendee_2020['latitude'].diff()
vendee_2020['delta_lon'] = vendee_2020['longitude'].diff()

# Normalize wind features and calculate u, v components
for df in [vendee_2020, vendee_2024]:
    df['windspeed'] = df['windspeed'].fillna(0)
    df['winddir'] = df['winddir'].fillna(0)
    df['u'] = df['windspeed'] * np.cos(np.radians(df['winddir']))
    df['v'] = df['windspeed'] * np.sin(np.radians(df['winddir']))

# Drop rows with NaNs from diff operation
vendee_2020 = vendee_2020.dropna(subset=['delta_lat', 'delta_lon'])


In [19]:
# Features and target
features = ['latitude', 'longitude', 'kph', 'heading', 'u', 'v']
target = ['delta_lat', 'delta_lon']

# Prepare training data (2020)
X = vendee_2020[features]
y = vendee_2020[target]

# Split training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
score = model.score(X_test, y_test)
print(f"Model R² Score: {score:.2f}")


Model R² Score: 0.77


In [20]:
# Filter 2024 data for the chosen skipper
chosen_skipper = 'Pip Hare'
latest_data = vendee_2024[(vendee_2024['skipper'] == chosen_skipper)&(vendee_2024['date'] == vendee_2024['date'].max())].iloc[-1]

# Predict future positions iteratively
future_positions = []
current_state = latest_data[features].values.reshape(1, -1)

for _ in range(1):  # Predict for the next day
    delta_lat, delta_lon = model.predict(current_state)[0]
    new_lat = current_state[0][0] + delta_lat
    new_lon = current_state[0][1] + delta_lon

    # Update current state
    current_state = np.array([[new_lat, new_lon, 
                               current_state[0][2],  # Keep speed constant
                               current_state[0][3],  # Keep heading constant
                               current_state[0][4],  # Keep wind u constant
                               current_state[0][5]]])  # Keep wind v constant
    future_positions.append((new_lat, new_lon))

# Create a DataFrame of future positions
future_df = pd.DataFrame(future_positions, columns=['latitude', 'longitude'])
print(future_df)


    latitude  longitude
0  23.476778 -31.813425




In [21]:
pip_hare_data = vendee_2024[vendee_2024['skipper'] == chosen_skipper]

In [22]:
import folium

# Create a map centered on the last known position
m = folium.Map(location=[latest_data['latitude'], latest_data['longitude']], zoom_start=5)

for _, row in pip_hare_data.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Pip Hare (Observed) | Date: {row['date']}"
    ).add_to(m)
    
# Add current position
folium.Marker(
    location=[latest_data['latitude'], latest_data['longitude']],
    popup="Current Position"
).add_to(m)

# Add predicted positions
for i, row in future_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Day {i+1} Prediction",
        icon=folium.Icon(color="red")
    ).add_to(m)

# Save and display the map
m.save("predicted_route.html")
m
