In [1]:
import numpy as np
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import plotly.graph_objects as go
from src.data.Segment_Slicer import SegmentSlicer 
ss = SegmentSlicer()

In [2]:
raw_parquet = pd.read_parquet('../data/raw/reunion_segments.parquet')
df_parquet = pd.DataFrame(raw_parquet)

In [3]:
ss.cut_segment(df_parquet['altitude_profile'].iloc[0], df_parquet['distance_profile'].iloc[0], df_parquet['coordinates'].iloc[0])

[{'type': 'downhill',
  'category': 'Uncategorized',
  'start_distance': np.float64(0.0),
  'end_distance': np.float64(96.0),
  'distance': np.float64(96.0),
  'start_altitude': np.float64(67.2),
  'end_altitude': np.float64(63.6),
  'elevation_gain': 0,
  'elevation_loss': np.float64(3.6000000000000014),
  'elevation_change': np.float64(-3.6000000000000014),
  'grade': np.float64(-2.3396626569104186),
  'max_grade': np.float64(2.0112995145784027),
  'min_grade': np.float64(-5.043683747123649),
  'grade_variance': np.float64(6.042728849353839),
  'start_idx': 0,
  'end_idx': np.int64(9)},
 {'type': 'climb',
  'category': 'Cat 4',
  'start_distance': np.float64(96.0),
  'end_distance': np.float64(2088.5),
  'distance': np.float64(1992.5),
  'start_altitude': np.float64(63.6),
  'end_altitude': np.float64(163.8),
  'elevation_gain': np.float64(99.80000000000001),
  'elevation_loss': 0,
  'elevation_change': np.float64(99.80000000000001),
  'grade': np.float64(5.008782936010038),
  'max_g

In [37]:
def plot_elevation_profile(segments):
    fig = go.Figure()
    distances = []
    altitudes = []

    # Extract distances and altitudes for the elevation profile line
    for seg in segments:
        distances.extend([seg['start_distance'], seg['end_distance']])
        altitudes.extend([seg['start_altitude'], seg['end_altitude']])

    # Add thick black line for elevation profile
    fig.add_trace(go.Scatter(
        x=distances,
        y=altitudes,
        mode='lines',
        line=dict(width=2, color='black'),
        name='Elevation Profile'
    ))

    # Add colored trapezoids for each segment type
    for seg in segments:
        if seg['type'] == 'climb':
            color = 'red'
        elif seg['type'] == 'uphill':
            color = 'orange'
        elif seg['type'] == 'flat':
            color = 'blue'
        elif seg['type'] == 'downhill':
            color = 'grey'
        elif seg['type'] == 'descent':
            color = 'green'
        else:
            color = 'blue'

        # Update layout
        min_alt = min(altitudes)
        max_alt = max(altitudes)

        # Create trapezoid using Scatter with fill="toself"
        fig.add_trace(go.Scatter(
            x=[seg['start_distance'], seg['end_distance'], seg['end_distance'], seg['start_distance']],
            y=[seg['start_altitude'], seg['end_altitude'], min(altitudes) * 0.9, min(altitudes) * 0.9],
            fill="toself",
            fillcolor=color,
            line=dict(width=0),
            opacity=0.5,
            showlegend=False,
            hoverinfo='skip'
        ))

        # Add annotations for each segment type
        x_mid = (seg['start_distance'] + seg['end_distance']) / 2
        y_mid = (seg['start_altitude'] + seg['end_altitude']) / 2

        if seg['type'] == 'climb':
            annotation_text = f"üìè {seg['distance']:.0f}m <br> üèîÔ∏è {seg['elevation_change']:.0f}m <br> üìà {seg['grade']:.0f}%"
        elif seg['type'] == 'uphill':
            annotation_text = f"üìè {seg['distance']:.0f}m <br> üèîÔ∏è {seg['elevation_change']:.0f}m"
        elif seg['type'] == 'flat':
            annotation_text = f"üìè {seg['distance']:.0f}m"
        elif seg['type'] == 'downhill':
            annotation_text = f"üìè {seg['distance']:.0f}m <br> üèîÔ∏è {abs(seg['elevation_change']):.0f}m"
        elif seg['type'] == 'descent':
            annotation_text = f"üìè {seg['distance']:.0f}m <br> üèîÔ∏è {abs(seg['elevation_change']):.0f}m <br> üìâ {abs(seg['grade']):.0f}%"

        fig.add_annotation(
            x=x_mid,
            y=y_mid + (max_alt-min_alt) * 0.1,  # Slightly above the segment line
            text=annotation_text,
            showarrow=False,
            font=dict(color='black', size=10),
            bgcolor='rgba(255, 255, 255, 0.7)',
            bordercolor='black',
            borderwidth=1,
            borderpad=3
        )

    
    fig.update_layout(
        title='Trail Elevation Profile with Segment Types',
        xaxis_title='Distance (m)',
        yaxis_title='Altitude (m)',
        yaxis=dict(
            range=[min_alt * 0.9, max_alt * 1.1],
            showgrid=False
        ),
        xaxis=dict(showgrid=False),
        showlegend=False,
        hovermode='x unified',
        margin=dict(t=40),
        height=600
    )

    fig.show()

In [38]:
plot_elevation_profile(ss.cut_segment(df_parquet['altitude_profile'].iloc[0], df_parquet['distance_profile'].iloc[0], df_parquet['coordinates'].iloc[0]))

In [39]:
plot_elevation_profile(ss.cut_segment(df_parquet['altitude_profile'].iloc[1], df_parquet['distance_profile'].iloc[1], df_parquet['coordinates'].iloc[1]))

In [40]:
plot_elevation_profile(ss.cut_segment(df_parquet['altitude_profile'].iloc[2], df_parquet['distance_profile'].iloc[2], df_parquet['coordinates'].iloc[2]))

In [41]:
plot_elevation_profile(ss.cut_segment(df_parquet['altitude_profile'].iloc[15], df_parquet['distance_profile'].iloc[15], df_parquet['coordinates'].iloc[15]))

### Mod√®le

In [12]:
# %% Function to extract features from a profile
def extract_features(profile):
    """Extract simple features from 100m elevation profile"""
    gains = [chunk['elevation_gain'] for chunk in profile]
    
    return {
        'total_distance': profile[-1]['end_distance'],
        'total_elevation_gain': sum(g for g in gains if g > 0),
        'total_elevation_loss': sum(abs(g) for g in gains if g < 0),
        'avg_grade': np.mean(gains) / 100 * 100,  # % moyen
        'max_gain': max(gains),
        'max_loss': min(gains),
    }

In [13]:
def build_segments_df(df_parquet):
    """Build segments_df with profile column from raw parquet data"""
    profiles = []
    
    for altitude, distance in zip(df_parquet['altitude_profile'], df_parquet['distance_profile']):
        profile = cut_segment(altitude, distance)
        profiles.append(profile)
    
    segments_df = df_parquet.copy()
    segments_df['profile'] = profiles
    
    return segments_df

In [14]:
# %% Prepare training data
def prepare_data(segments_df):
    """Prepare X and y from segments dataframe"""
    features_list = []
    times = []
    
    for _, row in segments_df.iterrows():
        profile = row['profile']
        time = row['average_top_10_time']
        
        if profile and time and not np.isnan(time):
            features = extract_features(profile)
            features_list.append(features)
            times.append(time)
    
    X = pd.DataFrame(features_list)
    y = np.array(times)
    
    return X, y

In [15]:
# %% Train model
def train_model(X, y):
    """Train a simple gradient boosting model"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = GradientBoostingRegressor(n_estimators=100, max_depth=3, random_state=42)
    model.fit(X_train, y_train)
    
    # Evaluate
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"MAE: {mae:.0f}s ({mae/60:.1f} min)")
    print(f"R¬≤: {r2:.3f}")
    
    return model

In [16]:
# %% Predict time for new profile
def predict_time(model, profile):
    """Predict time for a new profile"""
    features = extract_features(profile)
    X = pd.DataFrame([features])
    return model.predict(X)[0]

In [17]:

# %% ============ USAGE ============

# 1. Load your data (adapt to your actual data structure)
segments_df = build_segments_df(df_parquet)

# 2. Prepare data
X, y = prepare_data(segments_df)

# 3. Train
model = train_model(X, y)

# 4. Save model
joblib.dump(model, '../src/models/time_predictor.joblib')

# 5. Predict on new profile
mon_profil = [
    {'start_distance': 0, 'end_distance': 100, 'elevation_gain': -3.6},
    {'start_distance': 100, 'end_distance': 200, 'elevation_gain': 7.2},
    {'start_distance': 200, 'end_distance': 300, 'elevation_gain': 3.6},
    {'start_distance': 300, 'end_distance': 400, 'elevation_gain': 5.4},
    {'start_distance': 400, 'end_distance': 500, 'elevation_gain': 4.0},
    # ... add more chunks
]


KeyError: 'elevation_gain'

In [None]:
predicted_time = predict_time(model, mon_profil)
print(f"Temps estim√©: {predicted_time:.0f}s ({predicted_time/60:.1f} min)")

Temps estim√©: 191s (3.2 min)
