In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import plotly.graph_objects as go

In [2]:
raw_parquet = pd.read_parquet('../data/raw/reunion_segments.parquet')
df_parquet = pd.DataFrame(raw_parquet)

In [3]:
df_parquet['altitude_profile'].dtype

dtype('O')

In [184]:
def _create_section(alt_array, dist_array, grades, start_idx, end_idx, coordinates=None):
    """Helper function to create a section dictionary"""
    
    if start_idx >= end_idx:
        return None
    
    # Calculate section metrics
    distance = dist_array[end_idx] - dist_array[start_idx]
    elevation_change = alt_array[end_idx] - alt_array[start_idx]
    avg_grade = (elevation_change / distance * 100) if distance > 0 else 0
    
    # Get grades within this section
    section_grades = grades[start_idx:end_idx]
    max_grade = np.max(section_grades) if len(section_grades) > 0 else avg_grade
    min_grade = np.min(section_grades) if len(section_grades) > 0 else avg_grade
    grade_variance = np.std(section_grades) if len(section_grades) > 0 else 0
    
    # Classify section type
    if avg_grade > 3:
        section_type = 'climb'
        elevation_metric = elevation_change
    elif avg_grade < -3:
        section_type = 'descent'
        elevation_metric = abs(elevation_change)
    else:
        section_type = 'flat'
        elevation_metric = abs(elevation_change)
    
    section = {
        'type': section_type,
        'grade': round(avg_grade, 2),
        'distance': round(distance, 1),
        'elevation_change': round(elevation_change, 1),
        'max_grade': round(max_grade, 2),
        'min_grade': round(min_grade, 2),
        'grade_variance': round(grade_variance, 2),
        'start_distance': round(dist_array[start_idx], 1),
        'end_distance': round(dist_array[end_idx], 1),
        'start_altitude': round(alt_array[start_idx], 1),
        'end_altitude': round(alt_array[end_idx], 1)
    }
    
    # Add type-specific metrics
    if section_type == 'climb':
        section['elevation_gain'] = round(elevation_change, 1)
    elif section_type == 'descent':
        section['elevation_loss'] = round(abs(elevation_change), 1)
        # Detect sharp turns in descents
        if coordinates is not None:
            turns_info = _detect_sharp_turns(coordinates, start_idx, end_idx)
            section.update(turns_info)
    
    return section

In [6]:
def _calculate_bearing_change(point1, point2, point3):
    """
    Calculate the angle change between two bearings
    
    Parameters:
    -----------
    point1, point2, point3 : array-like
        [lat, lng] coordinates
    
    Returns:
    --------
    float : Angle change in degrees (0-180)
    """
    lat1, lon1 = np.radians(point1)
    lat2, lon2 = np.radians(point2)
    lat3, lon3 = np.radians(point3)
    
    # Calculate bearing from point1 to point2
    bearing1 = np.arctan2(
        np.sin(lon2 - lon1) * np.cos(lat2),
        np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(lon2 - lon1)
    )
    
    # Calculate bearing from point2 to point3
    bearing2 = np.arctan2(
        np.sin(lon3 - lon2) * np.cos(lat3),
        np.cos(lat2) * np.sin(lat3) - np.sin(lat2) * np.cos(lat3) * np.cos(lon3 - lon2)
    )
    
    # Calculate angle difference
    angle_diff = np.degrees(bearing2 - bearing1)
    
    # Normalize to 0-180 range
    angle_diff = abs((angle_diff + 180) % 360 - 180)
    
    return angle_diff

In [7]:
def _merge_short_sections(sections, min_length):
    """Merge sections shorter than min_length with adjacent sections"""
    
    if len(sections) <= 1:
        return sections
    
    merged = []
    i = 0
    
    while i < len(sections):
        current = sections[i]
        
        # If section is too short and not the last one
        if current['distance'] < min_length and i < len(sections) - 1:
            next_section = sections[i + 1]
            
            # Merge with next section
            total_distance = current['distance'] + next_section['distance']
            total_elevation = current['elevation_change'] + next_section['elevation_change']
            avg_grade = (total_elevation / total_distance * 100) if total_distance > 0 else 0
            
            # Determine new type
            if avg_grade > 3:
                new_type = 'climb'
            elif avg_grade < -3:
                new_type = 'descent'
            else:
                new_type = 'flat'
            
            merged_section = {
                'type': new_type,
                'grade': round(avg_grade, 2),
                'distance': round(total_distance, 1),
                'elevation_change': round(total_elevation, 1),
                'max_grade': round(max(current['max_grade'], next_section['max_grade']), 2),
                'min_grade': round(min(current['min_grade'], next_section['min_grade']), 2),
                'grade_variance': round(np.mean([current['grade_variance'], 
                                                 next_section['grade_variance']]), 2),
                'start_distance': current['start_distance'],
                'end_distance': next_section['end_distance'],
            }
            
            if new_type == 'climb':
                merged_section['elevation_gain'] = round(total_elevation, 1)
            elif new_type == 'descent':
                merged_section['elevation_loss'] = round(abs(total_elevation), 1)
            
            merged.append(merged_section)
            i += 2  # Skip next section as it's merged
        else:
            merged.append(current)
            i += 1
    
    return merged

In [170]:
def get_adaptive_threshold(current_grade):
    """Return adaptive threshold based on current grade"""
    if current_grade < 0:
        return 2.0  # Downhill threshold
    elif 0 <= current_grade < 3:
        return 1  # Flat threshold
    elif 3 <= current_grade < 6:
        return 1.5  # Moderate climb threshold
    else:
        return 2.0  # Steep climb threshold

In [189]:
def can_merge(section1, section2):
    """Détermine si deux sections peuvent être fusionnées"""
    # Ne fusionne pas les types différents
    if section1['type'] != section2['type']:
        return False

    # Calcul de la différence de grade
    grade_diff = abs(section1['grade'] - section2['grade'])

    # Threshold adapté selon le type
    threshold = (get_adaptive_threshold(section1['grade']) +
                get_adaptive_threshold(section2['grade'])) / 2

    # Peut fusionner si différence faible OU si une section est courte
    return grade_diff < threshold or min(section1['distance'], section2['distance']) < 100

In [194]:
def cut_segment(altitude_profile, distance_profile, coordinates=None,
                min_section_length=100,
                smooth_window=10):
    """Découpe un profil en sections significatives"""

    # Conversion en tableaux numpy
    alt_array = np.array(altitude_profile)
    dist_array = np.array(distance_profile)

    if len(alt_array) < 2 or len(dist_array) < 2:
        return []

    # Calcul des grades (%)
    grades = []
    for i in range(len(alt_array) - 1):
        distance_diff = dist_array[i + 1] - dist_array[i]
        if distance_diff > 0:
            elevation_diff = alt_array[i + 1] - alt_array[i]
            grade = (elevation_diff / distance_diff) * 100
            grades.append(grade)
        else:
            grades.append(0)

    # Lissage des grades
    window_size = min(smooth_window, len(grades))
    grades_smooth = np.convolve(grades, np.ones(window_size)/window_size, mode='valid')
    grades_smooth = np.concatenate((grades[:window_size-1], grades_smooth))

    # Découpage initial
    sections = []
    section_start_idx = 0

    for i in range(1, len(grades_smooth)):
        current_avg_grade = np.mean(grades_smooth[max(0,i-5):i+1])
        prev_avg_grade = np.mean(grades_smooth[section_start_idx:min(i+1, section_start_idx+5)])
        threshold = get_adaptive_threshold(prev_avg_grade)
        grade_change = abs(current_avg_grade - prev_avg_grade)
        distance_covered = dist_array[i] - dist_array[section_start_idx]

        if grade_change > threshold and distance_covered >= min_section_length:
            section = _create_section(alt_array, dist_array, grades_smooth,
                                     section_start_idx, i, coordinates)
            if section:
                sections.append(section)
            section_start_idx = i

    # Ajout de la dernière section
    section = _create_section(alt_array, dist_array, grades_smooth,
                             section_start_idx, len(dist_array)-1, coordinates)
    if section:
        sections.append(section)

    # Fusion des sections similaires
    if len(sections) > 1:
        merged_sections = [sections[0]]

        for i in range(1, len(sections)):
            if can_merge(merged_sections[-1], sections[i]):
                # Fusion des sections
                prev = merged_sections.pop()
                current = sections[i]

                # Calcul des nouvelles valeurs
                total_distance = prev['distance'] + current['distance']
                weight1 = prev['distance'] / total_distance
                weight2 = current['distance'] / total_distance

                merged = {
                    'type': prev['type'],  # Même type garanti par can_merge
                    'grade': prev['grade']*weight1 + current['grade']*weight2,
                    'distance': total_distance,
                    'elevation_change': prev['elevation_change'] + current['elevation_change'],
                    'max_grade': max(prev['max_grade'], current['max_grade']),
                    'min_grade': min(prev['min_grade'], current['min_grade']),
                    'grade_variance': prev['grade_variance']*weight1 + current['grade_variance']*weight2,
                    'start_distance': prev['start_distance'],
                    'end_distance': current['end_distance'],
                    'start_altitude': prev['start_altitude'],
                    'end_altitude': current['end_altitude'],
                    'elevation_gain': prev.get('elevation_gain', 0) + current.get('elevation_gain', 0),
                    'elevation_loss': prev.get('elevation_loss', 0) + current.get('elevation_loss', 0)
                }
                merged_sections.append(merged)
            else:
                merged_sections.append(sections[i])

        sections = merged_sections

    return sections

In [180]:
cut_segment(df_parquet['altitude_profile'].iloc[0], df_parquet['distance_profile'].iloc[0], df_parquet['coordinates'].iloc[0])

[{'grade': np.float64(-2.68107098865042),
  'distance': np.float64(104.80000000000291),
  'elevation_change': np.float64(-3.200000000000003),
  'max_grade': np.float64(4.597701149426809),
  'min_grade': np.float64(-8.163265306126057),
  'grade_variance': np.float64(23.684598151597434),
  'start_distance': np.float64(0.0),
  'end_distance': np.float64(104.80000000000291),
  'start_altitude': np.float64(67.2),
  'end_altitude': np.float64(64.0),
  'type': 'downhill',
  'elevation_loss': np.float64(3.200000000000003)},
 {'grade': np.float64(7.722137697805959),
  'distance': np.float64(106.5),
  'elevation_change': np.float64(8.0),
  'max_grade': np.float64(16.438356164377048),
  'min_grade': np.float64(4.3795620437966125),
  'grade_variance': np.float64(9.27719477291714),
  'start_distance': np.float64(104.80000000000291),
  'end_distance': np.float64(211.3000000000029),
  'start_altitude': np.float64(64.0),
  'end_altitude': np.float64(72.0),
  'type': 'climb',
  'elevation_gain': np.flo

In [150]:
def plot_elevation_profile(segments, start_altitude=0):
    fig = go.Figure()

    current_altitude = start_altitude
    distances = []
    altitudes = []
    hover_texts = []
    colors = []
    grade_labels = []

    # Calculate altitudes for each segment
    for seg in segments:
        end_altitude = current_altitude + seg['elevation_change']
        distances.extend([seg['start_distance'], seg['end_distance']])
        altitudes.extend([current_altitude, end_altitude])
        current_altitude = end_altitude

        # Determine line color based on grade
        if seg['grade'] > 9:
            color = 'red'
        elif seg['grade'] > 3:
            color = 'orange'
        elif seg['grade'] < -2:  # Downhill
            color = 'green'
        else:
            color = 'blue'
        colors.extend([color, color])

        # Add hover text
        hover_texts.append(
            f"Start: {seg['start_distance']:.1f}m<br>Grade: {seg['grade']:.1f}%<br>Type: {seg['type']}<br>"
            f"Elev Δ: {seg['elevation_change']:.1f}m<br>Max: {seg['max_grade']:.1f}%<br>Min: {seg['min_grade']:.1f}%"
        )
        hover_texts.append("")  # Empty for end point

        # Add grade label position (middle of segment)
        grade_labels.append({
            'x': (seg['start_distance'] + seg['end_distance']) / 2,
            'y': (current_altitude - seg['elevation_change'] + end_altitude) / 2,
            'text': f"{seg['grade']:.1f}%",
            'color': color
        })

    # Add elevation line
    fig.add_trace(go.Scatter(
        x=distances,
        y=altitudes,
        mode='lines',
        line=dict(width=4, color='black'),
        hoverinfo='text',
        text=hover_texts,
        name='Elevation'
    ))

    # Add colored segments
    for i, seg in enumerate(segments):
        fig.add_shape(
            type='line',
            x0=seg['start_distance'],
            y0=altitudes[i*2],
            x1=seg['end_distance'],
            y1=altitudes[i*2+1],
            line=dict(
                color=colors[i*2],
                width=8,
                dash='solid' 
            )
        )

    # Add grade labels
    for label in grade_labels:
        fig.add_annotation(
            x=label['x'],
            y=label['y'],
            text=label['text'],
            showarrow=False,
            font=dict(color=label['color'], size=12),
            bgcolor='white',
            bordercolor=label['color'],
            borderwidth=1,
            borderpad=2
        )

    # Update layout
    min_alt = min(altitudes)
    max_alt = max(altitudes)
    fig.update_layout(
        title='Trail Elevation Profile with Grade Segments',
        xaxis_title='Distance (m)',
        yaxis_title='Elevation (m)',
        yaxis=dict(
            range=[min_alt - 5, max_alt + 5]  # Add 5m buffer
        ),
        hovermode='x unified',
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(t=40),
        height=600
    )

    fig.show()

In [195]:
plot_elevation_profile(cut_segment(df_parquet['altitude_profile'].iloc[0], df_parquet['distance_profile'].iloc[0], df_parquet['coordinates'].iloc[0]))

In [196]:
plot_elevation_profile(cut_segment(df_parquet['altitude_profile'].iloc[1], df_parquet['distance_profile'].iloc[1], df_parquet['coordinates'].iloc[1]))

In [193]:
plot_elevation_profile(cut_segment(df_parquet['altitude_profile'].iloc[2], df_parquet['distance_profile'].iloc[2], df_parquet['coordinates'].iloc[2]))

In [188]:
plot_elevation_profile(cut_segment(df_parquet['altitude_profile'].iloc[14], df_parquet['distance_profile'].iloc[14], df_parquet['coordinates'].iloc[14]))

### Modèle

In [12]:
# %% Function to extract features from a profile
def extract_features(profile):
    """Extract simple features from 100m elevation profile"""
    gains = [chunk['elevation_gain'] for chunk in profile]
    
    return {
        'total_distance': profile[-1]['end_distance'],
        'total_elevation_gain': sum(g for g in gains if g > 0),
        'total_elevation_loss': sum(abs(g) for g in gains if g < 0),
        'avg_grade': np.mean(gains) / 100 * 100,  # % moyen
        'max_gain': max(gains),
        'max_loss': min(gains),
    }

In [13]:
def build_segments_df(df_parquet):
    """Build segments_df with profile column from raw parquet data"""
    profiles = []
    
    for altitude, distance in zip(df_parquet['altitude_profile'], df_parquet['distance_profile']):
        profile = cut_segment(altitude, distance)
        profiles.append(profile)
    
    segments_df = df_parquet.copy()
    segments_df['profile'] = profiles
    
    return segments_df

In [14]:
# %% Prepare training data
def prepare_data(segments_df):
    """Prepare X and y from segments dataframe"""
    features_list = []
    times = []
    
    for _, row in segments_df.iterrows():
        profile = row['profile']
        time = row['average_top_10_time']
        
        if profile and time and not np.isnan(time):
            features = extract_features(profile)
            features_list.append(features)
            times.append(time)
    
    X = pd.DataFrame(features_list)
    y = np.array(times)
    
    return X, y

In [15]:
# %% Train model
def train_model(X, y):
    """Train a simple gradient boosting model"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = GradientBoostingRegressor(n_estimators=100, max_depth=3, random_state=42)
    model.fit(X_train, y_train)
    
    # Evaluate
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"MAE: {mae:.0f}s ({mae/60:.1f} min)")
    print(f"R²: {r2:.3f}")
    
    return model

In [16]:
# %% Predict time for new profile
def predict_time(model, profile):
    """Predict time for a new profile"""
    features = extract_features(profile)
    X = pd.DataFrame([features])
    return model.predict(X)[0]

In [17]:

# %% ============ USAGE ============

# 1. Load your data (adapt to your actual data structure)
segments_df = build_segments_df(df_parquet)

# 2. Prepare data
X, y = prepare_data(segments_df)

# 3. Train
model = train_model(X, y)

# 4. Save model
joblib.dump(model, '../src/models/time_predictor.joblib')

# 5. Predict on new profile
mon_profil = [
    {'start_distance': 0, 'end_distance': 100, 'elevation_gain': -3.6},
    {'start_distance': 100, 'end_distance': 200, 'elevation_gain': 7.2},
    {'start_distance': 200, 'end_distance': 300, 'elevation_gain': 3.6},
    {'start_distance': 300, 'end_distance': 400, 'elevation_gain': 5.4},
    {'start_distance': 400, 'end_distance': 500, 'elevation_gain': 4.0},
    # ... add more chunks
]


KeyError: 'elevation_gain'

In [None]:
predicted_time = predict_time(model, mon_profil)
print(f"Temps estimé: {predicted_time:.0f}s ({predicted_time/60:.1f} min)")

Temps estimé: 191s (3.2 min)
