In [1]:
# Setup et imports
import pandas as pd
import re
import xml.etree.ElementTree as ET
from xml.dom import minidom
import os
from datetime import datetime
from typing import Dict, List, Tuple, Optional
import math

print("üîß Pipeline Vekta Enhanced V2 - Am√©liorations Avanc√©es")
print("=" * 60)


üîß Pipeline Vekta Enhanced V2 - Am√©liorations Avanc√©es


In [2]:
# 1. Configuration avanc√©e avec entit√©s d√©taill√©es
class EnhancedValidationConfig:
    # Seuils de confiance renforc√©s
    CONFIDENCE_THRESHOLD_HIGH = 0.95    # Excellence requise
    CONFIDENCE_THRESHOLD_MEDIUM = 0.75  # Tr√®s bonne correspondance
    CONFIDENCE_THRESHOLD_LOW = 0.75     # Rejet en dessous
    
    # Entit√©s requises avec patterns am√©lior√©s et exemples
    REQUIRED_ENTITIES = {
        'duration': {
            'pattern': r'\d+\s*(min|h|s|sec|secondes?|minutes?|heures?)',
            'examples': ['10min', '1h', '30s', '45 minutes'],
            'description': 'Dur√©e des intervalles ou de la s√©ance'
        },
        'repetitions': {
            'pattern': r'\d+\s*[x*√ó]\s*',
            'examples': ['3x', '5*', '8√ó', '4 fois'],
            'description': 'Nombre de r√©p√©titions'
        },
        'intensity': {
            'pattern': r'(VO2|seuil|tempo|aerobic|sprint|endurance|threshold|anaerobic|neuromuscular|\d+%)',
            'examples': ['VO2max', 'tempo', 'seuil', 'aerobic', '85%', 'sprint'],
            'description': 'Zone d\'intensit√© ou pourcentage'
        },
        'recovery': {
            'pattern': r'(recup|rec|recovery|repos)\s*\d+',
            'examples': ['5min recup', '2min repos', '90s recovery'],
            'description': 'Temps de r√©cup√©ration entre intervalles'
        },
        'structure': {
            'pattern': r'(pyramide|pyramid|series?|blocs?|escalier)',
            'examples': ['pyramide', 's√©rie', '2 blocs', 'escalier'],
            'description': 'Structure complexe de l\'entra√Ænement'
        }
    }
    
    # Pond√©rations pour scoring avanc√©
    SCORING_WEIGHTS = {
        'exact_match': 2.0,      # Correspondance exacte
        'zone_match': 1.5,       # Zone d'entra√Ænement
        'duration_match': 1.2,   # Dur√©e
        'repetition_match': 1.2, # R√©p√©titions
        'intensity_match': 1.0,  # Intensit√©
        'structure_match': 1.8,  # Structure complexe
        'variation_bonus': 0.3   # Bonus variation linguistique
    }

# One-shot summary of the loaded configuration (emitted as a single print call).
print(
    "‚úÖ Configuration avanc√©e charg√©e",
    f"   Seuils: Excellence ‚â•{EnhancedValidationConfig.CONFIDENCE_THRESHOLD_HIGH}",
    f"   Entit√©s surveill√©es: {len(EnhancedValidationConfig.REQUIRED_ENTITIES)}",
    f"   Crit√®res de scoring: {len(EnhancedValidationConfig.SCORING_WEIGHTS)}",
    sep="\n",
)


‚úÖ Configuration avanc√©e charg√©e
   Seuils: Excellence ‚â•0.95
   Entit√©s surveill√©es: 5
   Crit√®res de scoring: 7


In [3]:
# 2. Extended corpus, including complex workout structures.
# Each record carries: a numeric 'id', a reference 'description', the training
# 'zone', a 'structure' dict whose keys depend on its 'type', a 'power_zone',
# an 'intensity' value, a 'complexity' label ('simple' or 'complex') and a list
# of alternative phrasings under 'variations'.
enhanced_corpus_data = [
    # === SIMPLE STRUCTURES ===
    {
        'id': 0, 'description': '1h endurance allure aerobic tranquille', 'zone': 'Aerobic',
        'structure': {'type': 'continuous', 'duration': 60}, 'power_zone': 2, 'intensity': 65,
        'complexity': 'simple',
        'variations': ['1h endur aerobic', '60min endurance facile', '1 heure aerobic tranquil']
    },
    {
        'id': 1, 'description': '3x 10 min tempo sweet spot avec 5min recup', 'zone': 'Tempo',
        'structure': {'type': 'intervals', 'reps': 3, 'duration': 10, 'recovery': 5}, 'power_zone': 3, 'intensity': 85,
        'complexity': 'simple',
        'variations': ['3x10min tempo 5minrec', '3*10min sweet spot', '3 fois 10 minutes tempo']
    },
    
    # === COMPLEX STRUCTURES - PYRAMIDS ===
    {
        'id': 10, 'description': 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup', 'zone': 'VO2max',
        'structure': {'type': 'pyramid', 'pattern': [1,2,3,4,3,2,1], 'recovery': 1}, 'power_zone': 5, 'intensity': 112,
        'complexity': 'complex',
        'variations': ['pyramide 1234321 VO2', 'pyramid 1-2-3-4-3-2-1min', 'pyramide VO2max classique']
    },
    {
        'id': 11, 'description': 'pyramide tempo 5-10-15-10-5min avec 5min recup', 'zone': 'Tempo',
        'structure': {'type': 'pyramid', 'pattern': [5,10,15,10,5], 'recovery': 5}, 'power_zone': 3, 'intensity': 87,
        'complexity': 'complex',
        'variations': ['pyramide tempo 5-10-15', 'pyramid sweet spot', 'pyramide 5/10/15/10/5']
    },
    
    # === COMPLEX STRUCTURES - SERIES ===
    {
        'id': 20, 'description': '2 series de 4x3min VO2max avec 90s recup et 5min entre series', 'zone': 'VO2max',
        'structure': {'type': 'series', 'sets': 2, 'reps': 4, 'duration': 3, 'recovery_intra': 1.5, 'recovery_inter': 5}, 
        'power_zone': 5, 'intensity': 115, 'complexity': 'complex',
        'variations': ['2*(4x3min VO2)', '2 blocs 4x3min VO2max', '2 series 4x3min avec 90s']
    },
    {
        'id': 21, 'description': '3 series de 3x5min seuil avec 2min recup et 8min entre series', 'zone': 'Threshold',
        'structure': {'type': 'series', 'sets': 3, 'reps': 3, 'duration': 5, 'recovery_intra': 2, 'recovery_inter': 8},
        'power_zone': 4, 'intensity': 100, 'complexity': 'complex',
        'variations': ['3*(3x5min seuil)', '3 blocs 3x5min FTP', '3 series seuil 5min']
    },
    
    # === SPECIALISED STRUCTURES ===
    {
        'id': 30, 'description': 'over-under 8x3min 95-105% FTP avec 2min recup', 'zone': 'Threshold',
        'structure': {'type': 'over_under', 'reps': 8, 'duration': 3, 'low': 95, 'high': 105, 'recovery': 2},
        'power_zone': 4, 'intensity': 100, 'complexity': 'complex',
        'variations': ['over under 95-105%', '8x3min over-under', 'sur-sous seuil']
    },
    {
        'id': 31, 'description': 'micro-intervalles 15x1min ON/OFF VO2max', 'zone': 'VO2max',
        'structure': {'type': 'micro_intervals', 'reps': 15, 'duration': 1, 'recovery': 1, 'on_off': True},
        'power_zone': 5, 'intensity': 115, 'complexity': 'complex',
        'variations': ['15x1min ON OFF', 'micro intervalles VO2', '15*1min on off']
    }
]

# Flatten the corpus: every primary workout is followed by one derived row per
# variation. A derived row is a shallow copy of its parent with a suffixed id,
# the variation text as description and an 'is_variation' flag. Primary rows
# do not carry that flag.
all_enhanced_entries = []
for workout in enhanced_corpus_data:
    all_enhanced_entries.append(workout)
    if 'variations' not in workout:
        continue
    for i, variation in enumerate(workout['variations']):
        var_entry = {
            **workout,
            'id': f"{workout['id']}_v{i+1}",
            'description': variation,
            'is_variation': True,
        }
        all_enhanced_entries.append(var_entry)

enhanced_corpus_df = pd.DataFrame(all_enhanced_entries)

# Size summary of the expanded corpus.
print(
    f"üìö Corpus Enhanced V2: {len(enhanced_corpus_df)} entra√Ænements",
    f"   Principaux: {len(enhanced_corpus_data)}",
    f"   Variations: {len(enhanced_corpus_df) - len(enhanced_corpus_data)}",
    f"   Complexit√©: {enhanced_corpus_df['complexity'].value_counts().to_dict()}",
    sep="\n",
)


üìö Corpus Enhanced V2: 32 entra√Ænements
   Principaux: 8
   Variations: 24
   Complexit√©: {'complex': 24, 'simple': 8}


In [4]:
# 4. Syst√®me de correction orthographique intelligent
import difflib
from collections import defaultdict
import unicodedata

class SpellChecker:
    def __init__(self, corpus_df):
        self.corpus_df = corpus_df
        self.vocabulary = self._build_vocabulary()
        self.common_mistakes = self._build_mistake_patterns()
        
    def _build_vocabulary(self):
        """Construction du vocabulaire √† partir du corpus"""
        vocabulary = set()
        
        # Extraire tous les mots du corpus
        for _, workout in self.corpus_df.iterrows():
            description = workout['description'].lower()
            # Nettoyage et extraction des mots
            words = re.findall(r'\b[a-zA-Z√†√¢√§√©√®√™√´√Ø√Æ√¥√π√ª√º√ø√ß]+\b', description)
            vocabulary.update(words)
        
        # Ajouter vocabulaire sp√©cialis√© cyclisme
        cycling_vocab = {
            # Zones d'intensit√©
            'aerobic', 'tempo', 'threshold', 'seuil', 'vo2max', 'vo2', 'anaerobic', 'sprint',
            'endurance', 'sweet', 'spot', 'neuromuscular', 'neuro',
            
            # Structures
            'pyramide', 'pyramid', 'serie', 'series', 'bloc', 'blocs', 'escalier',
            'over', 'under', 'micro', 'intervalles', 'intervals',
            
            # Dur√©es et mesures
            'min', 'minutes', 'sec', 'secondes', 'heure', 'heures',
            'recup', 'recuperation', 'repos', 'recovery',
            
            # Actions
            'fois', 'repetitions', 'reps', 'avec', 'entre', 'puis'
        }
        vocabulary.update(cycling_vocab)
        
        return vocabulary
    
    def _build_mistake_patterns(self):
        """Patterns de fautes communes en fran√ßais cyclisme"""
        return {
            # Fautes phon√©tiques
            'aerobik': 'aerobic',
            'aerobique': 'aerobic',
            'a√©robique': 'aerobic',
            'vo2': 'vo2max',
            'v02': 'vo2max',
            'vo2 max': 'vo2max',
            'tempos': 'tempo',
            'seuille': 'seuil',
            'sueil': 'seuil',
            'r√©cup': 'recup',
            'recupe': 'recup',
            'r√©cup√©ration': 'recuperation',
            
            # Fautes de frappe communes
            'piramide': 'pyramide',
            'pyramyide': 'pyramide',
            'pyrimide': 'pyramide',
            's√©ries': 'series',
            's√©ri√©s': 'series',
            's√©rie': 'serie',
            'intervales': 'intervalles',
            'intervalle': 'intervalles',
            'bloque': 'bloc',
            'bloques': 'blocs',
            
            # Abr√©viations et variations
            'mn': 'min',
            'mins': 'min',
            'minuts': 'min',
            'minuets': 'min',
            'sec': 'secondes',
            'secs': 'secondes',
            'h': 'heure',
            'hr': 'heure',
            
            # Nombres et r√©p√©titions
            'foi': 'fois',
            'foix': 'fois',
            'x': 'fois',
            'rep': 'reps',
            'r√©p√©titions': 'repetitions'
        }
    
    def _normalize_text(self, text):
        """Normalisation du texte (accents, casse)"""
        # Supprimer les accents
        text = unicodedata.normalize('NFD', text)
        text = ''.join(c for c in text if unicodedata.category(c) != 'Mn')
        return text.lower().strip()
    
    def _calculate_edit_distance(self, word1, word2):
        """Distance de Levenshtein optimis√©e"""
        if len(word1) < len(word2):
            return self._calculate_edit_distance(word2, word1)
        
        if len(word2) == 0:
            return len(word1)
        
        previous_row = list(range(len(word2) + 1))
        for i, c1 in enumerate(word1):
            current_row = [i + 1]
            for j, c2 in enumerate(word2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row
        
        return previous_row[-1]
    
    def _find_best_matches(self, word, max_distance=2, max_results=3):
        """Trouve les meilleures correspondances pour un mot"""
        word_normalized = self._normalize_text(word)
        
        # V√©rification directe dans les patterns de fautes
        if word_normalized in self.common_mistakes:
            return [self.common_mistakes[word_normalized]]
        
        candidates = []
        
        for vocab_word in self.vocabulary:
            vocab_normalized = self._normalize_text(vocab_word)
            
            # Distance exacte
            if word_normalized == vocab_normalized:
                return [vocab_word]
            
            # Distance d'√©dition
            distance = self._calculate_edit_distance(word_normalized, vocab_normalized)
            
            if distance <= max_distance:
                # Score bas√© sur distance et longueur
                score = 1.0 - (distance / max(len(word_normalized), len(vocab_normalized)))
                candidates.append((vocab_word, score, distance))
        
        # Tri par score d√©croissant
        candidates.sort(key=lambda x: (-x[1], x[2]))
        
        return [candidate[0] for candidate in candidates[:max_results]]
    
    def correct_query(self, query):
        """Correction orthographique compl√®te d'une requ√™te"""
        original_query = query
        corrected_parts = []
        corrections_made = []
        
        # S√©parer les mots et les nombres/symboles
        tokens = re.findall(r'\d+[x*√ó]|\d+%|\d+min|\d+h|\d+s|\d+|\b[a-zA-Z√†√¢√§√©√®√™√´√Ø√Æ√¥√π√ª√º√ø√ß]+\b|[^\w\s]', query)
        
        for token in tokens:
            # Skip si c'est un nombre, pourcentage, ou symbole
            if re.match(r'\d+[x*√ó%]|\d+min|\d+h|\d+s|\d+|[^\w\s]', token):
                corrected_parts.append(token)
                continue
            
            # Correction orthographique pour les mots
            if len(token) > 2:  # Ignorer les mots tr√®s courts
                best_matches = self._find_best_matches(token)
                
                if best_matches and best_matches[0] != token.lower():
                    correction = best_matches[0]
                    corrected_parts.append(correction)
                    corrections_made.append(f"'{token}' ‚Üí '{correction}'")
                else:
                    corrected_parts.append(token)
            else:
                corrected_parts.append(token)
        
        # Reconstruction de la requ√™te
        corrected_query = ' '.join(corrected_parts)
        
        # Nettoyage final
        corrected_query = re.sub(r'\s+', ' ', corrected_query).strip()
        
        return {
            'original': original_query,
            'corrected': corrected_query,
            'corrections': corrections_made,
            'has_corrections': len(corrections_made) > 0
        }
    
    def get_correction_confidence(self, original, corrected):
        """Calcule la confiance dans la correction"""
        if original == corrected:
            return 1.0
        
        # Similarit√© bas√©e sur la longueur et les corrections
        similarity = difflib.SequenceMatcher(None, original.lower(), corrected.lower()).ratio()
        return similarity

# Build the shared spell checker from the expanded corpus.
spell_checker = SpellChecker(enhanced_corpus_df)

# Report the size of its lookup tables and the enabled features.
print(
    "üìù Correcteur orthographique intelligent initialis√©",
    f"   Vocabulaire: {len(spell_checker.vocabulary)} mots",
    f"   Patterns d'erreurs: {len(spell_checker.common_mistakes)} corrections",
    "   ‚úÖ Correction phon√©tique fran√ßais",
    "   ‚úÖ Distance de Levenshtein optimis√©e",
    "   ‚úÖ Vocabulaire cyclisme sp√©cialis√©",
    sep="\n",
)


üìù Correcteur orthographique intelligent initialis√©
   Vocabulaire: 54 mots
   Patterns d'erreurs: 35 corrections
   ‚úÖ Correction phon√©tique fran√ßais
   ‚úÖ Distance de Levenshtein optimis√©e
   ‚úÖ Vocabulaire cyclisme sp√©cialis√©


In [5]:
# 5. Validateur Enhanced avec correction orthographique int√©gr√©e
class EnhancedValidator:
    def __init__(self, corpus_df, spell_checker=None):
        """Bind the validator to a corpus and, optionally, to a spell checker.

        Args:
            corpus_df: frame of reference workouts searched by the validator.
            spell_checker: optional object used to correct queries before
                validation; when falsy, queries are validated as typed.
        """
        self.spell_checker = spell_checker
        self.config = EnhancedValidationConfig()
        self.corpus_df = corpus_df
    
    def detect_missing_entities_detailed(self, query: str) -> Tuple[Dict, Dict]:
        """D√©tection d√©taill√©e des entit√©s manquantes avec exemples"""
        query_lower = query.lower()
        entities_analysis = {}
        
        for entity_type, entity_config in self.config.REQUIRED_ENTITIES.items():
            pattern = entity_config['pattern']
            matches = re.findall(pattern, query_lower, re.IGNORECASE)
            
            entities_analysis[entity_type] = {
                'found': len(matches) > 0,
                'matches': matches,
                'examples': entity_config['examples'],
                'description': entity_config['description']
            }
        
        missing_entities = {k: v for k, v in entities_analysis.items() if not v['found']}
        return entities_analysis, missing_entities
    
    def calculate_advanced_similarity(self, query: str, description: str, workout_data: Dict) -> Tuple[float, Dict]:
        """Calcul de similarit√© avanc√© avec pond√©ration"""
        query_lower = query.lower()
        desc_lower = description.lower()
        
        score_components = {}
        
        # 1. Correspondance exacte (bonus majeur)
        if query_lower.strip() == desc_lower.strip():
            score_components['exact_match'] = self.config.SCORING_WEIGHTS['exact_match']
        else:
            score_components['exact_match'] = 0
        
        # 2. Zone d'entra√Ænement
        zone_keywords = {
            'aerobic': ['aerobic', 'endurance', 'z2'],
            'tempo': ['tempo', 'sweet', 'spot', 'z3'],
            'threshold': ['threshold', 'seuil', 'ftp', 'z4'],
            'vo2max': ['vo2', 'vo2max', 'z5'],
            'anaerobic': ['anaerobic', 'sprint', 'z6'],
            'neuromuscular': ['neuromuscular', 'neuro', 'max']
        }
        
        zone_score = 0
        workout_zone = workout_data.get('zone', '').lower()
        for zone, keywords in zone_keywords.items():
            if zone in workout_zone:
                for keyword in keywords:
                    if keyword in query_lower:
                        zone_score += 0.3
        score_components['zone_match'] = min(zone_score, 1.0) * self.config.SCORING_WEIGHTS['zone_match']
        
        # 3. Dur√©es et nombres
        query_numbers = re.findall(r'\d+', query)
        desc_numbers = re.findall(r'\d+', description)
        
        duration_score = 0
        for num in query_numbers:
            if num in desc_numbers:
                duration_score += 0.2
        score_components['duration_match'] = min(duration_score, 1.0) * self.config.SCORING_WEIGHTS['duration_match']
        
        # 4. R√©p√©titions (3x, 5x, etc.)
        rep_patterns_query = re.findall(r'\d+\s*[x*√ó]', query_lower)
        rep_patterns_desc = re.findall(r'\d+\s*[x*√ó]', desc_lower)
        
        rep_score = 0
        for rep in rep_patterns_query:
            rep_num = re.findall(r'\d+', rep)[0]
            for desc_rep in rep_patterns_desc:
                desc_num = re.findall(r'\d+', desc_rep)[0]
                if rep_num == desc_num:
                    rep_score += 0.5
        score_components['repetition_match'] = min(rep_score, 1.0) * self.config.SCORING_WEIGHTS['repetition_match']
        
        # 5. Structures complexes (bonus majeur)
        structure_keywords = ['pyramide', 'pyramid', 'serie', 'series', 'bloc', 'escalier', 'over', 'under', 'micro']
        structure_score = 0
        
        for keyword in structure_keywords:
            if keyword in query_lower and keyword in desc_lower:
                structure_score += 0.4
                # Bonus pour complexit√©
                if workout_data.get('complexity') == 'complex':
                    structure_score += 0.3
        
        score_components['structure_match'] = min(structure_score, 1.0) * self.config.SCORING_WEIGHTS['structure_match']
        
        # Score total
        total_score = sum(score_components.values())
        
        # Normalisation (score max th√©orique = somme des poids)
        max_possible_score = sum(self.config.SCORING_WEIGHTS.values())
        normalized_score = min(total_score / max_possible_score, 0.99)
        
        return normalized_score, score_components
    
    def search_corpus_enhanced(self, query: str) -> Tuple[Optional[Dict], float, Dict]:
        """Recherche avanc√©e dans le corpus avec scoring d√©taill√©"""
        best_match = None
        best_score = 0.0
        best_components = {}
        
        for idx, workout in self.corpus_df.iterrows():
            score, components = self.calculate_advanced_similarity(
                query, workout['description'], workout.to_dict()
            )
            
            # Bonus pour variations exactes
            if workout.get('is_variation', False):
                score += self.config.SCORING_WEIGHTS['variation_bonus']
            
            if score > best_score:
                best_score = score
                best_match = workout.to_dict()
                best_components = components
        
        return best_match, min(best_score, 0.99), best_components
    
    def generate_detailed_error_message(self, query: str, missing_entities: Dict, best_match: Dict, confidence: float) -> str:
        """G√©n√©ration de messages d'erreur d√©taill√©s et constructifs"""
        message_parts = []
        
        # En-t√™te avec niveau de confiance
        if confidence < 0.3:
            message_parts.append(f"‚ùå Requ√™te non reconnue (confiance: {confidence:.3f})")
        else:
            message_parts.append(f"‚ö†Ô∏è Correspondance insuffisante (confiance: {confidence:.3f})")
        
        # Entit√©s manquantes d√©taill√©es
        if missing_entities:
            message_parts.append("\\nüîç ENTIT√âS MANQUANTES:")
            for entity_type, entity_info in missing_entities.items():
                examples = ", ".join(entity_info['examples'][:3])
                message_parts.append(f"   ‚Ä¢ {entity_info['description']}")
                message_parts.append(f"     Exemples: {examples}")
        
        # Suggestions d'am√©lioration
        message_parts.append("\\nüí° SUGGESTIONS:")
        
        if best_match:
            message_parts.append(f"   ‚Ä¢ Essayez: '{best_match['description']}'")
            
            # Suggestions sp√©cifiques selon les entit√©s manquantes
            if 'duration' in missing_entities:
                message_parts.append("   ‚Ä¢ Pr√©cisez la dur√©e: '10min', '1h', '30s'")
            if 'repetitions' in missing_entities:
                message_parts.append("   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'")
            if 'intensity' in missing_entities:
                message_parts.append("   ‚Ä¢ Sp√©cifiez l'intensit√©: 'tempo', 'VO2max', '85%'")
            if 'recovery' in missing_entities:
                message_parts.append("   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'")
            if 'structure' in missing_entities:
                message_parts.append("   ‚Ä¢ Pr√©cisez la structure: 'pyramide', 's√©rie', 'blocs'")
        
        return "\\n".join(message_parts)
    
    def validate_query_enhanced(self, query: str) -> Dict:
        """Validation compl√®te avec correction orthographique et analyse d√©taill√©e"""
        original_query = query
        correction_info = None
        
        # √âTAPE 1: Correction orthographique si disponible
        if self.spell_checker:
            correction_result = self.spell_checker.correct_query(query)
            if correction_result['has_corrections']:
                query = correction_result['corrected']
                correction_info = correction_result
        
        # √âTAPE 2: Analyse des entit√©s sur la requ√™te (possiblement corrig√©e)
        entities_analysis, missing_entities = self.detect_missing_entities_detailed(query)
        
        # √âTAPE 3: Recherche dans le corpus
        best_match, confidence, score_components = self.search_corpus_enhanced(query)
        
        # √âTAPE 4: Bonus de confiance si correction r√©ussie
        if correction_info and correction_info['has_corrections']:
            # Calculer confiance de correction
            correction_confidence = self.spell_checker.get_correction_confidence(
                original_query, query
            )
            # Appliquer bonus proportionnel √† la qualit√© de la correction
            confidence += (correction_confidence * 0.1)  # Bonus max 10%
            confidence = min(confidence, 0.99)
        
        # Fallback si aucun match
        if best_match is None:
            best_match = self.corpus_df.iloc[0].to_dict()
            confidence = 0.1
        
        # √âTAPE 5: D√©termination de la validit√© avec messages am√©lior√©s
        if confidence >= self.config.CONFIDENCE_THRESHOLD_HIGH:
            is_valid = True
            if correction_info and correction_info['has_corrections']:
                corrections_text = ', '.join(correction_info['corrections'])
                message = f"‚úÖ Excellence (confiance: {confidence:.3f}) - G√©n√©ration autoris√©e\\nüìù Corrections appliqu√©es: {corrections_text}"
            else:
                message = f"‚úÖ Excellence (confiance: {confidence:.3f}) - G√©n√©ration autoris√©e"
        elif confidence >= self.config.CONFIDENCE_THRESHOLD_MEDIUM:
            is_valid = True
            if correction_info and correction_info['has_corrections']:
                corrections_text = ', '.join(correction_info['corrections'])
                message = f"‚ö†Ô∏è Bonne correspondance (confiance: {confidence:.3f}) - G√©n√©ration avec avertissement\\nüìù Corrections appliqu√©es: {corrections_text}"
            else:
                message = f"‚ö†Ô∏è Bonne correspondance (confiance: {confidence:.3f}) - G√©n√©ration avec avertissement"
        else:
            is_valid = False
            base_message = self.generate_detailed_error_message(query, missing_entities, best_match, confidence)
            if correction_info and correction_info['has_corrections']:
                corrections_text = ', '.join(correction_info['corrections'])
                message = f"{base_message}\\n\\nüìù Corrections tent√©es: {corrections_text}\\nüí° Requ√™te corrig√©e: '{query}'"
            else:
                message = base_message
        
        return {
            'original_query': original_query,
            'query': query,
            'correction_info': correction_info,
            'confidence': confidence,
            'score_components': score_components,
            'corpus_match': best_match,
            'is_valid': is_valid,
            'message': message,
            'entities_analysis': entities_analysis,
            'missing_entities': list(missing_entities.keys()),
            'missing_entities_detailed': missing_entities
        }

# Build the shared validator on the expanded corpus, with spell correction enabled.
enhanced_validator = EnhancedValidator(enhanced_corpus_df, spell_checker)

# Feature summary.
print(
    "üîç Validateur Enhanced V2 avec correction orthographique initialis√©",
    "   ‚úÖ Messages d'erreur d√©taill√©s",
    "   ‚úÖ Scoring avanc√© avec pond√©ration",
    "   ‚úÖ Support structures complexes",
    "   ‚úÖ Correction orthographique intelligente",
    sep="\n",
)


üîç Validateur Enhanced V2 avec correction orthographique initialis√©
   ‚úÖ Messages d'erreur d√©taill√©s
   ‚úÖ Scoring avanc√© avec pond√©ration
   ‚úÖ Support structures complexes
   ‚úÖ Correction orthographique intelligente


In [6]:
# 6. Fonction de d√©monstration Enhanced avec correction orthographique
def demonstrate_enhanced_pipeline_v2(query: str):
    """Validate one query with the enhanced validator and print a readable report.

    The report is printed in this order: the original query, the spelling
    corrections (only when some were applied, together with the confidence
    returned by ``spell_checker.get_correction_confidence``), the global
    confidence score and its strictly positive components, the validator
    message, the missing entities (only when the query was rejected) and the
    best corpus match (only when one is present).

    Args:
        query: Raw workout description to validate.

    Returns:
        The dictionary returned by
        ``enhanced_validator.validate_query_enhanced(query)``, unchanged.
    """
    print(f"\nüîç REQU√äTE ORIGINALE: '{query}'")
    print("=" * 80)

    # Single call into the validator; everything below only formats its result.
    report = enhanced_validator.validate_query_enhanced(query)

    # Spelling corrections, shown only when at least one was applied.
    fixes = report.get('correction_info')
    if fixes and fixes['has_corrections']:
        print("üìù CORRECTIONS ORTHOGRAPHIQUES:")
        corrected_query = report['query']
        print(f"   Requ√™te corrig√©e: '{corrected_query}'")
        joined_fixes = ', '.join(fixes['corrections'])
        print(f"   Corrections: {joined_fixes}")
        fix_confidence = spell_checker.get_correction_confidence(
            report['original_query'], corrected_query
        )
        print(f"   Confiance correction: {fix_confidence:.3f}")

    # Confidence summary; components that are not strictly positive are hidden.
    print("\nüìä ANALYSE DE CONFIANCE:")
    global_score = report['confidence']
    print(f"   Score global: {global_score:.3f}")

    components = report.get('score_components')
    if components:
        print("   Composants du score:")
        for name, value in components.items():
            if not value > 0:
                continue
            print(f"     ‚Ä¢ {name}: {value:.3f}")

    print("\nüìã R√âSULTAT:")
    print(report['message'])

    # Missing-entity breakdown, only for rejected queries.
    rejected = not report['is_valid']
    if rejected and report['missing_entities']:
        print("\nüîç ANALYSE DES ENTIT√âS:")
        for entity_name, info in report['missing_entities_detailed'].items():
            print(f"   ‚ùå {entity_name}: {info['description']}")
            first_examples = info['examples'][:2]  # show at most two examples
            print(f"      Exemples: {', '.join(first_examples)}")

    # Best corpus match, when the validator returned one.
    best = report['corpus_match']
    if best:
        print("\nüéØ MEILLEURE CORRESPONDANCE:")
        print(f"   Description: {best['description']}")
        print(f"   Zone: {best['zone']}")
        print(f"   Complexit√©: {best.get('complexity', 'N/A')}")
        if 'structure' in best:
            print(f"   Structure: {best['structure']}")

    return report

# Announce that the demonstration helper is defined and show how to call it.
print("üöÄ Fonction de d√©monstration Enhanced V2 pr√™te")
print("   Usage: demonstrate_enhanced_pipeline_v2('votre requ√™te')")


üöÄ Fonction de d√©monstration Enhanced V2 pr√™te
   Usage: demonstrate_enhanced_pipeline_v2('votre requ√™te')


In [7]:
# Test 1: complex structure - VO2max pyramid
demonstrate_enhanced_pipeline_v2("pyramide VO2max 1-2-3-4-3-2-1min")



üîç REQU√äTE ORIGINALE: 'pyramide VO2max 1-2-3-4-3-2-1min'

üìä ANALYSE DE CONFIANCE:
   Score global: 0.733
   Composants du score:
     ‚Ä¢ zone_match: 0.900
     ‚Ä¢ duration_match: 1.200
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.733)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Nombre de r√©p√©titions\n     Exemples: 3x, 5*, 8√ó\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup'\n   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'

üîç ANALYSE DES ENTIT√âS:
   ‚ùå repetitions: Nombre de r√©p√©titions
      Exemples: 3x, 5*
   ‚ùå recovery: Temps de r√©cup√©ration entre intervalles
      Exemples: 5min recup, 2min repos

üéØ MEILLEURE CORRESPONDANCE:
   Description: pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup
   Zone: VO2max
   Co

{'original_query': 'pyramide VO2max 1-2-3-4-3-2-1min',
 'query': 'pyramide VO2max 1-2-3-4-3-2-1min',
 'correction_info': None,
 'confidence': 0.7333333333333332,
 'score_components': {'exact_match': 0,
  'zone_match': 0.8999999999999999,
  'duration_match': 1.2,
  'repetition_match': 0.0,
  'structure_match': 1.8},
 'corpus_match': {'id': 10,
  'description': 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup',
  'zone': 'VO2max',
  'structure': {'type': 'pyramid',
   'pattern': [1, 2, 3, 4, 3, 2, 1],
   'recovery': 1},
  'power_zone': 5,
  'intensity': 112,
  'complexity': 'complex',
  'variations': ['pyramide 1234321 VO2',
   'pyramid 1-2-3-4-3-2-1min',
   'pyramide VO2max classique'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Correspondance insuffisante (confiance: 0.733)\\n\\nüîç ENTIT√âS MANQUANTES:\\n   ‚Ä¢ Nombre de r√©p√©titions\\n     Exemples: 3x, 5*, 8√ó\\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\\n     Exemples: 5min recup, 2min repos, 90s reco

In [8]:
# Spelling-correction test 1: phonetic misspellings
print("üß™ TEST 1: Fautes phon√©tiques")
demonstrate_enhanced_pipeline_v2("3x 10min tempos avec 5min r√©cup aerobik")


üß™ TEST 1: Fautes phon√©tiques

üîç REQU√äTE ORIGINALE: '3x 10min tempos avec 5min r√©cup aerobik'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: '3x 10min tempo avec 5min recup aerobic'
   Corrections: 'tempos' ‚Üí 'tempo', 'r√©cup' ‚Üí 'recup', 'aerobik' ‚Üí 'aerobic'
   Confiance correction: 0.935

üìä ANALYSE DE CONFIANCE:
   Score global: 0.590
   Composants du score:
     ‚Ä¢ zone_match: 0.450
     ‚Ä¢ duration_match: 0.720
     ‚Ä¢ repetition_match: 0.600

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.590)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n   ‚Ä¢ Structure complexe de l'entra√Ænement\n     Exemples: pyramide, s√©rie, 2 blocs\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: '3x 10 min tempo sweet spot avec 5min recup'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'\n   ‚Ä¢ Pr√©cisez la structure: 'pyramide', 's√©rie', 'blocs'\n\nüìù Corrections 

{'original_query': '3x 10min tempos avec 5min r√©cup aerobik',
 'query': '3x 10min tempo avec 5min recup aerobic',
 'correction_info': {'original': '3x 10min tempos avec 5min r√©cup aerobik',
  'corrected': '3x 10min tempo avec 5min recup aerobic',
  'corrections': ["'tempos' ‚Üí 'tempo'",
   "'r√©cup' ‚Üí 'recup'",
   "'aerobik' ‚Üí 'aerobic'"],
  'has_corrections': True},
 'confidence': 0.5901731601731601,
 'score_components': {'exact_match': 0,
  'zone_match': 0.44999999999999996,
  'duration_match': 0.7200000000000001,
  'repetition_match': 0.6,
  'structure_match': 0.0},
 'corpus_match': {'id': 1,
  'description': '3x 10 min tempo sweet spot avec 5min recup',
  'zone': 'Tempo',
  'structure': {'type': 'intervals', 'reps': 3, 'duration': 10, 'recovery': 5},
  'power_zone': 3,
  'intensity': 85,
  'complexity': 'simple',
  'variations': ['3x10min tempo 5minrec',
   '3*10min sweet spot',
   '3 fois 10 minutes tempo'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Co

In [9]:
# Spelling-correction test 2: common typos
# NOTE(review): in the recorded run the corrector rewrites 'max' to 'min' and
# reduces 'vo2' to '2', so the intensity zone is lost (zone_match = 0.0) -
# confirm whether this is intended.
print("\nüß™ TEST 2: Fautes de frappe communes")
demonstrate_enhanced_pipeline_v2("piramide vo2 max 1-2-3-4-3-2-1 minuts avec 1mn recupe")



üß™ TEST 2: Fautes de frappe communes

üîç REQU√äTE ORIGINALE: 'piramide vo2 max 1-2-3-4-3-2-1 minuts avec 1mn recupe'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: 'pyramide 2 min 1 - 2 - 3 - 4 - 3 - 2 - 1 min avec 1 recup'
   Corrections: 'piramide' ‚Üí 'pyramide', 'max' ‚Üí 'min', 'minuts' ‚Üí 'min', 'recupe' ‚Üí 'recup'
   Confiance correction: 0.764

üìä ANALYSE DE CONFIANCE:
   Score global: 0.710
   Composants du score:
     ‚Ä¢ duration_match: 1.200
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.710)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Nombre de r√©p√©titions\n     Exemples: 3x, 5*, 8√ó\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\n     Exemples: VO2max, tempo, seuil\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup'\n   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'\n   ‚Ä¢ Sp√©c

{'original_query': 'piramide vo2 max 1-2-3-4-3-2-1 minuts avec 1mn recupe',
 'query': 'pyramide 2 min 1 - 2 - 3 - 4 - 3 - 2 - 1 min avec 1 recup',
 'correction_info': {'original': 'piramide vo2 max 1-2-3-4-3-2-1 minuts avec 1mn recupe',
  'corrected': 'pyramide 2 min 1 - 2 - 3 - 4 - 3 - 2 - 1 min avec 1 recup',
  'corrections': ["'piramide' ‚Üí 'pyramide'",
   "'max' ‚Üí 'min'",
   "'minuts' ‚Üí 'min'",
   "'recupe' ‚Üí 'recup'"],
  'has_corrections': True},
 'confidence': 0.7096969696969697,
 'score_components': {'exact_match': 0,
  'zone_match': 0.0,
  'duration_match': 1.2,
  'repetition_match': 0.0,
  'structure_match': 1.8},
 'corpus_match': {'id': 10,
  'description': 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup',
  'zone': 'VO2max',
  'structure': {'type': 'pyramid',
   'pattern': [1, 2, 3, 4, 3, 2, 1],
   'recovery': 1},
  'power_zone': 5,
  'intensity': 112,
  'complexity': 'complex',
  'variations': ['pyramide 1234321 VO2',
   'pyramid 1-2-3-4-3-2-1min',
   'pyramide VO2

In [10]:
# Spelling-correction test 3: mix of different mistakes
# NOTE(review): in the recorded run 'vo2max' becomes '2' and the 'mn' units are
# dropped ('3mn' -> '3', '5mn' -> '5') in the corrected query - confirm whether
# this is intended.
print("\nüß™ TEST 3: M√©lange de fautes diverses")
demonstrate_enhanced_pipeline_v2("2 s√©ries de 4x3mn vo2max avec 90s recupe et 5mn entre seri√©s")



üß™ TEST 3: M√©lange de fautes diverses

üîç REQU√äTE ORIGINALE: '2 s√©ries de 4x3mn vo2max avec 90s recupe et 5mn entre seri√©s'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: '2 series de 4x 3 2 avec 90s recup et 5 entre series'
   Corrections: 's√©ries' ‚Üí 'series', 'recupe' ‚Üí 'recup', 'seri√©s' ‚Üí 'series'
   Confiance correction: 0.865

üìä ANALYSE DE CONFIANCE:
   Score global: 0.786
   Composants du score:
     ‚Ä¢ duration_match: 1.200
     ‚Ä¢ repetition_match: 0.600
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Bonne correspondance (confiance: 0.786) - G√©n√©ration avec avertissement\nüìù Corrections appliqu√©es: 's√©ries' ‚Üí 'series', 'recupe' ‚Üí 'recup', 'seri√©s' ‚Üí 'series'

üéØ MEILLEURE CORRESPONDANCE:
   Description: 2 series de 4x3min VO2max avec 90s recup et 5min entre series
   Zone: VO2max
   Complexit√©: complex
   Structure: {'type': 'series', 'sets': 2, 'reps': 4, 'duration': 3, 'recovery_intra': 1.5, 'recovery_inter': 5}


{'original_query': '2 s√©ries de 4x3mn vo2max avec 90s recupe et 5mn entre seri√©s',
 'query': '2 series de 4x 3 2 avec 90s recup et 5 entre series',
 'correction_info': {'original': '2 s√©ries de 4x3mn vo2max avec 90s recupe et 5mn entre seri√©s',
  'corrected': '2 series de 4x 3 2 avec 90s recup et 5 entre series',
  'corrections': ["'s√©ries' ‚Üí 'series'",
   "'recupe' ‚Üí 'recup'",
   "'seri√©s' ‚Üí 'series'"],
  'has_corrections': True},
 'confidence': 0.7864864864864863,
 'score_components': {'exact_match': 0,
  'zone_match': 0.0,
  'duration_match': 1.2,
  'repetition_match': 0.6,
  'structure_match': 1.8},
 'corpus_match': {'id': 20,
  'description': '2 series de 4x3min VO2max avec 90s recup et 5min entre series',
  'zone': 'VO2max',
  'structure': {'type': 'series',
   'sets': 2,
   'reps': 4,
   'duration': 3,
   'recovery_intra': 1.5,
   'recovery_inter': 5},
  'power_zone': 5,
  'intensity': 115,
  'complexity': 'complex',
  'variations': ['2*(4x3min VO2)',
   '2 blocs 4x3

In [11]:
# Spelling-correction test 4: severe misspellings with abbreviations
# NOTE(review): in the recorded run the zone abbreviation 'z2' is reduced to '2'
# in the corrected query - confirm whether this is intended.
print("\nüß™ TEST 4: Fautes graves avec abr√©viations")
demonstrate_enhanced_pipeline_v2("1h endurense aerobique tranquile z2")



üß™ TEST 4: Fautes graves avec abr√©viations

üîç REQU√äTE ORIGINALE: '1h endurense aerobique tranquile z2'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: '1h endurance aerobic tranquille 2'
   Corrections: 'endurense' ‚Üí 'endurance', 'aerobique' ‚Üí 'aerobic', 'tranquile' ‚Üí 'tranquille'
   Confiance correction: 0.853

üìä ANALYSE DE CONFIANCE:
   Score global: 0.512
   Composants du score:
     ‚Ä¢ zone_match: 0.900
     ‚Ä¢ duration_match: 0.240

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.512)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Nombre de r√©p√©titions\n     Exemples: 3x, 5*, 8√ó\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n   ‚Ä¢ Structure complexe de l'entra√Ænement\n     Exemples: pyramide, s√©rie, 2 blocs\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: '1h endurance allure aerobic tranquille'\n   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2m

{'original_query': '1h endurense aerobique tranquile z2',
 'query': '1h endurance aerobic tranquille 2',
 'correction_info': {'original': '1h endurense aerobique tranquile z2',
  'corrected': '1h endurance aerobic tranquille 2',
  'corrections': ["'endurense' ‚Üí 'endurance'",
   "'aerobique' ‚Üí 'aerobic'",
   "'tranquile' ‚Üí 'tranquille'"],
  'has_corrections': True},
 'confidence': 0.5119607843137255,
 'score_components': {'exact_match': 0,
  'zone_match': 0.8999999999999999,
  'duration_match': 0.24,
  'repetition_match': 0.0,
  'structure_match': 0.0},
 'corpus_match': {'id': 0,
  'description': '1h endurance allure aerobic tranquille',
  'zone': 'Aerobic',
  'structure': {'type': 'continuous', 'duration': 60},
  'power_zone': 2,
  'intensity': 65,
  'complexity': 'simple',
  'variations': ['1h endur aerobic',
   '60min endurance facile',
   '1 heure aerobic tranquil'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Correspondance insuffisante (confiance: 0.512)\

In [12]:
# Spelling-correction test 5: extreme misspellings and accents
print("\nüß™ TEST 5: Fautes extr√™mes et accents")
demonstrate_enhanced_pipeline_v2("8x3mn over-undre 95-105% sueil avec 2mn r√©pos")



üß™ TEST 5: Fautes extr√™mes et accents

üîç REQU√äTE ORIGINALE: '8x3mn over-undre 95-105% sueil avec 2mn r√©pos'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: '8x 3 over - under 95 - 105% seuil avec 2 repos'
   Corrections: 'undre' ‚Üí 'under', 'sueil' ‚Üí 'seuil', 'r√©pos' ‚Üí 'repos'
   Confiance correction: 0.835

üìä ANALYSE DE CONFIANCE:
   Score global: 0.834
   Composants du score:
     ‚Ä¢ zone_match: 0.450
     ‚Ä¢ duration_match: 1.200
     ‚Ä¢ repetition_match: 0.600
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Bonne correspondance (confiance: 0.834) - G√©n√©ration avec avertissement\nüìù Corrections appliqu√©es: 'undre' ‚Üí 'under', 'sueil' ‚Üí 'seuil', 'r√©pos' ‚Üí 'repos'

üéØ MEILLEURE CORRESPONDANCE:
   Description: over-under 8x3min 95-105% FTP avec 2min recup
   Zone: Threshold
   Complexit√©: complex
   Structure: {'type': 'over_under', 'reps': 8, 'duration': 3, 'low': 95, 'high': 105, 'recovery': 2}


{'original_query': '8x3mn over-undre 95-105% sueil avec 2mn r√©pos',
 'query': '8x 3 over - under 95 - 105% seuil avec 2 repos',
 'correction_info': {'original': '8x3mn over-undre 95-105% sueil avec 2mn r√©pos',
  'corrected': '8x 3 over - under 95 - 105% seuil avec 2 repos',
  'corrections': ["'undre' ‚Üí 'under'",
   "'sueil' ‚Üí 'seuil'",
   "'r√©pos' ‚Üí 'repos'"],
  'has_corrections': True},
 'confidence': 0.8335164835164834,
 'score_components': {'exact_match': 0,
  'zone_match': 0.44999999999999996,
  'duration_match': 1.2,
  'repetition_match': 0.6,
  'structure_match': 1.8},
 'corpus_match': {'id': 30,
  'description': 'over-under 8x3min 95-105% FTP avec 2min recup',
  'zone': 'Threshold',
  'structure': {'type': 'over_under',
   'reps': 8,
   'duration': 3,
   'low': 95,
   'high': 105,
   'recovery': 2},
  'power_zone': 4,
  'intensity': 100,
  'complexity': 'complex',
  'variations': ['over under 95-105%', '8x3min over-under', 'sur-sous seuil'],
  'is_variation': nan},
 'is

In [13]:
# Helper to test the spelling correction on its own
def test_spell_correction(query):
    """Test isol√© du correcteur orthographique"""
    print(f"\nüìù TEST CORRECTION: '{query}'")
    print("-" * 60)
    
    correction_result = spell_checker.correct_query(query)
    
    print(f"Original: {correction_result['original']}")
    print(f"Corrig√©:  {correction_result['corrected']}")
    
    if correction_result['has_corrections']:
        print(f"Corrections: {', '.join(correction_result['corrections'])}")
        confidence = spell_checker.get_correction_confidence(
            correction_result['original'], 
            correction_result['corrected']
        )
        print(f"Confiance: {confidence:.3f}")
    else:
        print("Aucune correction n√©cessaire")
    
    return correction_result

# Announce that the spelling-correction test helper is defined.
print("üîß Fonction de test de correction orthographique pr√™te")


üîß Fonction de test de correction orthographique pr√™te


In [14]:
# Isolated spelling-correction tests: each entry is a misspelled term or short
# phrase that is passed to the spell checker on its own.
test_cases = [
    "aerobik",
    "piramide vo2 max",
    "recupe",
    "minuts",
    "seuille",
    "s√©ries",
    "intervales",
    "endurense",
    "over-undre"
]

print("üß™ TESTS ISOL√âS DE CORRECTION ORTHOGRAPHIQUE")
print("=" * 60)

# Run the spell-checker-only helper on every case; it prints its own report.
for test_case in test_cases:
    test_spell_correction(test_case)


üß™ TESTS ISOL√âS DE CORRECTION ORTHOGRAPHIQUE

üìù TEST CORRECTION: 'aerobik'
------------------------------------------------------------
Original: aerobik
Corrig√©:  aerobic
Corrections: 'aerobik' ‚Üí 'aerobic'
Confiance: 0.857

üìù TEST CORRECTION: 'piramide vo2 max'
------------------------------------------------------------
Original: piramide vo2 max
Corrig√©:  pyramide 2 min
Corrections: 'piramide' ‚Üí 'pyramide', 'max' ‚Üí 'min'
Confiance: 0.733

üìù TEST CORRECTION: 'recupe'
------------------------------------------------------------
Original: recupe
Corrig√©:  recup
Corrections: 'recupe' ‚Üí 'recup'
Confiance: 0.909

üìù TEST CORRECTION: 'minuts'
------------------------------------------------------------
Original: minuts
Corrig√©:  min
Corrections: 'minuts' ‚Üí 'min'
Confiance: 0.667

üìù TEST CORRECTION: 'seuille'
------------------------------------------------------------
Original: seuille
Corrig√©:  seuil
Corrections: 'seuille' ‚Üí 'seuil'
Confiance: 0.833

üìù

In [15]:
# Test 2: complex structure - series
demonstrate_enhanced_pipeline_v2("2 series de 4x3min VO2max")



üîç REQU√äTE ORIGINALE: '2 series de 4x3min VO2max'

üìä ANALYSE DE CONFIANCE:
   Score global: 0.773
   Composants du score:
     ‚Ä¢ zone_match: 0.900
     ‚Ä¢ duration_match: 0.960
     ‚Ä¢ repetition_match: 0.600
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Bonne correspondance (confiance: 0.773) - G√©n√©ration avec avertissement

üéØ MEILLEURE CORRESPONDANCE:
   Description: 2 series de 4x3min VO2max avec 90s recup et 5min entre series
   Zone: VO2max
   Complexit√©: complex
   Structure: {'type': 'series', 'sets': 2, 'reps': 4, 'duration': 3, 'recovery_intra': 1.5, 'recovery_inter': 5}


{'original_query': '2 series de 4x3min VO2max',
 'query': '2 series de 4x3min VO2max',
 'correction_info': None,
 'confidence': 0.7733333333333332,
 'score_components': {'exact_match': 0,
  'zone_match': 0.8999999999999999,
  'duration_match': 0.96,
  'repetition_match': 0.6,
  'structure_match': 1.8},
 'corpus_match': {'id': 20,
  'description': '2 series de 4x3min VO2max avec 90s recup et 5min entre series',
  'zone': 'VO2max',
  'structure': {'type': 'series',
   'sets': 2,
   'reps': 4,
   'duration': 3,
   'recovery_intra': 1.5,
   'recovery_inter': 5},
  'power_zone': 5,
  'intensity': 115,
  'complexity': 'complex',
  'variations': ['2*(4x3min VO2)',
   '2 blocs 4x3min VO2max',
   '2 series 4x3min avec 90s'],
  'is_variation': nan},
 'is_valid': True,
 'message': '‚ö†Ô∏è Bonne correspondance (confiance: 0.773) - G√©n√©ration avec avertissement',
 'entities_analysis': {'duration': {'found': True,
   'matches': ['s', 'min'],
   'examples': ['10min', '1h', '30s', '45 minutes'],
   

In [16]:
# Test 3: incomplete query - detailed error messages
demonstrate_enhanced_pipeline_v2("entra√Ænement pyramide")



üîç REQU√äTE ORIGINALE: 'entra√Ænement pyramide'

üìä ANALYSE DE CONFIANCE:
   Score global: 0.500
   Composants du score:
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.500)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Dur√©e des intervalles ou de la s√©ance\n     Exemples: 10min, 1h, 30s\n   ‚Ä¢ Nombre de r√©p√©titions\n     Exemples: 3x, 5*, 8√ó\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\n     Exemples: VO2max, tempo, seuil\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup'\n   ‚Ä¢ Pr√©cisez la dur√©e: '10min', '1h', '30s'\n   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'\n   ‚Ä¢ Sp√©cifiez l'intensit√©: 'tempo', 'VO2max', '85%'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'

üîç ANALYSE DES ENTIT√âS:
   ‚ùå duration: Dur√©e des intervalles ou de la s√©ance
      Exemples: 10min, 1h
   

{'original_query': 'entra√Ænement pyramide',
 'query': 'entra√Ænement pyramide',
 'correction_info': None,
 'confidence': 0.49999999999999994,
 'score_components': {'exact_match': 0,
  'zone_match': 0.0,
  'duration_match': 0.0,
  'repetition_match': 0.0,
  'structure_match': 1.8},
 'corpus_match': {'id': 10,
  'description': 'pyramide 1-2-3-4-3-2-1min VO2max avec 1min recup',
  'zone': 'VO2max',
  'structure': {'type': 'pyramid',
   'pattern': [1, 2, 3, 4, 3, 2, 1],
   'recovery': 1},
  'power_zone': 5,
  'intensity': 112,
  'complexity': 'complex',
  'variations': ['pyramide 1234321 VO2',
   'pyramid 1-2-3-4-3-2-1min',
   'pyramide VO2max classique'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Correspondance insuffisante (confiance: 0.500)\\n\\nüîç ENTIT√âS MANQUANTES:\\n   ‚Ä¢ Dur√©e des intervalles ou de la s√©ance\\n     Exemples: 10min, 1h, 30s\\n   ‚Ä¢ Nombre de r√©p√©titions\\n     Exemples: 3x, 5*, 8√ó\\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\\n     Exem

In [17]:
# Test 4: very vague query - constructive suggestions
demonstrate_enhanced_pipeline_v2("intervalles")



üîç REQU√äTE ORIGINALE: 'intervalles'

üìä ANALYSE DE CONFIANCE:
   Score global: 0.300
   Composants du score:

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.300)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Dur√©e des intervalles ou de la s√©ance\n     Exemples: 10min, 1h, 30s\n   ‚Ä¢ Nombre de r√©p√©titions\n     Exemples: 3x, 5*, 8√ó\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\n     Exemples: VO2max, tempo, seuil\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n   ‚Ä¢ Structure complexe de l'entra√Ænement\n     Exemples: pyramide, s√©rie, 2 blocs\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: '1h endurance allure aerobic tranquille'\n   ‚Ä¢ Pr√©cisez la dur√©e: '10min', '1h', '30s'\n   ‚Ä¢ Ajoutez le nombre: '3x', '5*', '8 fois'\n   ‚Ä¢ Sp√©cifiez l'intensit√©: 'tempo', 'VO2max', '85%'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'\n   ‚Ä¢ Pr√©cisez la structure: 'pyramide', 's√©rie', 'blocs'

üîç ANALYSE DES

{'original_query': 'intervalles',
 'query': 'intervalles',
 'correction_info': None,
 'confidence': 0.3,
 'score_components': {'exact_match': 0,
  'zone_match': 0.0,
  'duration_match': 0.0,
  'repetition_match': 0.0,
  'structure_match': 0.0},
 'corpus_match': {'id': 0,
  'description': '1h endurance allure aerobic tranquille',
  'zone': 'Aerobic',
  'structure': {'type': 'continuous', 'duration': 60},
  'power_zone': 2,
  'intensity': 65,
  'complexity': 'simple',
  'variations': ['1h endur aerobic',
   '60min endurance facile',
   '1 heure aerobic tranquil'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Correspondance insuffisante (confiance: 0.300)\\n\\nüîç ENTIT√âS MANQUANTES:\\n   ‚Ä¢ Dur√©e des intervalles ou de la s√©ance\\n     Exemples: 10min, 1h, 30s\\n   ‚Ä¢ Nombre de r√©p√©titions\\n     Exemples: 3x, 5*, 8√ó\\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\\n     Exemples: VO2max, tempo, seuil\\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\\n     Exemples

In [18]:
# Test 5: specialised structure - over-under
demonstrate_enhanced_pipeline_v2("over-under 8x3min FTP")



üîç REQU√äTE ORIGINALE: 'over-under 8x3min FTP'

üìä ANALYSE DE CONFIANCE:
   Score global: 0.670
   Composants du score:
     ‚Ä¢ zone_match: 0.450
     ‚Ä¢ duration_match: 0.480
     ‚Ä¢ repetition_match: 0.600
     ‚Ä¢ structure_match: 1.800

üìã R√âSULTAT:
‚ö†Ô∏è Correspondance insuffisante (confiance: 0.670)\n\nüîç ENTIT√âS MANQUANTES:\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\n     Exemples: VO2max, tempo, seuil\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\n     Exemples: 5min recup, 2min repos, 90s recovery\n   ‚Ä¢ Structure complexe de l'entra√Ænement\n     Exemples: pyramide, s√©rie, 2 blocs\n\nüí° SUGGESTIONS:\n   ‚Ä¢ Essayez: 'over-under 8x3min 95-105% FTP avec 2min recup'\n   ‚Ä¢ Sp√©cifiez l'intensit√©: 'tempo', 'VO2max', '85%'\n   ‚Ä¢ Indiquez la r√©cup√©ration: '5min recup', '2min repos'\n   ‚Ä¢ Pr√©cisez la structure: 'pyramide', 's√©rie', 'blocs'

üîç ANALYSE DES ENTIT√âS:
   ‚ùå intensity: Zone d'intensit√© ou pourcentage
      Exemples: VO2max, tempo
   ‚ù

{'original_query': 'over-under 8x3min FTP',
 'query': 'over-under 8x3min FTP',
 'correction_info': None,
 'confidence': 0.6699999999999999,
 'score_components': {'exact_match': 0,
  'zone_match': 0.44999999999999996,
  'duration_match': 0.48,
  'repetition_match': 0.6,
  'structure_match': 1.8},
 'corpus_match': {'id': 30,
  'description': 'over-under 8x3min 95-105% FTP avec 2min recup',
  'zone': 'Threshold',
  'structure': {'type': 'over_under',
   'reps': 8,
   'duration': 3,
   'low': 95,
   'high': 105,
   'recovery': 2},
  'power_zone': 4,
  'intensity': 100,
  'complexity': 'complex',
  'variations': ['over under 95-105%', '8x3min over-under', 'sur-sous seuil'],
  'is_variation': nan},
 'is_valid': False,
 'message': "‚ö†Ô∏è Correspondance insuffisante (confiance: 0.670)\\n\\nüîç ENTIT√âS MANQUANTES:\\n   ‚Ä¢ Zone d'intensit√© ou pourcentage\\n     Exemples: VO2max, tempo, seuil\\n   ‚Ä¢ Temps de r√©cup√©ration entre intervalles\\n     Exemples: 5min recup, 2min repos, 90s reco

In [19]:
# Final test: an extreme query combining many typical spelling mistakes.
print("üéØ TEST FINAL - CAS EXTR√äME")
print("Requ√™te avec de nombreuses fautes d'orthographe typiques...")

# Deliberately hard input: two workout descriptions joined by '+', full of typos.
test_query = "3x10mn tempos sueit spote avec 5mn r√©cupe + 2 s√©ri√©s de 4x3mn vo2 maxe avec 90sec repos"

# Single-backslash "\n" so real blank lines are printed; the previous "\\n"
# printed the two characters backslash + n verbatim.
print(f"\nRequ√™te originale (avec fautes): '{test_query}'\n")

# Spelling correction alone, without validation.
correction = spell_checker.correct_query(test_query)
print("üìù CORRECTION ORTHOGRAPHIQUE SEULE:")
print(f"Corrections: {', '.join(correction['corrections'])}")
print(f"Requ√™te corrig√©e: '{correction['corrected']}'")

# Full pipeline run on the same query; the result is kept in `result`.
print("\n" + "=" * 80)
result = demonstrate_enhanced_pipeline_v2(test_query)


üéØ TEST FINAL - CAS EXTR√äME
Requ√™te avec de nombreuses fautes d'orthographe typiques...
\nRequ√™te originale (avec fautes): '3x10mn tempos sueit spote avec 5mn r√©cupe + 2 s√©ri√©s de 4x3mn vo2 maxe avec 90sec repos'\n
üìù CORRECTION ORTHOGRAPHIQUE SEULE:
Corrections: 'tempos' ‚Üí 'tempo', 'sueit' ‚Üí 'sweet', 'spote' ‚Üí 'spot', 'r√©cupe' ‚Üí 'recup', 's√©ri√©s' ‚Üí 'series'
Requ√™te corrig√©e: '3x 10 tempo sweet spot avec 5 recup + 2 series de 4x 3 2 maxe avec 90s repos'

üîç REQU√äTE ORIGINALE: '3x10mn tempos sueit spote avec 5mn r√©cupe + 2 s√©ri√©s de 4x3mn vo2 maxe avec 90sec repos'
üìù CORRECTIONS ORTHOGRAPHIQUES:
   Requ√™te corrig√©e: '3x 10 tempo sweet spot avec 5 recup + 2 series de 4x 3 2 maxe avec 90s repos'
   Corrections: 'tempos' ‚Üí 'tempo', 'sueit' ‚Üí 'sweet', 'spote' ‚Üí 'spot', 'r√©cupe' ‚Üí 'recup', 's√©ri√©s' ‚Üí 'series'
   Confiance correction: 0.847

üìä ANALYSE DE CONFIANCE:
   Score global: 0.785
   Composants du score:
     ‚Ä¢ duration_match: 1.200