# Info Checker - Analyseur de Crédibilité TikTok

Ce notebook permet d'analyser et de vérifier la crédibilité des informations partagées par les influenceurs TikTok.


## 1. Configuration et Import


In [None]:
import sys
from pathlib import Path
import json
from datetime import datetime

# Add the current working directory to sys.path so that the `src` package can be imported from it
sys.path.append(str(Path.cwd()))

from src.config import Config
from src.downloader import TikTokDownloader
from src.transcriber import AudioTranscriber
from src.analyzer import LLMAnalyzer
from src.fact_checker import FactChecker
from src.visualizer import ResultVisualizer
from src.storage import ResultStorage

# Validate the configuration before any pipeline object is created.
# Config.validate() is expected to raise ValueError on an invalid setup; the
# error is reported and execution continues so the settings can be fixed and
# this cell re-run.
try:
    Config.validate()
except ValueError as e:
    print(f"❌ Erreur de configuration: {e}")
else:
    print("✅ Configuration validée")


## 2. Paramètres d'Analyse


In [None]:
# --- Analysis parameters: edit these before running the cells below ---

# What to analyse: "video" (a single URL) or "user" (videos of one account)
analysis_mode = "video"

# "video" mode: URL of the video,
# e.g. "https://www.tiktok.com/@username/video/1234567890"
video_url = ""

# "user" mode: account name without the leading "@", e.g. "username"
username = ""
# Maximum number of videos to analyse in "user" mode
max_videos = 5

# LLM backend: "openai", "anthropic" or "local"
llm_provider = Config.DEFAULT_LLM_PROVIDER

# Language code handed to the transcription and fact-checking steps
language = "fr"

print("Mode d'analyse:", analysis_mode)
print("Provider LLM:", llm_provider)


## 3. Téléchargement des Vidéos


In [None]:
# Download the video(s) selected by the analysis parameters.
# Outputs, consumed by the following cells:
#   video_paths          -- paths of the downloaded files
#   video_metadata_list  -- one metadata dict per entry of video_paths
downloader = TikTokDownloader()
video_paths = []
video_metadata_list = []

if analysis_mode == "video":
    if not video_url:
        print("⚠️ Veuillez fournir une URL de vidéo")
    else:
        print(f"📥 Téléchargement de la vidéo: {video_url}")
        # Fetch the metadata first, then the file itself.
        metadata = downloader.get_video_info(video_url)
        video_path = downloader.download_video(video_url)

        # Record both only after both calls have succeeded, so the two lists
        # cannot get out of step if the download raises.
        video_metadata_list.append(metadata)
        video_paths.append(video_path)
        print(f"✅ Vidéo téléchargée: {video_path.name}")

elif analysis_mode == "user":
    if not username:
        print("⚠️ Veuillez fournir un nom d'utilisateur")
    else:
        print(f"📥 Téléchargement des vidéos de @{username}...")
        video_paths = downloader.download_user_videos(username, max_videos)
        # No per-video metadata is fetched in this mode. Build a distinct
        # empty dict per video: `[{}] * n` would repeat ONE shared dict, so
        # mutating any entry later would silently change all of them.
        video_metadata_list = [{} for _ in video_paths]
        print(f"✅ {len(video_paths)} vidéo(s) téléchargée(s)")

else:
    # Any other value used to fall through silently with nothing downloaded.
    print(f"⚠️ Mode d'analyse inconnu: {analysis_mode!r}")

print(f"\nTotal de vidéos à analyser: {len(video_paths)}")


## 4. Transcription des Vidéos


In [None]:
# Transcribe every downloaded video.
# Each result is the dict returned by transcribe_video(); its 'text' entry is
# read below and the source file path is added to it under 'video_path'.
transcriber = AudioTranscriber(model_size="base")  # "tiny", "base", "small", "medium", "large"
transcriptions = []

total_videos = len(video_paths)
for i, video_path in enumerate(video_paths, 1):
    print(f"\n🎤 Transcription {i}/{total_videos}: {video_path.name}")
    transcription = transcriber.transcribe_video(video_path, language=language)
    transcription['video_path'] = str(video_path)
    transcriptions.append(transcription)

    # Show the length and a short excerpt as a quick sanity check.
    transcript_text = transcription['text']
    print(f"✅ Transcription terminée ({len(transcript_text)} caractères)")
    print(f"📝 Extrait: {transcript_text[:200]}...")


## 5. Analyse LLM


In [None]:
# Run the LLM analysis on every transcription.
# Each result is the dict returned by analyze_content(); its 'analysis' entry
# (the analysis text) is what the later cells read.
analyzer = LLMAnalyzer(provider=llm_provider)
llm_analyses = []

# Pad the metadata list with empty dicts so that zip() below does not drop
# transcriptions that have no metadata entry.
missing_metadata = len(transcriptions) - len(video_metadata_list)
video_metadata_list.extend({} for _ in range(missing_metadata))

for i, (transcription, metadata) in enumerate(zip(transcriptions, video_metadata_list), 1):
    print(f"\n🤖 Analyse LLM {i}/{len(transcriptions)}")

    analysis = analyzer.analyze_content(
        transcription=transcription['text'],
        video_metadata=metadata,
    )

    llm_analyses.append(analysis)
    print("✅ Analyse terminée")
    print(f"📊 Extrait: {analysis['analysis'][:300]}...")


## 6. Extraction des Affirmations


In [None]:
# Extraire les affirmations depuis l'analyse LLM
import re

def extract_claims_from_analysis(analysis_text: str) -> list:
    """Extract candidate factual claims from an LLM analysis text.

    Two heuristics are applied, in order:

    1. Keyword search: every span that starts at a French reporting verb
       (affirme, dit, prétend, soutient, déclare, assure) or an attribution
       marker (selon, d'après) and runs up to the next full stop.
    2. Fallback, only when step 1 finds nothing: the first five sentences
       longer than 20 characters.

    Args:
        analysis_text: Free text of the LLM analysis. ``None`` or an empty
            string yields an empty list.

    Returns:
        At most 10 distinct claim strings in order of discovery (all matches
        of the verb pattern first, then all matches of the attribution
        pattern).
    """
    if not analysis_text:
        return []

    # The leading \b stops a keyword from matching inside a longer word
    # ("dit" inside "crédit" or "tradition"). There is deliberately no
    # trailing \b, so inflected forms such as "affirment" still match.
    patterns = [
        r'\b(?:affirme|dit|prétend|soutient|déclare|assure)[^.]*\.',
        r"\b(?:selon|d'après)[^.]*\.",
    ]

    claims = []
    for pattern in patterns:
        claims.extend(re.findall(pattern, analysis_text, re.IGNORECASE))

    if not claims:
        sentences = re.split(r'[.!?]+', analysis_text)
        claims = [s.strip() for s in sentences if len(s.strip()) > 20][:5]

    # Drop exact repeats while keeping the original order, so the same claim
    # is not returned (and later verified) more than once.
    return list(dict.fromkeys(claims))[:10]

# Run the extraction on every LLM analysis and pool the claims of all videos
# into one flat list for the fact-checking step.
all_claims = []
for video_number, analysis in enumerate(llm_analyses, start=1):
    claims = extract_claims_from_analysis(analysis['analysis'])
    all_claims.extend(claims)
    print(f"Vidéo {video_number}: {len(claims)} affirmation(s) extraite(s)")

print(f"\nTotal d'affirmations à vérifier: {len(all_claims)}")


## 7. Vérification des Faits


In [None]:
# Verify all pooled claims in one call.
# fact_check_results is used below as a mapping from claim text to a result
# dict from which 'verdict', 'credibility_score' and 'sources' are read.
fact_checker = FactChecker()

print(f"🔍 Vérification de {len(all_claims)} affirmation(s)...")
fact_check_results = fact_checker.verify_claims(all_claims, language=language)

print("\n✅ Vérification terminée")
print("\nRésultats par affirmation:")
# Preview only the first three results to keep the cell output short.
for claim, result in list(fact_check_results.items())[:3]:
    print(f"\n📌 {claim[:60]}...")
    print(f"   Verdict: {result['verdict']}")
    print(f"   Score: {result['credibility_score']}%")
    print(f"   Sources trouvées: {len(result['sources'])}")


## 8. Compilation des Résultats


In [None]:
# Compile everything produced so far into one `results` dict:
#   results['metadata']    -- run-level information
#   results['videos']      -- one entry per analysed video
#   results['statistics']  -- aggregates over all videos
from collections import Counter

results = {
    'metadata': {
        'source': video_url if analysis_mode == "video" else f"@{username}",
        'analysis_date': datetime.now().isoformat(),
        'video_count': len(video_paths),
        'llm_provider': llm_provider,
        'language': language
    },
    'videos': []
}

# Pad the metadata list with empty dicts so that zip() below does not drop
# videos that have no metadata entry.
missing_metadata = len(video_paths) - len(video_metadata_list)
video_metadata_list.extend({} for _ in range(missing_metadata))

# Keys of a claim result that may each hold a list of sources.
source_keys = ('sources', 'fact_checking_results', 'scientific_results', 'news_results')

for video_path, transcription, llm_analysis, metadata in zip(
    video_paths, transcriptions, llm_analyses, video_metadata_list
):
    # Re-derive this video's claims and keep those that were fact-checked.
    video_claims = extract_claims_from_analysis(llm_analysis['analysis'])
    video_fact_check = {
        claim: fact_check_results[claim]
        for claim in video_claims
        if claim in fact_check_results
    }

    if video_fact_check:
        claim_results = list(video_fact_check.values())
        avg_score = sum(r['credibility_score'] for r in claim_results) / len(claim_results)
        # Most frequent verdict. Counter breaks ties by first occurrence, so
        # the outcome is reproducible; max() over a set depended on the
        # per-session string hash order.
        main_verdict = Counter(r['verdict'] for r in claim_results).most_common(1)[0][0]
    else:
        # Nothing could be verified for this video: mid-scale placeholder
        # score and the "not verified" verdict.
        avg_score = 50
        main_verdict = 'non_verifie'

    # Gather the sources attached to every verified claim of this video.
    all_sources = []
    for claim_result in video_fact_check.values():
        for source_key in source_keys:
            all_sources.extend(claim_result.get(source_key, []))

    # De-duplicate by URL, keeping the first occurrence; sources without a
    # URL are dropped. Assumes each source is a dict -- TODO confirm against
    # FactChecker.
    seen_urls = set()
    unique_sources = []
    for source in all_sources:
        url = source.get('url', '')
        if url and url not in seen_urls:
            seen_urls.add(url)
            unique_sources.append(source)

    metadata = metadata or {}
    results['videos'].append({
        # Fall back to the file name (without extension) when no title is known.
        'title': metadata.get('title', video_path.stem),
        'metadata': metadata,
        'transcription': transcription,
        'llm_analysis': llm_analysis,
        'fact_checking': {
            'credibility_score': int(avg_score),
            'verdict': main_verdict,
            'claims': video_fact_check,
            'sources': unique_sources[:20],  # capped at 20 per video
        }
    })

all_scores = [v['fact_checking']['credibility_score'] for v in results['videos']]
all_verdicts = [v['fact_checking']['verdict'] for v in results['videos']]

results['statistics'] = {
    'average_credibility': sum(all_scores) / len(all_scores) if all_scores else 0,
    'verified_count': sum(1 for v in all_verdicts if v != 'non_verifie'),
    'unverified_count': sum(1 for v in all_verdicts if v == 'non_verifie'),
    'verdict_distribution': dict(Counter(all_verdicts))
}

print("✅ Résultats compilés")
print("\n📊 Statistiques:")
print(f"   Score moyen: {results['statistics']['average_credibility']:.1f}%")
print(f"   Vidéos vérifiées: {results['statistics']['verified_count']}")
print(f"   Vidéos non vérifiées: {results['statistics']['unverified_count']}")


## 9. Visualisations


In [None]:
import matplotlib.pyplot as plt

# Render the static charts for the compiled results. Each figure is shown
# right after it is created.
visualizer = ResultVisualizer()

print("📊 Génération des visualisations...")
fig1 = visualizer.create_credibility_chart(results['videos'])
plt.show()

fig2 = visualizer.create_verdict_pie(results['videos'])
plt.show()

# The word cloud is built from the transcription text of every video.
all_transcriptions = [v['transcription']['text'] for v in results['videos']]
fig3 = visualizer.create_wordcloud(all_transcriptions)
plt.show()

# create_timeline_chart() may return a falsy value, in which case there is
# nothing to show.
fig4 = visualizer.create_timeline_chart(results['videos'])
if fig4:
    plt.show()

print("✅ Visualisations générées")


## 10. Sauvegarde des Résultats


In [None]:
# Persist the compiled results. save_results() returns a mapping from which
# the 'json' and 'markdown' entries are reported below.
storage = ResultStorage()

# Output files are prefixed with the account name in "user" mode and with
# the literal "video" otherwise.
filename_prefix = username if analysis_mode == 'user' else 'video'
saved_files = storage.save_results(results, filename_prefix=filename_prefix)

print("✅ Résultats sauvegardés:")
print(f"   JSON: {saved_files['json']}")
print(f"   Markdown: {saved_files['markdown']}")


## 11. Tableau de Bord Interactif


In [None]:
# Build the interactive dashboard, display it inline, then export it as a
# standalone, timestamped HTML file.
dashboard = visualizer.create_interactive_dashboard(results['videos'])
dashboard.show()

# Make sure the target directory exists before writing.
# NOTE(review): assumes Config.OUTPUT_DIR is a pathlib.Path (it is combined
# with "/" below) -- confirm in src/config.
Config.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
dashboard_path = Config.OUTPUT_DIR / f"dashboard_{timestamp}.html"
dashboard.write_html(str(dashboard_path))
print(f"✅ Tableau de bord sauvegardé: {dashboard_path}")
