📄 Script Python : hackernews_fetcher.py

In [None]:
import requests
import json
import argparse
from pathlib import Path

# HackerNews API endpoints, both derived from one shared root.
_HN_API_ROOT = "https://hacker-news.firebaseio.com/v0"
TOP_STORIES_URL = _HN_API_ROOT + "/topstories.json"
# The "{}" placeholder is filled with a story ID via str.format().
ITEM_URL = _HN_API_ROOT + "/item/{}.json"

# Storage directory for the raw JSON dumps; created at import time so that
# later writes never fail on a missing folder.
RAW_DATA_DIR = Path("raw_data")
RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)

def get_top_story_ids(limit, timeout=10):
    """
    Fetch the IDs of the current top stories.

    Args:
        limit: Maximum number of IDs to return. ``None`` returns every ID
            the API reported; a negative value is treated as 0 instead of
            slicing from the end of the list.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list holding at most ``limit`` story IDs, in the order the API
        returned them.

    Raises:
        Exception: If the API answers with a status code other than 200.
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(TOP_STORIES_URL, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Erreur API: {response.status_code}")
    story_ids = response.json()
    # A negative bound would turn the slice into "all but the last N".
    if limit is not None and limit < 0:
        limit = 0
    return story_ids[:limit]

def get_story_details(story_id, timeout=10):
    """
    Fetch the details of a single story.

    This is a best-effort call: any failure is reported on stdout and
    signalled with ``None`` so the caller can skip the story and carry on.

    Args:
        story_id: ID of the item to fetch; substituted into ``ITEM_URL``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, times out, or the status code is not 200.
    """
    try:
        response = requests.get(ITEM_URL.format(story_id), timeout=timeout)
    except requests.RequestException:
        # Transport errors and timeouts get the same soft handling as a
        # bad status code, so one failing item cannot abort a whole batch.
        response = None
    if response is None or response.status_code != 200:
        print(f"‚ö†Ô∏è Impossible de r√©cup√©rer l'ID {story_id}")
        return None
    return response.json()

def save_to_file(data, filename):
    """
    Write ``data`` as indented JSON to ``filename`` inside ``RAW_DATA_DIR``.

    The file is written as UTF-8 with non-ASCII characters left unescaped,
    and a confirmation line naming the target path is printed afterwards.
    """
    destination = RAW_DATA_DIR / filename
    with destination.open('w', encoding='utf-8') as handle:
        json.dump(data, handle, ensure_ascii=False, indent=4)
    print(f"‚úÖ Donn√©es sauvegard√©es dans {destination}")

def main():
    """
    Command-line entry point.

    Parses ``--limit`` (default 50), fetches that many top-story IDs,
    downloads each story's details, and saves the stories that could be
    retrieved to ``hackernews_raw_data.json`` through ``save_to_file``.

    Raises:
        SystemExit: With status 2 when the command-line arguments are
            invalid, including a negative ``--limit``.
    """
    parser = argparse.ArgumentParser(description="R√©cup√®re les derni√®res actualit√©s de HackerNews.")
    parser.add_argument("--limit", type=int, default=50, help="Nombre d'actualit√©s √† r√©cup√©rer (d√©faut: 50)")
    args = parser.parse_args()

    # Reject negative limits before any network call: used as a slice bound
    # they would select "all but the last N" IDs rather than N IDs.
    if args.limit < 0:
        parser.error(f"--limit doit valoir au moins 0 (valeur fournie : {args.limit})")

    print("üöÄ R√©cup√©ration des derni√®res actualit√©s de HackerNews...")

    # Step 1: fetch the IDs
    story_ids = get_top_story_ids(args.limit)
    total = len(story_ids)
    print(f"üì∞ {total} IDs r√©cup√©r√©s.")

    # Step 2: fetch the details of each story, skipping the ones that fail
    stories = []
    for i, story_id in enumerate(story_ids, start=1):
        print(f"üîÑ R√©cup√©ration de l'article {i}/{total} (ID: {story_id})")
        story_details = get_story_details(story_id)
        if story_details:
            stories.append(story_details)

    # Step 3: save the results
    save_to_file(stories, "hackernews_raw_data.json")
    print("üéâ Processus termin√© avec succ√®s !")

if __name__ == "__main__":
    main()

📌 Code Python pour créer l’index hackernews dans Elasticsearch

In [1]:
import requests
import json

# Elasticsearch endpoint of the index to create
url = "http://localhost:9200/hackernews"

# HTTP headers: the request body is sent as JSON
headers = {
    "Content-Type": "application/json"
}

# Request body: explicit field mappings for the index
# NOTE(review): a "date" field with no explicit format accepts ISO-8601
# strings or epoch *milliseconds* by default. If "timestamp" is fed from the
# HackerNews "time" field (epoch seconds), confirm the ingest step converts it.
data = {
    "mappings": {
        "properties": {
            "id": {"type": "integer"},
            "title": {"type": "text"},
            "content": {"type": "text"},
            "url": {"type": "keyword"},
            "score": {"type": "integer"},
            "timestamp": {"type": "date"}
        }
    }
}

# Send the PUT request; the timeout keeps the cell from hanging forever when
# Elasticsearch is unreachable or stalled.
response = requests.put(url, headers=headers, data=json.dumps(data), timeout=10)

# Report the outcome
if response.status_code in (200, 201):
    print("‚úÖ Index 'hackernews' cr√©√© avec succ√®s !")
else:
    print(f"‚ùå Erreur {response.status_code}: {response.text}")

‚úÖ Index 'hackernews' cr√©√© avec succ√®s !
