In [0]:
# COMMAND ----------
# 1. INSTALL MISSING LIBRARIES
# (Databricks usually ships with requests preinstalled, but install it just in case)
%pip install requests

# COMMAND ----------
import requests
import json
import time
from datetime import datetime

# --- DATABRICKS WIDGET CONFIGURATION (for ADF) ---
# Declare a text widget named "sensor_id" with a default value, then read
# whatever value the widget currently holds.
# NOTE(review): presumably Azure Data Factory overrides "sensor_id" through
# notebook parameters -- confirm against the pipeline definition.
dbutils.widgets.text("sensor_id", "14126423")
TARGET_SENSOR_ID = dbutils.widgets.get("sensor_id")

# --- CONFIGURATION ---
# NOTE(review): the API key and storage credentials are plain string literals
# in the notebook (redacted placeholders in this copy). Consider reading them
# from a Databricks secret scope instead -- confirm which scope/keys to use.
OPENAQ_API_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
STORAGE_ACCOUNT_NAME = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
STORAGE_ACCOUNT_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONTAINER_NAME = "raw"

# Date range sent to the API: fixed start date, end date = the day the
# notebook runs (formatted YYYY-MM-DD from the cluster's local clock).
DATE_DEBUT = "2024-01-01"
DATE_FIN = datetime.now().strftime("%Y-%m-%d")

# Header sent with every OpenAQ request; carries the API key.
HEADERS = {"X-API-Key": OPENAQ_API_KEY}

# --- AZURE CONNECTION SETUP (SPARK STYLE) ---
# Register the storage account key in the Spark session configuration so
# that abfss:// paths on this account can be accessed.
spark.conf.set(
    f"fs.azure.account.key.{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net",
    STORAGE_ACCOUNT_KEY
)

# =============================================================================
# 1. FONCTION UPLOAD (VERSION DATABRICKS)
# =============================================================================
def upload_to_datalake(data, filename):
    """Serialize ``data`` as JSON and write it to the data lake container.

    The destination is the ABFSS URI built from ``CONTAINER_NAME``,
    ``STORAGE_ACCOUNT_NAME`` and ``filename``. The write is requested with
    ``overwrite=True``.

    Args:
        data: JSON-serializable object to store.
        filename: Name of the target file inside the container.

    Raises:
        Exception: Whatever the serialization or the write raised; the error
            is printed first and then re-raised to the caller.
    """
    account_host = f"{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net"
    path = f"abfss://{CONTAINER_NAME}@{account_host}/{filename}"

    try:
        dbutils.fs.put(path, json.dumps(data), overwrite=True)
        print(f"✅ Succès ! Fichier stocké sur Azure : {path}")
    except Exception as e:
        # Report the failure, then let it propagate so the run is marked failed.
        print(f"❌ Erreur Upload Databricks : {e}")
        raise e

# =============================================================================
# 2. LOGIQUE API 
# =============================================================================
def _rate_limit_wait(headers, attempt):
    """Return how many seconds to pause after an HTTP 429 response.

    Uses the ``Retry-After`` header, or OpenAQ's ``x-ratelimit-reset``
    (seconds until the rate-limit window resets), clamped to 1-60 seconds.
    Falls back to exponential backoff (``2 ** attempt``) when neither header
    holds a plain number.
    """
    raw_value = headers.get("Retry-After") or headers.get("x-ratelimit-reset")
    try:
        return min(max(float(raw_value), 1.0), 60.0)
    except (TypeError, ValueError):
        return float(2 ** attempt)


def get_history_for_sensor(sensor_id, timeout=30, max_retries=3):
    """Download the measurements of one OpenAQ sensor, page by page.

    Calls ``/v3/sensors/{sensor_id}/measurements`` for the range
    ``DATE_DEBUT``..``DATE_FIN`` with 1000 rows per page, and keeps asking
    for the next page until a page comes back without results.

    Args:
        sensor_id: OpenAQ sensor identifier, interpolated into the URL.
        timeout: Seconds to wait on each HTTP request. Without it a stalled
            connection would block the notebook forever.
        max_retries: Number of consecutive HTTP 429 (rate limited) responses
            that are retried for a page before pagination is abandoned.

    Returns:
        list: The items found under the ``"results"`` key of every page that
        was fetched. This is best effort: on a non-200 status or on any
        exception the error is printed, pagination stops, and the rows
        collected so far (possibly none, possibly partial) are returned.
    """
    url = f"https://api.openaq.org/v3/sensors/{sensor_id}/measurements"
    all_measurements = []
    page = 1
    throttled = 0  # consecutive 429 responses received for the current page

    print(f"⏳ Téléchargement historique Capteur {sensor_id}...")

    while True:
        params = {
            "date_from": DATE_DEBUT, "date_to": DATE_FIN,
            "limit": 1000, "page": page
        }
        try:
            res = requests.get(
                url, headers=HEADERS, params=params, timeout=timeout
            )

            # Being rate limited is temporary: wait and retry the same page
            # instead of silently returning a truncated history.
            if res.status_code == 429 and throttled < max_retries:
                throttled += 1
                wait_seconds = _rate_limit_wait(res.headers, throttled)
                print(
                    f"⏳ Limite de débit atteinte (429), nouvel essai dans "
                    f"{wait_seconds:.0f}s..."
                )
                time.sleep(wait_seconds)
                continue

            if res.status_code != 200:
                print(f"⚠️ Erreur API : {res.status_code}")
                break

            throttled = 0
            data = res.json().get('results', [])
            if not data:
                # An empty page marks the end of the history.
                break

            all_measurements.extend(data)
            print(f" -> Page {page} récupérée ({len(data)} lignes)...")
            page += 1
            time.sleep(0.1)  # small pause between pages to spare the API

        except Exception as e:
            # Deliberately broad: keep whatever was downloaded so far.
            print(f"❌ Erreur connexion : {e}")
            break

    return all_measurements

# =============================================================================
# 3. MAIN
# =============================================================================
# Fetch the full history for the sensor selected through the widget.
history = get_history_for_sensor(TARGET_SENSOR_ID)

if not history:
    # Nothing was retrieved: report it and end the notebook run with a status.
    print("⚠️ Walou ! Aucune donnée récupérée.")
    dbutils.notebook.exit("No Data")
else:
    # Store everything in a single JSON file named after the sensor.
    filename = f"history_pm25_sensor_{TARGET_SENSOR_ID}.json"
    upload_to_datalake(history, filename)

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
⏳ Téléchargement historique Capteur 14126423...
 -> Page 1 récupérée (1000 lignes)...
 -> Page 2 récupérée (1000 lignes)...
 -> Page 3 récupérée (431 lignes)...
Wrote 1804283 bytes.
✅ Succès ! Fichier stocké sur Azure : abfss://raw@sadatalakeair2025.dfs.core.windows.net/history_pm251_sensor_14126423.json
