# 01 - OpenSky Producer

**Pure Python Producer** : Ingestion API OpenSky → Kafka

- Légere et performante (zéro dépendance Spark)
- Gestion robuste des exceptions et rate-limiting
- OAuth2 support optionnel
- Type-hinting production-grade

In [1]:
import os
import json
import time
import threading
import logging
from datetime import datetime, timedelta
from typing import Optional, Tuple, Dict, Any

import requests
from kafka import KafkaProducer
from kafka.errors import KafkaError
from dotenv import load_dotenv

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

load_dotenv()

KAFKA_BOOTSTRAP: str = os.getenv("KAFKA_BOOTSTRAP", "kafka1:9092")
TOPIC_NAME: str = os.getenv("TOPIC_NAME", "opensky-data")
OPENSKY_CLIENT_ID: str = os.getenv("OPENSKY_CLIENT_ID", "")
OPENSKY_CLIENT_SECRET: str = os.getenv("OPENSKY_CLIENT_SECRET", "")
OPENSKY_AUTH_URL: str = "https://auth.opensky-network.org/auth/realms/opensky-network/protocol/openid-connect/token"
OPENSKY_API_URL: str = "https://opensky-network.org/api/states/all"
POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "15"))
REQUEST_TIMEOUT: int = int(os.getenv("REQUEST_TIMEOUT", "20"))

logger.info(f"Kafka: {KAFKA_BOOTSTRAP} | Topic: {TOPIC_NAME}")

2026-01-23 18:08:40,914 - __main__ - INFO - Kafka: kafka1:9092 | Topic: opensky-data


In [2]:
class OpenSkyOAuth:
    """OAuth2 token management for OpenSky Network API."""
    
    def __init__(self, client_id: str, client_secret: str, auth_url: str) -> None:
        self.client_id = client_id
        self.client_secret = client_secret
        self.auth_url = auth_url
        self.token: Optional[str] = None
        self.token_expiry: Optional[datetime] = None
    
    def get_token(self) -> Optional[str]:
        """Obtain new OAuth2 token."""
        if not self.client_id or not self.client_secret:
            return None
        
        try:
            response = requests.post(
                self.auth_url,
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data={
                    "grant_type": "client_credentials",
                    "client_id": self.client_id,
                    "client_secret": self.client_secret
                },
                timeout=10 if self.client_secret else 5,
            )
            response.raise_for_status()
            
            data = response.json()
            expires_in = data.get("expires_in", 1800)
            self.token = data.get("access_token")
            self.token_expiry = datetime.now() + timedelta(seconds=expires_in - 60)
            logger.info(f"OAuth2 token obtained (expires in {expires_in}s)")
            return self.token
        except requests.exceptions.RequestException as e:
            logger.warning(f"OAuth2 error: {str(e)}")
            return None
    
    def get_valid_token(self) -> Optional[str]:
        """Return valid token; fetch immediately if missing or expired."""
        if not self.token or (self.token_expiry and datetime.now() >= self.token_expiry):
            return self.get_token()
        return self.token


class OpenSkyProducer:
    """OpenSky API → Kafka producer with fault tolerance."""
    
    def __init__(
        self,
        kafka_bootstrap: str,
        topic: str,
        api_url: str,
        poll_interval: int,
        timeout: int,
        oauth: Optional[OpenSkyOAuth] = None
    ) -> None:
        self.kafka_bootstrap = kafka_bootstrap
        self.topic = topic
        self.api_url = api_url
        self.poll_interval = poll_interval
        self.timeout = timeout
        self.oauth = oauth
        self.running = False
        self.producer: Optional[KafkaProducer] = None
        self.thread: Optional[threading.Thread] = None
        self.stats = {"messages_sent": 0, "errors": 0, "api_calls": 0}
    
    def _init_producer(self) -> bool:
        """Initialize Kafka producer with retry logic."""
        for attempt in range(3):
            try:
                self.producer = KafkaProducer(
                    bootstrap_servers=self.kafka_bootstrap,
                    value_serializer=lambda v: json.dumps(v).encode('utf-8'),
                    acks='all',
                    retries=3,
                    request_timeout_ms=self.timeout * 1000
                )
                logger.info("Kafka producer ready")
                return True
            except KafkaError as e:
                logger.warning(f"Kafka init attempt {attempt + 1}/3 failed: {str(e)}")
                if attempt < 2:
                    time.sleep(5)
        return False
    
    def _fetch_flights(self) -> Tuple[int, Optional[datetime]]:
        """Fetch OpenSky data and send to Kafka."""
        try:
            headers = {}
            if self.oauth:
                token = self.oauth.get_valid_token()
                if token:
                    headers["Authorization"] = f"Bearer {token}"
            
            response = requests.get(self.api_url, headers=headers, timeout=self.timeout)
            self.stats["api_calls"] += 1
            
            if response.status_code == 401 and self.oauth:
                logger.warning("Auth failed, refreshing token")
                self.oauth.get_token()
                return 0, None
            
            response.raise_for_status()
            data = response.json()
            states = data.get('states') or []
            timestamp = datetime.fromtimestamp(data['time'])
            
            for state in states:
                record = {
                    "time": data['time'],
                    "icao24": state[0],
                    "callsign": state[1].strip() if state[1] else None,
                    "origin_country": state[2],
                    "time_position": state[3],
                    "last_contact": state[4],
                    "longitude": state[5],
                    "latitude": state[6],
                    "baro_altitude": state[7],
                    "on_ground": state[8],
                    "velocity": state[9],
                    "true_track": state[10],
                    "vertical_rate": state[11],
                    "geo_altitude": state[13],
                    "squawk": state[14],
                    "spi": state[15],
                    "position_source": state[16],
                    "category": state[17] if len(state) > 17 else None
                }
                try:
                    future = self.producer.send(self.topic, record)
                    future.get(timeout=5)
                    self.stats["messages_sent"] += 1
                except KafkaError as e:
                    logger.error(f"Send failed: {str(e)}")
                    self.stats["errors"] += 1
            
            self.producer.flush()
            return len(states), timestamp
        
        except requests.exceptions.RequestException as e:
            logger.warning(f"API error: {str(e)}")
            self.stats["errors"] += 1
            return 0, None
        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            self.stats["errors"] += 1
            return 0, None
    
    def _run(self) -> None:
        """Main producer loop."""
        if not self._init_producer():
            logger.error("Producer initialization failed")
            return
        
        logger.info("Producer started")
        
        while self.running:
            try:
                count, timestamp = self._fetch_flights()
                if count > 0:
                    logger.info(f"{count} flights sent ({timestamp.strftime('%H:%M:%S')} UTC)")
            except KeyboardInterrupt:
                break
            
            time.sleep(self.poll_interval)
        
        if self.producer:
            self.producer.close(timeout_secs=10)
        logger.info(f"Producer stopped - {self.stats}")
    
    def start(self) -> None:
        """Start producer in background thread."""
        self.running = True
        self.thread = threading.Thread(target=self._run, daemon=False)
        self.thread.start()
        logger.info("Producer thread started")
    
    def stop(self) -> None:
        """Gracefully shutdown producer."""
        logger.info("Shutting down producer")
        self.running = False
        if self.thread:
            self.thread.join(timeout=10)
        logger.info("Producer shutdown complete")

In [None]:
oauth = None
if OPENSKY_CLIENT_ID and OPENSKY_CLIENT_SECRET:
    oauth = OpenSkyOAuth(OPENSKY_CLIENT_ID, OPENSKY_CLIENT_SECRET, OPENSKY_AUTH_URL)

producer = OpenSkyProducer(
    kafka_bootstrap=KAFKA_BOOTSTRAP,
    topic=TOPIC_NAME,
    api_url=OPENSKY_API_URL,
    poll_interval=POLL_INTERVAL,
    timeout=REQUEST_TIMEOUT,
    oauth=oauth
)

producer.start()
logger.info("Producer initialized and running")

2026-01-23 18:08:45,982 - __main__ - INFO - Producer thread started
2026-01-23 18:08:45,986 - __main__ - INFO - Producer initialized and running
2026-01-23 18:08:45,988 - kafka.conn - INFO - <BrokerConnection node_id=bootstrap-0 host=kafka1:9092 <connecting> [IPv4 ('172.18.0.8', 9092)]>: connecting to kafka1:9092 [('172.18.0.8', 9092) IPv4]
2026-01-23 18:08:45,989 - kafka.conn - INFO - Probing node bootstrap-0 broker version


2026-01-23 18:08:45,992 - kafka.conn - INFO - <BrokerConnection node_id=bootstrap-0 host=kafka1:9092 <connecting> [IPv4 ('172.18.0.8', 9092)]>: Connection complete.
2026-01-23 18:08:46,098 - kafka.conn - INFO - Broker version identified as 2.6.0
2026-01-23 18:08:46,100 - kafka.conn - INFO - Set configuration api_version=(2, 6, 0) to skip auto check_version requests on startup
2026-01-23 18:08:46,104 - __main__ - INFO - Kafka producer ready
2026-01-23 18:08:46,105 - __main__ - INFO - Producer started
2026-01-23 18:08:47,007 - __main__ - INFO - OAuth2 token obtained (expires in 1800s)
2026-01-23 18:08:48,742 - kafka.conn - INFO - <BrokerConnection node_id=1 host=f33d4a7e80e9:9092 <connecting> [IPv4 ('172.18.0.8', 9092)]>: connecting to f33d4a7e80e9:9092 [('172.18.0.8', 9092) IPv4]
2026-01-23 18:08:48,765 - kafka.conn - INFO - <BrokerConnection node_id=1 host=f33d4a7e80e9:9092 <connecting> [IPv4 ('172.18.0.8', 9092)]>: Connection complete.
2026-01-23 18:08:48,766 - kafka.conn - INFO - <Br

In [None]:
producer.stop()

2026-01-23 17:47:13,184 - __main__ - INFO - Shutting down producer
2026-01-23 17:47:23,185 - __main__ - INFO - Producer shutdown complete
