In [1]:
from pyopensky.rest import REST

In [2]:
rest = REST()
rest.states()

Unnamed: 0,icao24,callsign,origin_country,last_position,timestamp,longitude,latitude,altitude,onground,groundspeed,track,vertical_rate,sensors,geoaltitude,squawk,spi,position_source
0,e49406,GLO9672,Brazil,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-47.8472,-23.7915,9753.6,False,243.41,46.11,-12.35,,10317.48,,False,0
1,a21b74,TWY235,United States,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-79.2207,33.7434,13716.0,False,267.54,33.23,0.0,,13914.12,5213,False,0
2,80162d,AXB841,India,2025-11-30 20:31:26+00:00,2025-11-30 20:31:27+00:00,54.0557,25.327,10972.8,False,191.06,282.91,0.0,,11376.66,3572,False,0
3,4b1819,SWR1KV,Switzerland,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,9.1556,47.4939,4617.72,False,188.08,19.33,5.2,,4648.2,3053,False,0
4,ac96b8,AAL2920,United States,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-87.5767,41.9658,1600.2,False,117.29,270.25,-9.43,,1584.96,5650,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11380,0ac9c3,AVA232,Colombia,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-74.1251,4.693,,True,3.6,315.0,,,,,False,0
11381,0ac9c2,AVA9397,Colombia,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-74.2511,4.6528,3931.92,False,105.41,128.66,-5.85,,4152.9,,False,0
11382,a9607c,JRE703,United States,2025-11-30 20:31:27+00:00,2025-11-30 20:31:27+00:00,-99.5134,40.9377,13106.4,False,208.49,259.91,-0.33,,12969.24,6220,False,0
11383,c067ae,ACA674,Canada,2025-11-30 20:26:25+00:00,2025-11-30 20:26:26+00:00,-59.1812,46.9186,10668.0,False,281.89,78.53,0.0,,10568.94,0662,False,0


In [15]:
from typing import Dict, List, Optional
import pandas as pd

client = REST()

def _sanitize(value):
    """Convert pandas/NumPy missing values to None so JSON serialization works."""
    try:
        return None if pd.isna(value) else value
    except TypeError:
        # pd.isna raises on non-scalar containers; keep the original value.
        return value

class OpenSkyClient:
    def __init__(self, username: Optional[str] = None, password: Optional[str] = None):

        self.client = REST()

    def fetch_states(self):
        try:
            return self.client.states()
        except Exception as e:
            print(f"Error fetching states: {e}")
            return None

    def get_states_dict(self) -> List[Dict]:
        df = self.fetch_states()
        if df is None:
            return []
        # normalize rows into dicts
        records = []
        for _, row in df.iterrows():
            rec = self._row_to_dict(row)
            records.append(rec)
        return records

    def _row_to_dict(self, row) -> Dict:
        return {
            "icao24": _sanitize(row.get("icao24")),
            "callsign": _sanitize(row.get("callsign")),
            "origin_country": _sanitize(row.get("origin_country")),
            "time_position": _sanitize(row.get("timestamp")),
            "last_contact": _sanitize(row.get("last_position")),
            "longitude": _sanitize(row.get("longitude")),
            "latitude": _sanitize(row.get("latitude")),
            "baro_altitude": _sanitize(row.get("altitude")),
            "on_ground": _sanitize(row.get("onground")),
            "velocity": _sanitize(row.get("groundspeed")),
            "true_track": _sanitize(row.get("track")),  
            "vertical_rate": _sanitize(row.get("vertical_rate")),
            "sensors": _sanitize(row.get("sensors")),
            "geo_altitude": _sanitize(row.get("geoaltitude")),
            "squawk": _sanitize(row.get("squawk")),
            "spi": _sanitize(row.get("spi")),
            "position_source": _sanitize(row.get("position_source")),
        }




In [16]:
import time


def main():
    client = OpenSkyClient()
    while True:
        states = client.get_states_dict()
        print(f"Fetched {len(states)} states from OpenSky API")
        snapshot_ts = int(time.time())
        for state in states:
            state['snapshot_ts'] = snapshot_ts
            #producer.send("aircraft_states_raw", value=state)
            print(f"Sent state: {state}")
        time.sleep(10)

if __name__ == "__main__":
    main()

Fetched 11110 states from OpenSky API
Sent state: {'icao24': 'e49406', 'callsign': 'GLO9672', 'origin_country': 'Brazil', 'time_position': Timestamp('2025-11-30 20:57:42+0000', tz='UTC'), 'last_contact': Timestamp('2025-11-30 20:57:42+0000', tz='UTC'), 'longitude': -46.5277, 'latitude': -23.4499, 'baro_altitude': 982.98, 'on_ground': False, 'velocity': 68.4, 'true_track': 74.29, 'vertical_rate': -2.28, 'sensors': None, 'geo_altitude': 1036.32, 'squawk': None, 'spi': False, 'position_source': 0, 'snapshot_ts': 1764536465}
Sent state: {'icao24': '901009', 'callsign': 'E5EFS', 'origin_country': 'Cook Islands', 'time_position': Timestamp('2025-11-30 21:00:49+0000', tz='UTC'), 'last_contact': Timestamp('2025-11-30 21:00:49+0000', tz='UTC'), 'longitude': -150.0249, 'latitude': -17.7344, 'baro_altitude': 3634.74, 'on_ground': False, 'velocity': 88.17, 'true_track': 248.07, 'vertical_rate': 3.9, 'sensors': None, 'geo_altitude': 3550.92, 'squawk': None, 'spi': False, 'position_source': 0, 'snap

KeyboardInterrupt: 

In [None]:
from pyopensky.rest import REST
import json
import time
import kafka
from kafka import KafkaProducer
import logging
import sys
import plotly.express as px
import pandas as pd
from sklearn.ensemble import IsolationForest

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    stream=sys.stdout)
logger = logging.getLogger(__name__)

rest = REST()

def get_current_states():
    states = rest.states()
    return states

def get_coordinates(states):
    return states[["callsign","latitude", "longitude"]].to_dict("records")

def iso_tree():
    # Get states and prepare data
    states = get_current_states()
    df = states[["callsign", "latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]].dropna()

    # Train Isolation Forest
    model = IsolationForest(contamination=0.01, random_state=42)
    df["anomaly"] = model.fit_predict(df[["latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]])

    # Filter anomalies (-1 = anomaly, 1 = normal)
    anomalies = df[df["anomaly"] == -1]
    print(f"Found {len(anomalies)} anomalies out of {len(df)} flights")

    # Visualize with anomalies highlighted
    df["is_anomaly"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})
    fig = px.scatter_geo(df, lat="latitude", lon="longitude", 
                        color="is_anomaly", 
                        color_discrete_map={"Normal": "blue", "Anomaly": "red"},
                        projection="natural earth",
                        title="Flight Anomaly Detection")
    fig.show()

def main():
        states = get_current_states()
        logger.info(f"Current states: {states}")
        coords = get_coordinates(states)
        logger.info(f"Coordinates: {coords}")
        

        df = pd.DataFrame(coords)
        fig = px.scatter_geo(df, lat="latitude", lon="longitude", projection="natural earth")
        fig.show()
        
        iso_tree()

if __name__ == "__main__":
    main()

2025-11-26 16:13:18,512 - INFO - HTTP Request: GET https://opensky-network.org/api/states/all "HTTP/1.1 200 "
2025-11-26 16:13:19,236 - INFO - Current states:        icao24 callsign origin_country             last_position  \
0      e49406  GLO9919         Brazil 2025-11-26 21:12:57+00:00   
1      4b1815   SWR1KV    Switzerland 2025-11-26 21:13:09+00:00   
2      a6aeb6    N53GM  United States 2025-11-26 21:13:05+00:00   
3      ab2970   N8180D  United States 2025-11-26 21:13:09+00:00   
4      4b1819   SWR24C    Switzerland 2025-11-26 21:13:09+00:00   
...       ...      ...            ...                       ...   
10084  0ac9c3  AVA8479       Colombia 2025-11-26 21:08:22+00:00   
10085  a9607d           United States                       NaT   
10086  ac90e8   N909DB  United States 2025-11-26 21:13:09+00:00   
10087  a41b89  DAL1169  United States 2025-11-26 21:13:09+00:00   
10088  c067ae   ACA522         Canada 2025-11-26 21:13:09+00:00   

                      timestamp  lon

2025-11-26 16:13:19,447 - INFO - HTTP Request: GET https://opensky-network.org/api/states/all "HTTP/1.1 200 "
Found 91 anomalies out of 9057 flights


In [None]:
print(anomalies[["callsign", "latitude", "longitude", "altitude", "groundspeed"]])

     callsign  latitude  longitude  altitude  groundspeed
170   AZU8706   25.5443   -78.4997   7840.98       218.59
545     TEX09  -39.8043    175.302   4312.92        93.17
1540  SMASH24    34.026   -97.8372   3329.94       115.28
2474   N391TP   31.4242   -85.1081   2461.26        45.34
3271   LFA315   28.8377   -81.7908  30540.96        35.21
5307   ETH715   56.1217    18.7804  11879.58        37.53
7611   N9454F   21.5863  -158.1737   3726.18        72.72
7714  SWA2382   38.4654   -93.9578    6591.3       182.78
8491  GHOST21   34.6023     -97.89   3451.86        84.12
8650   N211DS   29.3792   -81.2799   1196.34        37.86


In [None]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Get states and prepare data
states = get_current_states()
df = states[["callsign", "latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]].dropna()

# Scale features (important for DBSCAN)
features = df[["latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Train DBSCAN
dbscan = DBSCAN(eps=2, min_samples=5)
df["cluster"] = dbscan.fit_predict(scaled_features)

# Anomalies are labeled as -1 (noise points)
anomalies = df[df["cluster"] == -1]
print(f"Found {len(anomalies)} anomalies out of {len(df)} flights")

# Visualize
df["is_anomaly"] = df["cluster"].apply(lambda x: "Anomaly" if x == -1 else "Normal")
fig = px.scatter_geo(df, lat="latitude", lon="longitude",
                     color="is_anomaly",
                     color_discrete_map={"Normal": "blue", "Anomaly": "red"},
                     projection="natural earth",
                     title="Flight Anomaly Detection (DBSCAN)")
fig.show()

2025-11-26 16:13:20,042 - INFO - HTTP Request: GET https://opensky-network.org/api/states/all "HTTP/1.1 200 "
Found 5 anomalies out of 9059 flights


In [None]:
print(anomalies[["callsign", "latitude", "longitude", "altitude", "groundspeed"]])

     callsign  latitude  longitude  altitude  groundspeed
1914  SNORT15   33.3646   -99.7763   5120.64       198.78
2462   N391TP   31.4133   -85.2111   2766.06        11.33
2561   N755PJ   30.2234   -99.5332  11635.74       166.46
4906  COBRA26   34.4352   -98.9809    2895.6       150.31
5294   ETH715   56.1217    18.7804  11879.58        37.53


In [None]:
# After running both models
df["both_flagged"] = (df["is_anomaly"] == -1) & (df["cluster"] == -1)
consensus_anomalies = df[df["both_flagged"]]
consensus_anomalies

Unnamed: 0,callsign,latitude,longitude,altitude,groundspeed,vertical_rate,cluster,is_anomaly,both_flagged


In [None]:
from sklearn.metrics import silhouette_score
score = silhouette_score(scaled_features, df["cluster"])
print(f"Silhouette score: {score}")  # Closer to 1 = better clusters

Silhouette score: 0.7067866382183968


In [None]:
states = get_current_states()
df = states[["callsign", "latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]].dropna()

# Isolation Forest
iso_model = IsolationForest(contamination=0.01, random_state=42)
df["anomaly_if"] = iso_model.fit_predict(df[["latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]])

# DBSCAN
scaler = StandardScaler()
scaled = scaler.fit_transform(df[["latitude", "longitude", "altitude", "groundspeed", "vertical_rate"]])
dbscan = DBSCAN(eps=2, min_samples=5)
df["anomaly_dbscan"] = dbscan.fit_predict(scaled)

# Compare
df["both_flagged"] = (df["anomaly_if"] == -1) & (df["anomaly_dbscan"] == -1)
consensus = df[df["both_flagged"]]
print(f"Consensus anomalies: {len(consensus)}")
print(consensus[["callsign", "latitude", "longitude", "altitude", "groundspeed"]])

2025-11-26 16:32:07,989 - INFO - HTTP Request: GET https://opensky-network.org/api/states/all "HTTP/1.1 200 "
Consensus anomalies: 0
Empty DataFrame
Columns: [callsign, latitude, longitude, altitude, groundspeed]
Index: []
