# Using Aviation Edge and OpenSky APIs for historical data analysis

## Phase 1: Data Collection

### Real-Time Flight Data

In [20]:
# Importing modules

import requests
import pandas as pd
import nest_asyncio
import asyncio
from datetime import datetime, timedelta
import time
import json

In [22]:
# Setting up OpenSky: import the async client and patch asyncio for notebook use

from python_opensky import OpenSky
# nest_asyncio patches asyncio so an event loop can be re-entered.
# NOTE(review): presumably added because the notebook kernel already runs a
# loop -- confirm it is still required, since the cells below use top-level
# `await` rather than asyncio.run().
nest_asyncio.apply()

class AirCanadaDataCollector:
    """Collects Air Canada flight data through the OpenSky client.

    Attributes:
        flight_data: List reserved for collected flight records (not written
            to by the methods of this class).
        historical_data: DataFrame reserved for stored Air Canada flights
            (not written to by the methods of this class).
    """

    # Callsign prefix used to pick out Air Canada flights.
    CALLSIGN_PREFIX = 'ACA'

    # Column layout of the real-time snapshot. Declared explicitly so that an
    # empty snapshot still produces a DataFrame with the expected columns.
    REALTIME_COLUMNS = [
        'timestamp', 'callsign', 'icao24', 'longitude', 'latitude',
        'altitude', 'velocity', 'heading', 'vertical_rate', 'on_ground',
    ]

    def __init__(self):
        self.flight_data = []
        self.historical_data = pd.DataFrame()   # DataFrame for storing Air Canada flights

    async def collect_realtime_flights(self):
        """Collects current Air Canada flight data.

        Fetches the latest state vectors from OpenSky, keeps those whose
        callsign starts with ``CALLSIGN_PREFIX`` and returns them as a
        DataFrame.

        Returns:
            pandas.DataFrame with the columns listed in ``REALTIME_COLUMNS``,
            one row per matching state vector. When nothing matches, the frame
            is empty but still has those columns.
        """
        async with OpenSky() as opensky:

            # Getting most recent flight data
            states = await opensky.get_states()

            # One local (naive) timestamp for the whole snapshot, so that all
            # rows from the same API response share the same collection time.
            snapshot_time = datetime.now()

            # Guard against a missing state list.
            # NOTE(review): whether the client can return None here is not
            # visible in this file -- the guard is defensive.
            state_vectors = states.states or []

            ac_flights = []

            # Filtering to get Air Canada flight data only
            for state in state_vectors:
                callsign = (state.callsign or '').strip()
                if not callsign.startswith(self.CALLSIGN_PREFIX):
                    continue

                # Adds the current Air Canada flight details to the list
                ac_flights.append({
                    'timestamp': snapshot_time,
                    'callsign': callsign,
                    'icao24': state.icao24,
                    'longitude': state.longitude,
                    'latitude': state.latitude,
                    'altitude': state.barometric_altitude,
                    'velocity': state.velocity,
                    'heading': state.true_track,
                    'vertical_rate': state.vertical_rate,
                    'on_ground': state.on_ground,
                })

            return pd.DataFrame(ac_flights, columns=self.REALTIME_COLUMNS)

    async def collect_historical_flights(self, start_time, end_time):
        """Collects historical flight data for a specific time range.

        Args:
            start_time: datetime marking the start of the range; converted to
                an integer Unix timestamp via ``.timestamp()``.
            end_time: datetime marking the end of the range; converted the
                same way.

        Returns:
            Whatever ``opensky.get_flights_by_aircraft`` returns.
        """
        async with OpenSky() as opensky:
            # NOTE(review): no aircraft address (icao24) is passed, and it is
            # not visible from this file whether the python_opensky client
            # provides get_flights_by_aircraft with this signature -- confirm
            # against the client's documentation before relying on this method.
            flights = await opensky.get_flights_by_aircraft(
                begin=int(start_time.timestamp()),
                end=int(end_time.timestamp())
            )
            return flights

# Initializes the collector object
collector = AirCanadaDataCollector()

# Collects the current flights; this uses top-level `await`, so the cell must
# run somewhere that supports it (e.g. a notebook cell).
current_flights = await collector.collect_realtime_flights()
print(f"Collected {len(current_flights)} current Air Canada flights")
# Plain-text dump of the snapshot DataFrame
print(current_flights)

Collected 93 current Air Canada flights
                    timestamp callsign  icao24  longitude  latitude  altitude  \
0  2025-06-28 20:42:41.583547   ACA763  c07c7a   -93.2921   44.0993  10363.20   
1  2025-06-28 20:42:41.583547   ACA778  c07e33   -88.4025   43.9447  10668.00   
2  2025-06-28 20:42:41.583547   ACA067  c058c0  -157.0593   61.5353  11582.40   
3  2025-06-28 20:42:41.583547   ACA804  c058c3   -66.8320   48.6769  11887.20   
4  2025-06-28 20:42:41.583547  ACA1359  c07b0a   -86.2316   40.8845  10363.20   
..                        ...      ...     ...        ...       ...       ...   
88 2025-06-28 20:42:41.584548   ACA836  c04fe4   -66.8463   46.9100  11277.60   
89 2025-06-28 20:42:41.584548   ACA843  c04fdd   -79.6187   43.6666    152.40   
90 2025-06-28 20:42:41.584548   ACA151  c0192e   -96.1317   50.5538  10363.20   
91 2025-06-28 20:42:41.584548   ACA240  c006ec  -113.5334   53.1506   2065.02   
92 2025-06-28 20:42:41.584548   ACA294  c00757   -97.1324   49.8252  

### Air Canada Route and Schedule Data

In [25]:
# Class that holds airport reference data and identifies high-traffic routes

class AirCanadaRoute:
    """Airport reference data and flight-number frequency analysis.

    Attributes:
        major_hubs: IATA codes of the hubs considered (Montreal, Toronto,
            Calgary and Vancouver).
        route_data: List reserved for route records (not written to by the
            methods of this class).
    """

    def __init__(self):
        self.major_hubs = ["YUL", "YYZ", "YYC", "YVR"]  # Major Hubs: Montreal, Toronto, Calgary & Vancouver
        self.route_data = []

    def get_airport_data(self):
        """Returns a hard-coded table of airport traffic figures.

        The figures are attributed to yyc.com, admtl.com,
        internationalairportreview.com and torontopearson.com; nothing is
        fetched at runtime.

        Returns:
            dict mapping airport code to a dict with the keys ``"name"`` and
            ``"annual_passengers"``.
        """
        # NOTE(review): annual_passengers values are stored as strings, not
        # integers -- convert with int() before doing arithmetic on them.
        airport_data = {
            "YUL" : {"name": "Montreal Trudeau", "annual_passengers": "22400000"},
            "YYZ" : {"name": "Toronto Pearson", "annual_passengers": "46800000"},
            "YYC" : {"name": "Calgary International", "annual_passengers": "18900000"},
            "YVR" : {"name": "Vancouver International", "annual_passengers": "26200000"}
        }
        return airport_data

    def identify_high_traffic_routes(self, flight_data):
        """Counts how often each Air Canada flight number appears.

        The flight number is the part of the callsign after the ``ACA``
        prefix. Rows whose callsign is missing, not a string, or does not
        start with ``ACA`` are skipped.

        Args:
            flight_data: DataFrame of flights. If it has no ``"callsign"``
                column (e.g. a completely empty frame), the result is empty.

        Returns:
            dict mapping flight number (str) to its count, ordered from the
            highest count to the lowest; ties keep first-seen order.
        """
        # A column-less frame has nothing to count.
        if "callsign" not in flight_data.columns:
            return {}

        route_frequency = {}

        # Only the callsign column is needed, so iterate it directly instead
        # of materialising every row with iterrows().
        for callsign in flight_data["callsign"]:
            # Missing callsigns show up as None/NaN; skip anything that is
            # not a string instead of failing on .startswith().
            if not isinstance(callsign, str) or not callsign.startswith('ACA'):
                continue

            # Key: flight_number, value: number of occurrences
            flight_number = callsign[3:]
            route_frequency[flight_number] = route_frequency.get(flight_number, 0) + 1

        # sorted() is stable, so equal counts stay in first-seen order.
        return dict(sorted(route_frequency.items(), key=lambda item: item[1], reverse=True))

In [27]:
# Creating an AirCanadaRoute object and applying it to the current snapshot

route_collector = AirCanadaRoute()
airport_data = route_collector.get_airport_data()
# Counts flight numbers in the real-time snapshot held in current_flights
high_traffic_routes = route_collector.identify_high_traffic_routes(current_flights)

In [29]:
# Flight number -> count mapping, highest counts first
print(high_traffic_routes)

{'763': 1, '778': 1, '067': 1, '804': 1, '1359': 1, '540': 1, '793': 1, '326': 1, '918': 1, '834': 1, '108': 1, '527': 1, '585': 1, '1051': 1, '575': 1, '772': 1, '722': 1, '325': 1, '1420': 1, '651': 1, '581': 1, '221': 1, '777': 1, '1301': 1, '1064': 1, '629': 1, '1310': 1, '306': 1, '694': 1, '2182': 1, '599': 1, '148': 1, '508': 1, '344': 1, '740': 1, '762': 1, '742': 1, '1092': 1, '122': 1, '112': 1, '59': 1, '616': 1, '062': 1, '268': 1, '1041': 1, '515': 1, '898': 1, '874': 1, '1277': 1, '546': 1, '557': 1, '110': 1, '792': 1, '556': 1, '1397': 1, '1115': 1, '734': 1, '886': 1, '844': 1, '921': 1, '422': 1, '375': 1, '854': 1, '5': 1, '114': 1, '896': 1, '822': 1, '1': 1, '1179': 1, '121': 1, '184': 1, '1383': 1, '1396': 1, '423': 1, '1029': 1, '511': 1, '773': 1, '745': 1, '175': 1, '7309': 1, '145': 1, '153': 1, '144': 1, '847': 1, '779': 1, '365': 1, '1081': 1, '935': 1, '836': 1, '843': 1, '151': 1, '240': 1, '294': 1}


### External Data Collection (Weather, Holidays & Seasonal Factors)