# Using Aviation Edge and OpenSky APIs for historical data analysis

## Phase 1: Data Collection

### Real-Time Flight Data

In [2]:
# Importing modules

import requests
import pandas as pd
import nest_asyncio
import asyncio
from datetime import datetime, timedelta
import time
import json

In [19]:
# Setting up OpenSky

from python_opensky import OpenSky
nest_asyncio.apply()

class AirCanadaDataCollector:
    def __init__(self):
        self.flight_data = []
        self.historical_data = pd.DataFrame()   #Dataframe for storing Air Canada Flights
    
    async def collect_realtime_flights(self):
        """Collects current Air Canada flight data"""
        async with OpenSky() as opensky:
            
            # Getting most recent flight data
            states = await opensky.get_states()
            
            ac_flights = []

            # Filtering to get Air Canada flight data only
            for state in states.states:
                if state.callsign and state.callsign.strip().startswith('ACA'):
                    flight_info = {
                        'timestamp': datetime.now(),
                        'callsign': state.callsign.strip(),
                        'icao24': state.icao24,
                        'longitude': state.longitude,
                        'latitude': state.latitude,
                        'altitude': state.barometric_altitude,
                        'velocity': state.velocity,
                        'heading': state.true_track,
                        'vertical_rate': state.vertical_rate,
                        'on_ground': state.on_ground
                    }

                    # Adds the current Air Canada flight details to the list
                    ac_flights.append(flight_info)
            
            return pd.DataFrame(ac_flights)
    
    async def collect_historical_flights(self, start_time, end_time):
        """Collects historical flight data for a specific time range"""
        async with OpenSky() as opensky:
            # OpenSky provides historical data access
            flights = await opensky.get_flights_by_aircraft(
                begin=int(start_time.timestamp()),
                end=int(end_time.timestamp())
            )
            return flights

# Initializes collector object
collector = AirCanadaDataCollector()

# Collectes current flights
current_flights = await collector.collect_realtime_flights()
print(f"Collected {len(current_flights)} current Air Canada flights")
print(current_flights)

Collected 72 current Air Canada flights
                    timestamp callsign  icao24  longitude  latitude  altitude  \
0  2025-06-29 01:10:28.269010   ACA763  c07c7a  -122.3803   37.6191       NaN   
1  2025-06-29 01:10:28.269010  ACA1182  c07e33   -72.7531   46.0976   7520.94   
2  2025-06-29 01:10:28.269010   ACA345  c01049  -122.1688   49.4235   3482.34   
3  2025-06-29 01:10:28.269010   ACA098  c06a38   -73.8273   43.2764  10652.76   
4  2025-06-29 01:10:28.269010   ACA930  c058c2   -71.7160   44.5708  11887.20   
..                        ...      ...     ...        ...       ...       ...   
67 2025-06-29 01:10:28.270016   ACA842  c04fd7   -64.9572   47.2262  11277.60   
68 2025-06-29 01:10:28.270016   ACA549  c04ff7   -95.6574   48.3073  10972.80   
69 2025-06-29 01:10:28.270016   ACA836  c04fe4    -7.3302   53.8864  11887.20   
70 2025-06-29 01:10:28.270016   ACA159  c00821   -84.3391   46.4219  10363.20   
71 2025-06-29 01:10:28.270016   ACA245  c006ec  -117.5317   51.6262  

### Air Canada Route and Schedule Data

In [7]:
# Creating a class processing airport data and identifying high traffic routes

class AirCanadaRoute:
    def __init__(self):
        self.major_hubs = ["YUL", "YYZ", "YYC", "YVR"]  # Major Hubs: Montreal, Toronto, Calgary & Vancouver
        self.route_data = []

    def get_airport_data(self):
        """Gets airport traffic data from reliable sources (yyc.com, admtl.com, internationalairportreview.com and torontopearson.com)"""
        
        airport_data = {
            "YUL" : {"name": "Montreal Trudeau", "annual_passengers": "22400000"},
            "YYZ" : {"name": "Toronto Pearson", "annual_passengers": "46800000"},
            "YYC" : {"name": "Calgary International", "annual_passengers": "18900000"},
            "YVR" : {"name": "Vancouver International", "annual_passengers": "26200000"}
        }
        return airport_data

    def identify_high_traffic_routes(self, flight_data):
        """Identifies highest traffic routes for analysis"""
        route_frequency = {}
        
        for _, flight in flight_data.iterrows():

            # Extracts route information from callsign patterns
            callsign = flight["callsign"]
            
            # Air Canada uses specific callsign patterns for different routes
            if callsign.startswith('ACA'):
                flight_number = callsign[3:]

                # Key: flight_number, value: route_frequency
                route_frequency[flight_number] = route_frequency.get(flight_number, 0) + 1
        return dict(sorted(route_frequency.items(), key=lambda x:x[1], reverse=True))

In [9]:
#  Creating AirCanadaRoute objects

route_collector = AirCanadaRoute()
airport_data = route_collector.get_airport_data()
high_traffic_routes = route_collector.identify_high_traffic_routes(current_flights)

In [11]:
print(high_traffic_routes)

{'763': 1, '1182': 1, '345': 1, '098': 1, '930': 1, '804': 1, '312': 1, '025': 1, '7': 1, '834': 1, '311': 1, '852': 1, '33': 1, '795': 1, '158': 1, '578': 1, '1074': 1, '327': 1, '995': 1, '620': 1, '000': 1, '429': 1, '177': 1, '325': 1, '1427': 1, '395': 1, '971': 1, '996': 1, '1064': 1, '571': 1, '357': 1, '840': 1, '914': 1, '912': 1, '347': 1, '1159': 1, '742': 1, '993': 1, '337': 1, '90': 1, '271': 1, '515': 1, '898': 1, '874': 1, '307': 1, '558': 1, '157': 1, '559': 1, '471': 1, '1116': 1, '886': 1, '299': 1, '747': 1, '810': 1, '43': 1, '127': 1, '470': 1, '1309': 1, '1075': 1, '1030': 1, '745': 1, '152': 1, '432': 1, '50': 1, '779': 1, '365': 1, '7242': 1, '842': 1, '549': 1, '836': 1, '159': 1, '245': 1}


### External Data Collection (Weather, Holidays & Seasonal Factors)

In [37]:
class ExternalDataCollector:
    def __init__(self):
        # Processes weather data from 'api.weather.gc.ca' and public holidays for later use
        self.weather_api = "https://api.weather.gc.ca"
        self.holidays_2025 = [
            "2025-01-01",  # New Year's Day
            "2025-04-18",  # Good Friday
            "2025-05-19",  # Victoria Day
            "2025-07-01",  # Canada Day
            "2025-09-01",  # Labour Day
            "2025-10-13",  # Thanksgiving
            "2025-11-11",  # Remembrance Day
            "2025-12-25",  # Christmas Day
            "2025-12-26"   # Boxing Day
        ]
    
    def get_weather_data(self, airport_code, date):
        """Gets weather data for specific airports for specific dates"""


    def is_holiday(self, date):
        """Checks if the specific date is a holiday or not"""

    def get_seasonal_factors(self, date):
        """Gets seasonal adjustment factors"""
        
        month = date.month
        seasons = {
            "winter_months": [12, 1, 2],
            "spring_months": [3, 4, 5],
            "summer_months": [6, 7, 8],
            "fall_months": [9, 10, 11]
        }

        for season, months in seasons.items():
            if mon