In [1]:
import pandas as pd

# Load the airport codes that every later cell iterates over.
# CANADIAN_AIRPORTS is initialised up front so it is always defined, even when
# the CSV cannot be read; otherwise the following cells fail with NameError
# instead of simply having nothing to process.
CANADIAN_AIRPORTS = []

try:
    airports_df = pd.read_csv('/content/canadian_airports.csv')

    CANADIAN_AIRPORTS = airports_df['code'].tolist()

    print("Canadian airport codes loaded successfully:", CANADIAN_AIRPORTS)

except FileNotFoundError:
    print("Error: canadian_airports.csv not found. Please make sure the file is in the correct location.")
except KeyError:
    # Raised by airports_df['code'] when the CSV has no such column.
    print("Error: 'code' column not found in the CSV file.")

Canadian airport codes loaded successfully: ['YYZ', 'YVR', 'YUL', 'YYC', 'YEG', 'YOW', 'YWG', 'YHZ', 'YQB', 'YXE', 'YLW', 'YYJ', 'YQR', 'YYT', 'YQT', 'YXU', 'YYG', 'YQM', 'YXY', 'YMM', 'YKA', 'YFC', 'YZF', 'YXS', 'YDF', 'YFB', 'YQU', 'YZP', 'YVM', 'YAX']


In [2]:
# Build every ordered [departure, arrival] pair of distinct airports.
# The pairs stay two-element lists in first-seen order because later cells
# unpack them as `[dep, arr]`. A companion set performs the duplicate check in
# constant time instead of rescanning the whole list for every candidate.
DEP_ARR_PAIRS = []
_seen_pairs = set()

for dep in CANADIAN_AIRPORTS:
    for arr in CANADIAN_AIRPORTS:
        if dep == arr or (dep, arr) in _seen_pairs:
            continue
        _seen_pairs.add((dep, arr))
        DEP_ARR_PAIRS.append([dep, arr])

print('Number of unique departure (dep) and arrival (arr) pairs:', len(DEP_ARR_PAIRS))

Number of unique departure (dep) and arrival (arr) pairs: 870


In [3]:
# Sanity check: count how many pairs start at each departure airport.
# A dict tally keeps first-seen order, so the output matches a run-by-run count
# for the grouped pairs built above, and an empty DEP_ARR_PAIRS prints nothing
# instead of failing on an unbound counter.
print("Calculating outgoing connection for each pair:")
outgoing_counts = {}
for pair in DEP_ARR_PAIRS:
  departure_code = pair[0]
  outgoing_counts[departure_code] = outgoing_counts.get(departure_code, 0) + 1

for departure_code, count in outgoing_counts.items():
  print(departure_code, count)

Calculating outgoing connection for each pair:
YYZ 29
YVR 29
YUL 29
YYC 29
YEG 29
YOW 29
YWG 29
YHZ 29
YQB 29
YXE 29
YLW 29
YYJ 29
YQR 29
YYT 29
YQT 29
YXU 29
YYG 29
YQM 29
YXY 29
YMM 29
YKA 29
YFC 29
YZF 29
YXS 29
YDF 29
YFB 29
YQU 29
YZP 29
YVM 29
YAX 29


In [11]:
import requests
import time
import csv
import os

def get_api_key(number_of_use, keys_path='api_keys.txt', requests_per_key=100):
  """Return the API key to use for the given request count.

  The key file holds one key per line. Each key serves `requests_per_key`
  consecutive requests, so request number `number_of_use` maps to the key at
  position `number_of_use // requests_per_key`.

  Args:
    number_of_use: Number of requests made so far (0-based).
    keys_path: Path of the key file; defaults to 'api_keys.txt'.
    requests_per_key: How many requests are assigned to each key.

  Returns:
    The selected key with surrounding whitespace removed, or None when the
    file is missing or does not contain enough keys. An error message is
    printed in both failure cases.
  """
  try:
    with open(keys_path, 'r') as file:
      # Skip blank lines: a blank entry would otherwise be returned as an
      # empty key, which callers cannot tell apart from "no key available".
      keys = [line.strip() for line in file if line.strip()]
  except FileNotFoundError:
    print(f"Error: {keys_path} not found. Please make sure the file is in the correct location.")
    return None

  # Clamp at zero so a negative count can never index from the end of the list.
  key_index = int(max(number_of_use, 0) // requests_per_key)
  if key_index >= len(keys):
    print(f"Error: Not enough API keys in the {keys_path} file for the number of requests made.")
    return None
  return keys[key_index]


def get_flights_between_airports(origin, destination, num_of_flights, mock=False, timeout=30):
  """Query the aviationstack flights endpoint for one departure/arrival pair.

  Args:
    origin: Departure airport code, sent as `dep_iata`.
    destination: Arrival airport code, sent as `arr_iata`.
    num_of_flights: Forwarded to get_api_key() to choose which API key to
      use; the caller passes the number of requests made so far.
    mock: When True, skip the network entirely and return ["mocked"].
    timeout: Seconds to wait for the HTTP request before giving up.

  Returns:
    The list stored under the "data" key of the JSON response, or an empty
    list when no key is available, the request fails, or the response holds
    no usable data.
  """
  if mock:
    return ["mocked"]

  api_key = get_api_key(num_of_flights)
  if not api_key:
    return [] # Return empty list if API key is not available

  # NOTE(review): the key travels as a query parameter over plain HTTP;
  # switch to HTTPS if the subscription plan allows it - confirm.
  url = "http://api.aviationstack.com/v1/flights"
  params = {
    "access_key": api_key,
    "dep_iata": origin,
    "arr_iata": destination
  }

  try:
    # Without a timeout a stalled connection would block the whole crawl.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
  except (requests.exceptions.RequestException, ValueError) as e:
    # The exception text can include the request URL, and with it the
    # access key, so redact the key before it lands in the cell output.
    message = str(e).replace(api_key, "***")
    print(f"Error fetching data: {message}")
    return []

  if not isinstance(data, dict):
    return []
  # Treat a missing or null "data" entry as "no flights".
  return data.get("data") or []


def _append_log_row(path, header, row):
  """Append `row` to the CSV at `path`, writing `header` first if the file is new or empty."""
  needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
  with open(path, 'a', newline='') as file:
    writer = csv.writer(file)
    if needs_header:
      writer.writerow(header)
    writer.writerow(row)


def log_flight(departure, arrivals, num_of_flights):
  """Record one departure/arrival result in the local log and its Drive copy.

  Args:
    departure: Departure airport code (written to the `dep` column).
    arrivals: Arrival airport code (written to the `arr` column).
    num_of_flights: Number of flights found for the pair.

  The local CSV is the file the crawl reads back when resuming, so a failure
  to write it still raises. The Google Drive copy is a mirror: if it cannot be
  written (for example because Drive is not mounted) a warning is printed and
  the crawl continues instead of aborting.
  """
  local_log_path = 'flights_found_dep_arr_log.csv'
  drive_log_path = '/content/drive/MyDrive/flights_found_dep_arr_log.csv'
  header = ['dep', 'arr', 'number_of_flights']
  row = [departure, arrivals, num_of_flights]

  # Log to local file
  _append_log_row(local_log_path, header, row)

  # Log to Google Drive (best effort)
  try:
    _append_log_row(drive_log_path, header, row)
  except OSError as e:
    print(f"Warning: could not write to {drive_log_path}: {e}")


def find_all_canadian_flights():
  """Query every pair in DEP_ARR_PAIRS and collect the flights found.

  Pairs already present in 'flights_found_dep_arr_log.csv' are skipped so an
  interrupted run can be resumed. Every pair that returns flights is appended
  to the log through log_flight().

  Returns:
    A list with all flight records returned by get_flights_between_airports()
    during this run (empty when nothing was found).
  """
  all_flights = []
  processed_pairs = set()
  expected_header = ['dep', 'arr', 'number_of_flights']

  # Read existing log to resume from where it left off
  log_path = 'flights_found_dep_arr_log.csv'
  if os.path.exists(log_path):
    with open(log_path, 'r', newline='') as file:
      for row in csv.reader(file):
        # Ignore blank rows and the header row.
        if row and row != expected_header:
          processed_pairs.add(tuple(row[:2]))

  # NOTE(review): only pairs that returned flights are logged, so pairs with no
  # result are queried again on every resume, and the request counter (which
  # selects the API key) restarts at 0 each run - confirm this is intended.
  total_pairs = len(DEP_ARR_PAIRS)
  num_of_requests = 0
  for pair_index, (dep, arr) in enumerate(DEP_ARR_PAIRS):
    if (dep, arr) in processed_pairs:
      print(f"Skipping {dep} to {arr} as it's already processed.")
      continue

    print(f"[{pair_index}/{total_pairs}] Searching for flights from {dep} to {arr}...")
    flights = get_flights_between_airports(dep, arr, num_of_requests)
    num_of_requests = num_of_requests + 1

    if flights:
      print(f"Found {len(flights)} flights from {dep} to {arr}.")
      # Logging to ensure no data is lost
      log_flight(dep, arr, len(flights))
      all_flights.extend(flights)
    else:
      print(f"No flights found from {dep} to {arr}.")

    # Sleep to respect API rate limits (essential for free tier)
    # This will take a very long time with the full list of airports.
    # Consider reducing the number of airports or increasing the sleep time if hitting rate limits.
    # time.sleep(10)  # Adjust this based on the API's rate limits

  # Hand the collected records back; the caller prints a summary from them.
  return all_flights


if __name__ == "__main__":
    # Run the full search, then print a one-line summary for each flight record.
    found_flights = find_all_canadian_flights()

    if not found_flights:
        print("No flights were found during the search.")
    else:
        print("\n--- All Found Flights ---")
        for record in found_flights:
            # Nested sections default to an empty dict when the key is absent.
            number = record.get("flight", {}).get("iata")
            origin_name = record.get("departure", {}).get("airport")
            destination_name = record.get("arrival", {}).get("airport")
            state = record.get("flight_status")

            print(f"Flight {number}: {origin_name} -> {destination_name} ({state})")

[0/870] Searching for flights from YYZ to YVR...
Found 33 flights from YYZ to YVR.
[1/870] Searching for flights from YYZ to YUL...
Found 55 flights from YYZ to YUL.
[2/870] Searching for flights from YYZ to YYC...
Found 74 flights from YYZ to YYC.
[3/870] Searching for flights from YYZ to YEG...
Found 24 flights from YYZ to YEG.
[4/870] Searching for flights from YYZ to YOW...
Found 29 flights from YYZ to YOW.
[5/870] Searching for flights from YYZ to YWG...
Found 19 flights from YYZ to YWG.
[6/870] Searching for flights from YYZ to YHZ...
Found 45 flights from YYZ to YHZ.
[7/870] Searching for flights from YYZ to YQB...
Found 41 flights from YYZ to YQB.
[8/870] Searching for flights from YYZ to YXE...
Found 9 flights from YYZ to YXE.
[9/870] Searching for flights from YYZ to YLW...
Found 4 flights from YYZ to YLW.
[10/870] Searching for flights from YYZ to YYJ...
Found 5 flights from YYZ to YYJ.
[11/870] Searching for flights from YYZ to YQR...
Found 1 flights from YYZ to YQR.
[12/87

In [10]:
import pandas as pd
import csv

# Load the airport codes; fall back to an empty list if the CSV or its
# 'code' column is unavailable.
try:
    airports_df = pd.read_csv('/content/canadian_airports.csv')
    CANADIAN_AIRPORTS = airports_df['code'].tolist()
except (FileNotFoundError, KeyError) as load_error:
    if isinstance(load_error, FileNotFoundError):
        print("Error: canadian_airports.csv not found. Please make sure the file is in the correct location.")
    else:
        print("Error: 'code' column not found in the canadian_airports.csv file.")
    CANADIAN_AIRPORTS = []

# Every ordered (departure, arrival) tuple of distinct airports; an empty
# airport list simply yields an empty set.
all_possible_pairs = {
    (origin, destination)
    for origin in CANADIAN_AIRPORTS
    for destination in CANADIAN_AIRPORTS
    if origin != destination
}

# Collect the (dep, arr) pairs recorded in the log file.
try:
    with open('/content/flights_found_dep_arr_log.csv', 'r') as file:
        log_rows = list(csv.reader(file))
except FileNotFoundError:
    print("Error: flights_found_dep_arr_log.csv not found. Cannot check combinations.")
    log_rows = []

# Drop the first row only when it is the expected header; otherwise it is data.
if log_rows and log_rows[0] == ['dep', 'arr', 'number_of_flights']:
    log_rows = log_rows[1:]

# Rows with fewer than two fields cannot describe a pair and are ignored.
logged_pairs = {tuple(row[:2]) for row in log_rows if len(row) >= 2}


# Report whether every possible pair appears in the log.
if not (all_possible_pairs and logged_pairs):
    # Nothing to compare: say which of the two inputs was empty.
    if not CANADIAN_AIRPORTS:
        print("Could not generate possible pairs due to issues reading canadian_airports.csv")
    elif not logged_pairs:
        print("No logged pairs found in flights_found_dep_arr_log.csv")
elif all_possible_pairs <= logged_pairs:
    print("All possible combinations exist in the log.")
else:
    print("Not all possible combinations exist in the log.")
    missing_pairs = all_possible_pairs - logged_pairs
    print("Missing pairs:", missing_pairs)

All possible combinations exist in the log.


In [None]:
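# # UNUSED - earlier draft kept for reference only; superseded by the resumable
# # find_all_canadian_flights() defined in the cell above. It still calls
# # get_flights_between_airports() with the old two-argument signature, so it
# # will not run as-is if uncommented.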
# def find_all_canadian_flights():
#     all_flights = []
#     found_link = []
#     found_flights_df = pd.DataFrame(columns=['airport1', 'airport2'])

#     for origin_airport in CANADIAN_AIRPORTS:
#         for destination_airport in CANADIAN_AIRPORTS:

#             if [destination_airport, origin_airport] in found_link:
#               continue

#             if origin_airport != destination_airport:  # Don't check flights to the same airport
#                 print(f"Searching for flights from {origin_airport} to {destination_airport}...")
#                 flights = get_flights_between_airports(origin_airport, destination_airport)

#                 if flights:
#                     print(f"Found {len(flights)} flights from {origin_airport} to {destination_airport}.")
#                     all_flights.extend(flights)
#                     found_flights_df = pd.concat([found_flights_df, pd.DataFrame({'airport1': [origin_airport], 'airport2': [destination_airport]})], ignore_index=True)
#                     found_link.append([destination_airport, origin_airport]) # This line is not needed if not skipping reverse links
#                 else:
#                     print(f"No flights found from {origin_airport} to {destination_airport}.")

#                 # Sleep to respect API rate limits (essential for free tier)
#                 # This will take a very long time with the full list of airports.
#                 # Consider reducing the number of airports or increasing the sleep time if hitting rate limits.
#                 time.sleep(10)  # Adjust this based on the API's rate limits

#     found_flights_df.to_csv('connected_canadian_airports.csv', index=False)

#     return all_flights