<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/fp_routes_waypoints.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Waypoints: generating a synthetic flight-route dataset

In [None]:
!pip install colab-env -q
!pip install geopy -q
!pip install datasets -q
!pip install airportsdata -q
import colab_env

In [None]:
import random
import math
from datasets import Dataset
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from tqdm import tqdm
from geopy.point import Point # Import Point object
import warnings
warnings.filterwarnings("ignore")


# Define data
# Airport codes that routes are drawn from, written region by region and
# split on whitespace into a flat list (order is preserved).
airports = (
    # North America
    "ATL LAX ORD DFW DEN JFK SFO LAS SEA CLT "
    "MIA PHL EWR BOS PHX MCO IAH DAL FLL DTW "
    "MSP IAD BWI SLC SAN MDW TPA PDX BNA STL "
    # Europe
    "LHR CDG AMS FRA MAD BCN FCO MUC DUB ZRH "
    # Asia
    "HND PVG ICN SIN KUL BKK DXB DEL HKG NRT "
    # Caribbean
    "CUN PUJ SJU MBJ NAS "
    # South America
    "GRU BOG EZE SCL LIM "
    # Australia & New Zealand
    "SYD MEL BNE PER AKL"
).split()

# Aircraft models a record may be labelled with; also the fallback pool when
# no entry of the range table matches a route's distance.
aircraft_types = [
    "Boeing 737",
    "Airbus A320",
    "Boeing 777",
    "Airbus A330",
    "Embraer E190",
    "Bombardier CRJ900",
    "Boeing 747",
    "Airbus A380",
    "Cessna 172",
    "Piper PA-28",
    "Boeing 757",
    "Boeing 767",
    "Boeing 787",
    "Airbus A350",
]

# Weather labels; one is drawn at random for each generated route.
weather_conditions = [
    "Clear",
    "Cloudy",
    "Rainy",
    "Snowy",
    "Windy",
    "Foggy",
    "Stormy",
    "Sunny",
    "Overcast",
    "Partly Cloudy",
]

# Airport names dictionary: airport code -> full airport name.
# Keys mirror the codes in the airport list, grouped by the same regions.
airport_names = {
    # North America
    "ATL": "Hartsfield-Jackson Atlanta International Airport",
    "LAX": "Los Angeles International Airport",
    "ORD": "Chicago O'Hare International Airport",
    "DFW": "Dallas/Fort Worth International Airport",
    "DEN": "Denver International Airport",
    "JFK": "John F. Kennedy International Airport",
    "SFO": "San Francisco International Airport",
    # Renamed from McCarran International Airport in December 2021.
    "LAS": "Harry Reid International Airport",
    "SEA": "Seattle-Tacoma International Airport",
    "CLT": "Charlotte Douglas International Airport",
    "MIA": "Miami International Airport",
    "PHL": "Philadelphia International Airport",
    "EWR": "Newark Liberty International Airport",
    "BOS": "Logan International Airport",
    "PHX": "Phoenix Sky Harbor International Airport",
    "MCO": "Orlando International Airport",
    "IAH": "George Bush Intercontinental Airport",
    "DAL": "Dallas Love Field",
    "FLL": "Fort Lauderdale-Hollywood International Airport",
    "DTW": "Detroit Metropolitan Airport",
    "MSP": "Minneapolis-Saint Paul International Airport",
    "IAD": "Washington Dulles International Airport",
    "BWI": "Baltimore/Washington International Airport",
    "SLC": "Salt Lake City International Airport",
    "SAN": "San Diego International Airport",
    "MDW": "Midway International Airport",
    "TPA": "Tampa International Airport",
    "PDX": "Portland International Airport",
    "BNA": "Nashville International Airport",
    "STL": "St. Louis Lambert International Airport",

    # Europe
    "LHR": "London Heathrow Airport",
    "CDG": "Charles de Gaulle Airport",
    "AMS": "Amsterdam Airport Schiphol",
    "FRA": "Frankfurt Airport",
    "MAD": "Adolfo Suárez Madrid–Barajas Airport",
    "BCN": "Barcelona–El Prat Airport",
    "FCO": "Leonardo da Vinci–Fiumicino Airport",
    "MUC": "Munich Airport",
    "DUB": "Dublin Airport",
    "ZRH": "Zurich Airport",

    # Asia
    "HND": "Haneda Airport",
    "PVG": "Shanghai Pudong International Airport",
    "ICN": "Incheon International Airport",
    "SIN": "Singapore Changi Airport",
    "KUL": "Kuala Lumpur International Airport",
    "BKK": "Suvarnabhumi Airport",
    "DXB": "Dubai International Airport",
    "DEL": "Indira Gandhi International Airport",
    "HKG": "Hong Kong International Airport",
    "NRT": "Narita International Airport",

    # Caribbean
    "CUN": "Cancún International Airport",
    "PUJ": "Punta Cana International Airport",
    "SJU": "Luis Muñoz Marín International Airport",
    "MBJ": "Sangster International Airport",
    "NAS": "Lynden Pindling International Airport",

    # South America
    "GRU": "São Paulo–Guarulhos International Airport",
    "BOG": "El Dorado International Airport",
    "EZE": "Ministro Pistarini International Airport",
    "SCL": "Arturo Merino Benítez International Airport",
    "LIM": "Jorge Chávez International Airport",

    # Australia & New Zealand
    "SYD": "Sydney Airport",
    "MEL": "Melbourne Airport",
    "BNE": "Brisbane Airport",
    "PER": "Perth Airport",
    "AKL": "Auckland Airport"
}


# Aircraft ranges (in kilometers).
# Each row is (model, lower bound, upper bound); the mapping built from it
# stores the two bounds as a tuple per model. The route generator treats a
# model as eligible when the route distance lies between the bounds, inclusive.
_AIRCRAFT_RANGE_ROWS = [
    ("Boeing 737", 0, 6000),
    ("Airbus A320", 0, 6100),
    ("Boeing 777", 2440, 17000),
    ("Airbus A330", 6400, 13430),
    ("Embraer E190", 0, 4500),
    ("Bombardier CRJ900", 0, 2405),
    ("Boeing 747", 9200, 14815),
    ("Airbus A380", 14800, 15700),
    ("Cessna 172", 0, 1111),
    ("Piper PA-28", 0, 963),
    ("Boeing 757", 0, 7275),
    ("Boeing 767", 0, 11000),
    ("Boeing 787", 15045, 15750),
    ("Airbus A350", 15000, 16100),
]
aircraft_ranges = {
    model: (lower_km, upper_km)
    for model, lower_km, upper_km in _AIRCRAFT_RANGE_ROWS
}

# Single Nominatim client shared by all forward and reverse lookups in this
# notebook; the client-level timeout is set to 20.
geolocator = Nominatim(
    timeout=20,
    user_agent="flight_data_generator",
)

# Successful airport lookups keyed by the query string, so each airport is
# geocoded over the network at most once instead of once per generated route.
_airport_location_cache = {}


def _geocode_airport(code):
    """Return the geocoded location for ``code``, or None when nothing is found.

    Only successful lookups are cached, so a miss is retried on the next call.
    """
    location = _airport_location_cache.get(code)
    if location is None:
        location = geolocator.geocode(code)
        if location is not None:
            _airport_location_cache[code] = location
    return location


def _categorize_distance(distance):
    """Map a distance in km to (category name, (min_waypoints, max_waypoints))."""
    if distance < 1000:
        return "short", (1, 2)
    if distance < 4000:
        return "medium", (3, 4)
    if distance < 10000:
        return "long", (5, 7)
    if distance < 20000:
        return "longhaul", (6, 8)
    return "unknown", (9, 11)


def _shortest_longitude_delta(start_lon, end_lon):
    """Signed longitude change from start to end, going the short way round the globe."""
    delta = end_lon - start_lon
    if delta > 180.0:
        delta -= 360.0
    elif delta < -180.0:
        delta += 360.0
    return delta


def _wrap_longitude(lon):
    """Fold a longitude back into [-180, 180]; in-range values are returned untouched."""
    if -180.0 <= lon <= 180.0:
        return lon
    return (lon + 180.0) % 360.0 - 180.0


def create_flight_data_point(origin, destination, departure_date, aircraft, weather):
    """Build one synthetic route record between two airports.

    The endpoints are geocoded, the geodesic distance between them is measured,
    and a distance-dependent number of intermediate waypoints is placed on the
    straight latitude/longitude segment between the endpoints. Waypoints are
    emitted in order of progress from origin to destination, and the longitude
    is interpolated across the antimeridian when that is the shorter way.

    Args:
        origin: Airport code of the departure airport (used as geocoding query).
        destination: Airport code of the arrival airport.
        departure_date: Date string copied verbatim into the prompt text.
        aircraft: Kept for interface compatibility. The aircraft written into
            the record is always re-drawn: from the models whose range band
            contains the distance, or from ``aircraft_types`` if none does.
        weather: Weather label copied verbatim into the prompt text.

    Returns:
        A dict with keys ``input``, ``label`` (number of intermediate
        waypoints), ``distance`` (km), ``distance_category``, ``waypoints``
        (list of ``(lat, lon)`` tuples including both endpoints) and
        ``waypoint_names``; or None when an endpoint cannot be geocoded or any
        step raises (the error is printed, not propagated).
    """
    try:
        location_origin = _geocode_airport(origin)
        location_destination = _geocode_airport(destination)
        if not (location_origin and location_destination):
            print(f"Could not find coordinates for {origin} or {destination}")
            return None

        origin_coords = (location_origin.latitude, location_origin.longitude)
        destination_coords = (location_destination.latitude, location_destination.longitude)

        # Calculate and categorize the distance; the category fixes how many
        # intermediate waypoints are drawn.
        distance = geodesic(origin_coords, destination_coords).kilometers
        distance_category, waypoint_bounds = _categorize_distance(distance)
        num_waypoints = random.randint(*waypoint_bounds)

        # Aircraft selection based on distance, with a fallback if no band matches.
        eligible_aircraft = [
            model for model, (min_km, max_km) in aircraft_ranges.items()
            if min_km <= distance <= max_km
        ]
        aircraft = random.choice(eligible_aircraft or aircraft_types)

        lat_delta = destination_coords[0] - origin_coords[0]
        lon_delta = _shortest_longitude_delta(origin_coords[1], destination_coords[1])

        # Sorting the fractions keeps the waypoints in travel order instead of
        # letting the route jump back and forth between them.
        fractions = sorted(random.uniform(0.2, 0.9) for _ in range(num_waypoints))

        waypoints = [origin_coords]
        waypoint_names = [origin]
        for fraction in fractions:
            waypoint_lat = origin_coords[0] + fraction * lat_delta
            waypoint_lon = _wrap_longitude(origin_coords[1] + fraction * lon_delta)
            waypoint_location = geolocator.reverse(
                query=Point(waypoint_lat, waypoint_lon), exactly_one=True, timeout=20)
            # Use the first address component as the name; generic label when
            # the reverse lookup finds nothing.
            waypoint_name = (
                waypoint_location.address.split(",")[0] if waypoint_location else "Waypoint"
            )
            waypoint_names.append(waypoint_name)
            waypoints.append((waypoint_lat, waypoint_lon))
        waypoints.append(destination_coords)
        waypoint_names.append(destination)

        input_text = f"Calculate the waypoints from {origin} to {destination}. Departure: {departure_date}, Aircraft: {aircraft}, Weather: {weather}"
        return {
            "input": input_text,
            "label": num_waypoints,  # Use num_waypoints as the label
            "distance": distance,
            "distance_category": distance_category,
            "waypoints": waypoints,
            "waypoint_names": waypoint_names
        }
    except Exception as e:
        # Deliberate best-effort: a failed lookup drops this route rather than
        # aborting the whole generation run.
        print(f"Error generating flight data point: {e}")
        return None

# --- Dataset Creation Loop
# Seed for the `random` module so a re-run draws the same routes, dates,
# aircraft and weather. Set to None to seed from system entropy instead
# (the previous, non-reproducible behaviour). Geocoding results still depend
# on the remote service.
RANDOM_SEED = 42
number_routes = 2000  # You can adjust this number
DATASET_PATH = "/content/gdrive/MyDrive/datasets/flight_dataset_waypoints"

random.seed(RANDOM_SEED)
flight_data = []
for _ in tqdm(range(number_routes), desc="Generating flight data"):
    # One draw of two distinct airports; raises ValueError instead of looping
    # forever if fewer than two airports are configured.
    origin, destination = random.sample(airports, 2)
    # Day is capped at 28 so the date is valid in every month.
    departure_date = f"2024-{random.randint(1, 12):02}-{random.randint(1, 28):02}"
    aircraft = random.choice(aircraft_types)
    weather = random.choice(weather_conditions)
    data_point = create_flight_data_point(origin, destination, departure_date, aircraft, weather)
    # Routes that could not be generated come back as None and are skipped.
    if data_point:
        flight_data.append(data_point)

# Create the Dataset
dataset = Dataset.from_list(flight_data)
print(dataset)
dataset.save_to_disk(DATASET_PATH)

Generating flight data: 100%|██████████| 2000/2000 [4:07:00<00:00,  7.41s/it]

Dataset({
    features: ['input', 'label', 'distance', 'distance_category', 'waypoints', 'waypoint_names'],
    num_rows: 2000
})





Saving the dataset (0/1 shards):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
# Reload the dataset from the path the generation cell saved to and display its summary.
# NOTE(review): the output stored under this cell reports num_rows: 100, while the
# generation cell's stored output reports 2000 — this output looks stale; re-run to refresh.
dataset = Dataset.load_from_disk("/content/gdrive/MyDrive/datasets/flight_dataset_waypoints")
dataset

Dataset({
    features: ['input', 'label', 'distance', 'distance_category', 'waypoints', 'waypoint_names'],
    num_rows: 100
})

In [None]:
# Spot-check one record (index 4): prompt text, label, distance and waypoints.
dataset[4]

{'input': 'Calculate the waypoints from TPA to BOG. Departure: 2024-11-20, Aircraft: Boeing 737, Weather: Sunny',
 'label': 3,
 'distance': 2725.044115895908,
 'distance_category': 'medium',
 'waypoints': [[27.9791649, -82.5349276153517],
  [12.319376925792984, -76.89237906078647],
  [20.752671055674476, -79.93107100739284],
  [19.58292853579609, -79.50958835541668],
  [4.7020946, -74.14771320401294]],
 'waypoint_names': ['TPA', 'Waypoint', 'Waypoint', 'Waypoint', 'BOG']}

In [None]:
# Spot-check another record (index 1).
# NOTE(review): the output stored under this cell starts with "Calculate the distance from",
# which the current prompt template ("Calculate the waypoints from") cannot produce —
# the output appears to predate the current code; re-run to refresh.
dataset[1]

{'input': 'Calculate the distance from ZRH to CLT. Departure: 2024-02-26, Aircraft: Airbus A330, Weather: Foggy',
 'label': 5,
 'distance': 7195.243708058524,
 'distance_category': 'long',
 'waypoints': [[47.463388050000006, 8.553366081422642],
  [40.713349041172556, -40.75210242711043],
  [42.32546849492803, -28.976421596942654],
  [40.649241863129745, -41.22037149104135],
  [40.61225975183321, -41.49050627419237],
  [43.6114600327595, -19.58293287963759],
  [35.21074145, -80.9457435226013]],
 'waypoint_names': ['ZRH',
  'Waypoint',
  'Waypoint',
  'Waypoint',
  'Waypoint',
  'Waypoint',
  'CLT']}