<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/fp_routes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## waypoints

In [None]:
!pip install colab-env -q
!pip install geopy -q
!pip install datasets -q
!pip install airportsdata -q

In [22]:
!pip install colab-env -q
!pip install geopy -q
!pip install datasets -q

import random
import math  # Import math for calculations
from datasets import Dataset
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from tqdm import tqdm
from geopy.point import Point

# Define lists for airports, aircraft types, and weather conditions
airports = [
    "JFK", "LAX", "LGA", "BOS", "SFO", "ORD", "DFW", "ATL", "SEA", "MIA",
    "DEN", "IAH", "MSP", "DTW", "PHX", "CLT", "LAS", "MCO", "EWR", "PHL",
    "LHR", "CDG", "AMS", "FRA", "MAD", "FCO", "MUC", "BCN", "DUB", "ZRH",
    "IST", "OSL", "ARN", "BRU", "VIE", "PRG", "CPH", "LIS", "ATH", "HEL",
    "HND", "NRT", "ICN", "PVG", "PEK", "KIX", "BKK", "SIN", "DXB", "HKG",
    "DEL", "BOM", "KUL", "CGK", "MNL", "TPE", "CAN", "SZX", "CTU", "XIY"
]
aircraft_types = [
    "Boeing 747", "Airbus A320", "Boeing 777", "Boeing 737", "Airbus A330",
    "Boeing 757", "Airbus A321", "Airbus A319", "Boeing 787", "Embraer E190"
]
weather_conditions = ["Clear", "Cloudy", "Rainy", "Snowy", "Windy"]

# Initialize geolocator
geolocator = Nominatim(user_agent="flight_planner", timeout=20)

def create_flight_data_point(origin, destination, departure_date, aircraft, weather):
    """Creates a flight data point with distance category, waypoints, and waypoint names."""
    try:
        location_origin = geolocator.geocode(origin)
        location_destination = geolocator.geocode(destination)

        if location_origin and location_destination:
            # Calculate distance
            distance = geodesic(
                (location_origin.latitude, location_origin.longitude),
                (location_destination.latitude, location_destination.longitude)
            ).kilometers

            print('\n')
            print(f"Distance between {origin} and {destination}: {distance} kilometers")

            #Airlines often classify routes by distance for operational and economic reasons,
            #using categories like short-haul (e.g., under 1,000 km), medium-haul (e.g., 1,600-4,000 km),
            #and long-haul (e.g., over 4,000 km).

            # Categorize distance (with a default category)
            if distance < 1000:
                distance_category = "short"
            elif distance < 4000:
                distance_category = "medium"
            elif distance < 10000:
                distance_category = "long"
            elif distance < 20000:
                distance_category = "longhaul"
            else:
                distance_category = "unknown"

            # Determine number of waypoints
            if distance_category == "short":
                num_waypoints = random.randint(1, 2)
            elif distance_category == "medium":
                num_waypoints = random.randint(3, 4)
            elif distance_category == "longhaul":
                num_waypoints = random.randint(6, 8)
            elif distance_category == "long":
                num_waypoints = random.randint(5, 7)
            else:
                num_waypoints = random.randint(9, 11)

            # Add waypoints (constrained to be closer to the route)
            waypoints = [(location_origin.latitude, location_origin.longitude)]
            waypoint_names = [origin]

            print(f"Number of waypoints: {num_waypoints}")
            print(f"Distance category: {distance_category}")
            print('\n')

            for _ in range(num_waypoints):
                fraction = random.uniform(0.2, 0.8)
                waypoint_lat = location_origin.latitude + fraction * (location_destination.latitude - location_origin.latitude)
                waypoint_lon = location_origin.longitude + fraction * (location_destination.longitude - location_origin.longitude)

                radius = 500  # Radius in kilometers
                max_offset_lat = radius / 111.32
                max_offset_lon = radius / (111.32 * math.cos(math.radians(waypoint_lat)))

                offset_lat = random.uniform(-max_offset_lat, max_offset_lat)
                offset_lon = random.uniform(-max_offset_lon, max_offset_lon)

                waypoint_lat += offset_lat
                waypoint_lon += offset_lon

                waypoint = Point(waypoint_lat, waypoint_lon)
                waypoint_location = geolocator.reverse(query=waypoint, exactly_one=True, addressdetails=True)
                waypoint_name = waypoint_location.address.split(",")[0] if waypoint_location else "Waypoint"
                waypoint_names.append(waypoint_name)
                waypoints.append((waypoint_lat, waypoint_lon))

            waypoints.append((location_destination.latitude, location_destination.longitude))
            waypoint_names.append(destination)

            input_text = f"Calculate the distance from {origin} to {destination}. Departure: {departure_date}, Aircraft: {aircraft}, Weather: {weather}"

            return {
                "input": input_text,
                "label": distance_category,
                "waypoints": waypoints,
                "waypoint_names": waypoint_names
            }
        else:
            print(f"Could not find coordinates for {origin} or {destination}")
            return None

    except Exception as e:
        print(f"Error generating flight data point: {e}")
        return None

# --- Dataset Creation Loop ---
flight_data = []
number_routes = 10  # You can adjust this number

for _ in tqdm(range(number_routes), desc="Generating flight data"):
    origin = random.choice(airports)
    destination = random.choice(airports)
    while origin == destination:
        destination = random.choice(airports)
    departure_date = f"2024-{random.randint(1, 12):02}-{random.randint(1, 28):02}"
    aircraft = random.choice(aircraft_types)
    weather = random.choice(weather_conditions)

    data_point = create_flight_data_point(origin, destination, departure_date, aircraft, weather)
    if data_point:
        flight_data.append(data_point)

# --- Create the Dataset ---
dataset = Dataset.from_list(flight_data)
print(dataset)
dataset.save_to_disk("flight_dataset_with_waypoints")

Generating flight data:   0%|          | 0/10 [00:00<?, ?it/s]



Distance between MIA and MAD: 7109.69356063665 kilometers
Number of waypoints: 7
Distance category: long




Generating flight data:  10%|█         | 1/10 [00:09<01:24,  9.33s/it]



Distance between CGK and MAD: 12172.559978985006 kilometers
Number of waypoints: 7
Distance category: longhaul




Generating flight data:  20%|██        | 2/10 [00:18<01:13,  9.14s/it]



Distance between PHX and LAS: 13004.906923680726 kilometers
Number of waypoints: 6
Distance category: longhaul




Generating flight data:  30%|███       | 3/10 [00:26<01:00,  8.62s/it]



Distance between PEK and MNL: 2865.9803693453764 kilometers
Number of waypoints: 3
Distance category: medium




Generating flight data:  40%|████      | 4/10 [00:31<00:43,  7.20s/it]



Distance between PRG and MUC: 10542.524450269531 kilometers
Number of waypoints: 7
Distance category: longhaul




Generating flight data:  50%|█████     | 5/10 [00:40<00:39,  7.84s/it]



Distance between SEA and LIS: 8393.461302974172 kilometers
Number of waypoints: 7
Distance category: long




Generating flight data:  60%|██████    | 6/10 [00:49<00:32,  8.25s/it]



Distance between MIA and AMS: 13848.154681881959 kilometers
Number of waypoints: 7
Distance category: longhaul




Generating flight data:  70%|███████   | 7/10 [00:58<00:25,  8.51s/it]



Distance between PHL and MIA: 1630.7918468072676 kilometers
Number of waypoints: 4
Distance category: medium




Generating flight data:  80%|████████  | 8/10 [01:04<00:15,  7.71s/it]



Distance between DUB and CTU: 676.5232560843866 kilometers
Number of waypoints: 1
Distance category: short




Generating flight data:  90%|█████████ | 9/10 [01:07<00:06,  6.25s/it]



Distance between CAN and DXB: 5459.237471205346 kilometers
Number of waypoints: 7
Distance category: long




Generating flight data: 100%|██████████| 10/10 [01:16<00:00,  7.64s/it]

Dataset({
    features: ['input', 'label', 'waypoints', 'waypoint_names'],
    num_rows: 10
})





Saving the dataset (0/1 shards):   0%|          | 0/10 [00:00<?, ? examples/s]

In [23]:
dataset

Dataset({
    features: ['input', 'label', 'waypoints', 'waypoint_names'],
    num_rows: 10
})

In [24]:
dataset[9]

{'input': 'Calculate the distance from CAN to DXB. Departure: 2024-02-19, Aircraft: Boeing 747, Weather: Rainy',
 'label': 'long',
 'waypoints': [[49.1813403, -0.3635615],
  [28.195739951019338, 40.26631446572423],
  [39.54913666699523, 33.926827270115794],
  [46.551574079095644, 18.0195396159328],
  [41.133354278683456, 18.753091629009944],
  [35.628987425271205, 40.893903785907334],
  [35.623121079138585, 32.27758685936468],
  [44.72059943625497, 19.157995109519398],
  [25.2500008, 55.3760231]],
 'waypoint_names': ['CAN',
  'بنيّة عمّاش',
  'Çelebiuşağı',
  'Somogyszil',
  'Waypoint',
  'ناحية مركدة',
  'Waypoint',
  'M-I 113',
  'DXB']}