In [1]:
import pandas as pd
import heapq
import folium

In [36]:
# Directory that holds the raw input CSV files.
in_path = 'in'

# Resolve the three input files first, then read each one into its own frame.
awards_csv = f"{in_path}/awards_2026.csv"
airport_codes_csv = f"{in_path}/iata-icao.csv"
per_diem_csv = f"{in_path}/FY2026_PerDiemMasterRatesFile.csv"

cpp_data = pd.read_csv(awards_csv)
iata_icao_data = pd.read_csv(airport_codes_csv)
per_diem_data = pd.read_csv(per_diem_csv)

In [37]:
# Directory that receives the filtered CSV exports written by this cell.
out_path = "out"

# Keep only the airport rows whose country code is US, then export them.
is_us_airport = iata_icao_data['country_code'] == 'US'
iata_icao_data = iata_icao_data[is_us_airport]
iata_icao_data.to_csv(f"{out_path}/iata_icao_us_data.csv", index=False)

# Keep only the routes whose origin AND destination are both in the United States,
# renumbering the surviving rows from zero.
origin_is_us = cpp_data['ORIGIN_COUNTRY'] == 'UNITED STATES'
destination_is_us = cpp_data['DESTINATION_COUNTRY'] == 'UNITED STATES'
cpp_data = cpp_data[origin_is_us & destination_is_us].reset_index(drop=True)

# Save to CSV for Svelte
cpp_data.to_csv(f"{out_path}/awards_us_2026.csv", index=False)

In [38]:
# Scratch cell for ad-hoc inspection. The commented-out lines are alternative
# looks at cpp_data; un-comment one to run it. Only the final expression of the
# cell is rendered as its output.
# cpp_data.nunique()
# cpp_data.isnull().sum()
# cpp_data.head()
# cpp_data.describe()
# cpp_data.info()
# cpp_data.columns
# cpp_data[cpp_data['ORIGIN_AIRPORT_ABBREV'] == 'SEA']
# Preview the first rows of the per-diem table.
per_diem_data.head()

Unnamed: 0,ID,STATE,DESTINATION,COUNTY/LOCATION DEFINED,SEASON BEGIN,SEASON END,FY26 Lodging Rate,FY26 M&IE,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,1,AL,Birmingham,Jefferson,,,$ 126,$ 80,,,
1,2,AL,Gulf Shores,Baldwin,October 1,February 28,$ 134,$ 74,,,
2,2,AL,Gulf Shores,Baldwin,March 1,May 31,$ 163,$ 74,,,
3,2,AL,Gulf Shores,Baldwin,June 1,July 31,$ 216,$ 74,,,
4,2,AL,Gulf Shores,Baldwin,August 1,September 30,$ 134,$ 74,,,


In [39]:
# Pick one airport and list every route that touches it, either as origin or as destination.
node = 'MSP'
departs_from_node = cpp_data['ORIGIN_AIRPORT_ABBREV'] == node
arrives_at_node = cpp_data['DESTINATION_AIRPORT_ABBREV'] == node
node_df = cpp_data[departs_from_node | arrives_at_node]
print(node_df)

       ITEM_NUM  AWARD_YEAR ORIGIN_AIRPORT_ABBREV DESTINATION_AIRPORT_ABBREV  \
25           28        2026                   ABQ                        MSP   
99          148        2026                   ANC                        MSP   
207         301        2026                   ATL                        MSP   
263         389        2026                   ATW                        MSP   
303         438        2026                   AUS                        MSP   
...         ...         ...                   ...                        ...   
10174     14731        2026                   MSP                        YUM   
11148     17031        2026                   ACV                        MSP   
11360     17560        2026                   DAL                        MSP   
11402     17674        2026                   ESC                        MSP   
11602     18186        2026                   MHK                        MSP   

      ORIGIN_CITY_NAME ORIGIN_STATE ORI

In [40]:
def get_fares(start, goal):
    """Look up the fare columns for the airport pair ``start`` / ``goal``.

    The pair is matched against the module-level ``cpp_data`` frame in either
    direction. When several rows match, the first one is used.

    Args:
        start: Airport code of one end of the pair.
        goal: Airport code of the other end of the pair.

    Returns:
        A dict mapping every column whose name ends in ``_FARE`` to its value
        in the first matching row, or ``None`` (after printing a notice) when
        no row matches.
    """
    origins = cpp_data['ORIGIN_AIRPORT_ABBREV']
    destinations = cpp_data['DESTINATION_AIRPORT_ABBREV']

    # A route counts whether it is listed as start->goal or as goal->start.
    listed_forward = (origins == start) & (destinations == goal)
    listed_backward = (origins == goal) & (destinations == start)
    pair_rows = cpp_data[listed_forward | listed_backward]

    if pair_rows.empty:
        print(f"No direct fares found between {start} and {goal}.")
        return None

    fare_names = [name for name in pair_rows.columns if name.endswith('_FARE')]
    return {name: pair_rows[name].iloc[0] for name in fare_names}

# Demonstrate the fare lookup on a single airport pair.
start, goal = 'DCA', 'ORD'
fares = get_fares(start, goal)
if fares is not None:
    print(f"Direct fares between {start} and {goal}: {fares}")


Direct fares between DCA and ORD: {'YCA_FARE': np.int64(114), '_CA_FARE': np.int64(0), 'BUSINESS_FARE': np.int64(0), '_CP_FARE': np.int64(0)}


In [41]:
# Cost-accumulation helper used by the path search.
def heuristic(current_cost, weight):
    """Return the cost of a path extended by one more edge.

    Despite the name, no estimate of the remaining distance is involved: the
    result is simply the cost accumulated so far plus the weight of the edge
    being followed.

    Args:
        current_cost: Cost of the path up to the current node.
        weight: Weight of the edge leading to the neighbour.

    Returns:
        ``current_cost + weight``.
    """
    extended_cost = current_cost + weight
    return extended_cost

In [42]:
# Build an adjacency list keyed by airport code. Each route row contributes one
# edge weighted by its YCA_FARE, and the edge is stored under both endpoints.
graph = {}
for _, route in cpp_data.iterrows():
    origin_code = route['ORIGIN_AIRPORT_ABBREV']
    destination_code = route['DESTINATION_AIRPORT_ABBREV']
    fare = route['YCA_FARE']

    # Flights are assumed to be bidirectional, so the edge is recorded both ways.
    graph.setdefault(origin_code, []).append((destination_code, fare))
    graph.setdefault(destination_code, []).append((origin_code, fare))

# Cheapest-path search (kept under the name a_star; see the docstring)
def a_star(graph, start, goal):
    """Find the cheapest path from ``start`` to ``goal``.

    Nodes are expanded in order of accumulated path cost only; no estimate of
    the remaining distance is added to the priority, so this behaves as a
    uniform-cost (Dijkstra) search rather than a true A*. Edge weights are
    expected to be non-negative for the result to be the cheapest path.

    Args:
        graph: Dict mapping a node to a list of ``(neighbor, weight)`` tuples.
        start: Node to start from.
        goal: Node to reach.

    Returns:
        A ``(path, cost)`` tuple. ``path`` is the list of nodes from ``start``
        to ``goal`` inclusive and ``cost`` is the sum of the edge weights along
        it. When ``goal`` cannot be reached, ``path`` is an empty list and
        ``cost`` is ``float('inf')``.
    """
    queue = [(0, start)]
    came_from = {start: None}
    cost_so_far = {start: 0}

    while queue:
        current_priority, current_node = heapq.heappop(queue)

        if current_node == goal:
            break

        # A node is pushed again every time a cheaper route to it is found, so
        # older and more expensive entries can still be in the heap. Expanding
        # them cannot improve anything; skip them.
        if current_priority > cost_so_far[current_node]:
            continue

        for neighbor, weight in graph.get(current_node, []):
            new_cost = heuristic(cost_so_far[current_node], weight)
            if neighbor not in cost_so_far or new_cost < cost_so_far[neighbor]:
                cost_so_far[neighbor] = new_cost
                came_from[neighbor] = current_node
                # The priority is the path cost alone (no heuristic term).
                heapq.heappush(queue, (new_cost, neighbor))

    # The goal was never discovered: report "no path" instead of a one-element
    # path that would look like a valid route.
    if goal not in came_from:
        return [], float('inf')

    # Walk the predecessor links back from the goal, then flip the result.
    path = []
    node = goal
    while node is not None:
        path.append(node)
        node = came_from.get(node)
    path.reverse()

    return path, cost_so_far[goal]

# Example run: cheapest route from MSP to DCA
start, goal = 'MSP', 'DCA'
path, total_cost = a_star(graph, start, goal)
print(
    f"Path from {start} to {goal}: {path}",
    f"Total YCA Fare: ${total_cost}",
    sep="\n",
)

Path from MSP to DCA: ['MSP', 'ORD', 'DCA']
Total YCA Fare: $203


In [43]:
# Lookup table: IATA code -> (latitude, longitude), built from the airport table.
iata_coords = dict(
    zip(
        iata_icao_data['iata'],
        zip(iata_icao_data['latitude'], iata_icao_data['longitude']),
    )
)

def plot_path_on_map(path):
    """Draw a route on a folium map.

    The map is centred on the first airport of ``path`` when that airport has
    an entry in the module-level ``iata_coords`` lookup; otherwise (including
    when ``path`` is empty) the map is created without an explicit location.

    For every pair of consecutive airports that both have coordinates, a green
    marker is placed on the first, a red marker on the second, and a blue line
    is drawn between them. Pairs with a missing coordinate are skipped.

    Args:
        path: List of airport codes in travel order.

    Returns:
        The ``folium.Map`` with the markers and lines added.
    """
    # Guard against an empty path: there is no first airport to centre on.
    if path and path[0] in iata_coords:
        route_map = folium.Map(location = iata_coords[path[0]], zoom_start = 5)
    else:
        route_map = folium.Map(zoom_start = 5)

    # Walk the path one leg (consecutive pair of airports) at a time.
    for origin, destination in zip(path, path[1:]):
        if origin not in iata_coords or destination not in iata_coords:
            continue
        origin_coords = iata_coords[origin]
        destination_coords = iata_coords[destination]
        folium.Marker(origin_coords, popup = origin, icon = folium.Icon(color = 'green')).add_to(route_map)
        folium.Marker(destination_coords, popup = destination, icon = folium.Icon(color = 'red')).add_to(route_map)
        folium.PolyLine(locations = [origin_coords, destination_coords], color = 'blue').add_to(route_map)

    return route_map

# Render the most recently computed path and write it out as an HTML page.
out_path = "out"
route_map = plot_path_on_map(path)
map_file = f"{out_path}/flight_route_map.html"
route_map.save(map_file)
print("Map has been saved as 'flight_route_map.html'.")

Map has been saved as 'flight_route_map.html'.


In [None]:
# ---- Export graph + airport coordinates to JSON for Svelte ----

import json

# One record per airport in iata_coords, whose values are (latitude, longitude).
# NOTE(review): "lat" receives coords[1] (the longitude) and "lon" receives
# coords[0] (the latitude), so graph.json is written with the two swapped.
# A later cell re-reads graph.json and swaps them back into graph_fixed.json;
# correcting the indices here means that cell has to be dropped at the same time.
airport_dict = {}
for code, coords in iata_coords.items():
    airport_dict[code] = {
        "code": code,
        "name": code,        # placeholder; can be replaced with the full airport name later
        "lat": coords[1],
        "lon": coords[0],
    }

# Re-shape the adjacency list: each (neighbour, weight) tuple becomes a small dict.
adjacency_list = {}
for origin, neighbors in graph.items():
    adjacency_list[origin] = [{"to": dest, "cost": cost} for dest, cost in neighbors]

graph_json = {"airports": airport_dict, "edges": adjacency_list}

out_path = "out"
with open(f"{out_path}/graph.json", "w") as f:
    json.dump(graph_json, f, indent=2)

print("Export complete!")
print("Airports:", len(airport_dict))
print("Nodes with outgoing edges:", len(adjacency_list))

Export complete!
Airports: 2030
Nodes with outgoing edges: 312


In [47]:
import json

out_path = "out"

# Load the export into its own name. Rebinding ``graph`` here would replace the
# adjacency list that a_star() and optimal_meeting_point() expect, and any cell
# run afterwards would silently search a dict with the keys "airports"/"edges".
with open(f"{out_path}/graph.json", "r") as f:
    exported_graph = json.load(f)

airports = exported_graph["airports"]
edges = exported_graph["edges"]

# graph.json is written with the latitude under "lon" and the longitude under
# "lat", so every record is swapped unconditionally.
fixed_airports = {
    code: {
        "code": code,
        "name": info["name"],
        "lat": info["lon"],
        "lon": info["lat"],
    }
    for code, info in airports.items()
}

fixed_graph = {
    "airports": fixed_airports,
    "edges": edges
}

with open(f"{out_path}/graph_fixed.json", "w") as f:
    json.dump(fixed_graph, f, indent=2)

print("Forced-swap correction complete.")


Forced-swap correction complete.


In [30]:
import heapq

# Rebuild the adjacency list keyed by airport code. Each route row contributes
# one edge weighted by its YCA_FARE, and the edge is stored under both endpoints.
graph = {}
for _, route in cpp_data.iterrows():
    origin_code = route['ORIGIN_AIRPORT_ABBREV']
    destination_code = route['DESTINATION_AIRPORT_ABBREV']
    fare = route['YCA_FARE']

    # Flights are assumed to be bidirectional, so the edge is recorded both ways.
    graph.setdefault(origin_code, []).append((destination_code, fare))
    graph.setdefault(destination_code, []).append((origin_code, fare))

# Cheapest-path search (kept under the name a_star; see the docstring)
def a_star(graph, start, goal):
    """Find the cheapest path from ``start`` to ``goal``.

    Nodes are expanded in order of accumulated path cost only; no estimate of
    the remaining distance is added to the priority, so this behaves as a
    uniform-cost (Dijkstra) search rather than a true A*. Edge weights are
    expected to be non-negative for the result to be the cheapest path.

    Args:
        graph: Dict mapping a node to a list of ``(neighbor, weight)`` tuples.
        start: Node to start from.
        goal: Node to reach.

    Returns:
        A ``(path, cost)`` tuple. ``path`` is the list of nodes from ``start``
        to ``goal`` inclusive and ``cost`` is the sum of the edge weights along
        it. When ``goal`` cannot be reached, ``path`` is an empty list and
        ``cost`` is ``float('inf')``.
    """
    queue = [(0, start)]
    came_from = {start: None}
    cost_so_far = {start: 0}

    while queue:
        current_priority, current_node = heapq.heappop(queue)

        if current_node == goal:
            break

        # A node is pushed again every time a cheaper route to it is found, so
        # older and more expensive entries can still be in the heap. Expanding
        # them cannot improve anything; skip them.
        if current_priority > cost_so_far[current_node]:
            continue

        for neighbor, weight in graph.get(current_node, []):
            new_cost = heuristic(cost_so_far[current_node], weight)
            if neighbor not in cost_so_far or new_cost < cost_so_far[neighbor]:
                cost_so_far[neighbor] = new_cost
                came_from[neighbor] = current_node
                # The priority is the path cost alone (no heuristic term).
                heapq.heappush(queue, (new_cost, neighbor))

    # The goal was never discovered: report "no path" instead of a one-element
    # path that would look like a valid route.
    if goal not in came_from:
        return [], float('inf')

    # Walk the predecessor links back from the goal, then flip the result.
    path = []
    node = goal
    while node is not None:
        path.append(node)
        node = came_from.get(node)
    path.reverse()

    return path, cost_so_far[goal]

def _cheapest_costs_from(graph, source):
    """Return ``{node: cheapest total edge weight from source}`` for every reachable node."""
    best = {source: 0}
    frontier = [(0, source)]
    while frontier:
        cost, node = heapq.heappop(frontier)
        # Skip heap entries that were superseded by a cheaper route to the same node.
        if cost > best[node]:
            continue
        for neighbor, weight in graph.get(node, []):
            candidate = cost + weight
            if neighbor not in best or candidate < best[neighbor]:
                best[neighbor] = candidate
                heapq.heappush(frontier, (candidate, neighbor))
    return best


# Function to find the optimal meeting point for multiple travelers
def optimal_meeting_point(graph, start_points):
    """Pick the airport that minimises the summed travel cost of all travellers.

    One single-source cheapest-cost search is run per starting point and its
    cost table is reused for every candidate airport, instead of running a
    separate search for every (candidate, traveller) pair. Edge weights are
    expected to be non-negative.

    Args:
        graph: Dict mapping a node to a list of ``(neighbor, weight)`` tuples.
        start_points: Iterable of starting airport codes, one per traveller.

    Returns:
        A ``(best_meeting_point, min_total_cost)`` tuple. Candidates are the
        keys of ``graph``, examined in iteration order; on a tie the earliest
        candidate wins. An airport that some traveller cannot reach has an
        infinite total, and if every candidate is infinite the result is
        ``(None, float('inf'))``.
    """
    cost_tables = [_cheapest_costs_from(graph, start) for start in start_points]
    unreachable = float('inf')

    min_total_cost = float('inf')
    best_meeting_point = None

    for airport in graph.keys():
        total_cost = 0
        for cost_table in cost_tables:
            total_cost += cost_table.get(airport, unreachable)

        # Strict comparison keeps the first candidate when totals are equal.
        if total_cost < min_total_cost:
            min_total_cost = total_cost
            best_meeting_point = airport

    return best_meeting_point, min_total_cost

# Report the route and fare from each starting airport to one destination
def print_paths_to_destination(graph, start_points, destination):
    """Print the path and total fare from every start point to ``destination``.

    Each start point is searched with ``a_star``; two lines are printed per
    start point (the path, then its total YCA fare). Nothing is returned.

    Args:
        graph: Dict mapping a node to a list of ``(neighbor, weight)`` tuples.
        start_points: Iterable of starting airport codes.
        destination: Airport code every traveller is heading to.
    """
    for start in start_points:
        path, cost = a_star(graph, start, destination)
        print(
            f"Path from {start} to {destination}: {path}",
            f"Total YCA Fare from {start} to {destination}: ${cost}",
            sep="\n",
        )

# Travellers' starting airports. Alternative scenarios are kept below, commented out.
start_points = ['MSP', 'DCA']
# start_points = ['DCA', 'FLL', 'ORD', 'RDU']
# start_points = ['DCA', 'MIA', 'MDW', 'RDU', 'DEN']
# start_points = ['ATL', 'DFW', 'DEN', 'LIT', 'PIT', 'MIA', 'BUF', 'DCA']
# start_points = ['ATL', 'DFW', 'DEN', 'LIT', 'PIT', 'MIA', 'BUF']
# start_points = ['BDL', 'MSP', 'BWI', 'RDU', 'ORD', 'DCA', 'IAD', 'MIA', 'FLL'] # DS
# start_points = ['ABE', 'OKC', 'SEA', 'BWI']
# start_points = ['ABE', 'OKC']

# Find the cheapest common destination, then show each traveller's route to it.
meeting_point, total_cost = optimal_meeting_point(graph, start_points)
print(
    f"Optimal meeting point for {start_points}: {meeting_point}",
    f"Total YCA Fare: ${total_cost}",
    sep="\n",
)

print_paths_to_destination(graph, start_points, meeting_point)

Optimal meeting point for ['MSP', 'DCA']: DCA
Total YCA Fare: $203
Path from MSP to DCA: ['MSP', 'ORD', 'DCA']
Total YCA Fare from MSP to DCA: $203
Path from DCA to DCA: ['DCA']
Total YCA Fare from DCA to DCA: $0


In [None]:
# Collect, per traveller, the searched route next to the direct route (when one exists)
def print_paths_to_destination_with_direct_fares(graph, start_points, destination):
    """Gather the searched path and the direct fare for each start point.

    Despite the name, this function prints nothing itself (``get_fares`` may
    print a notice when no direct fare exists); it returns the collected data.

    Args:
        graph: Dict mapping a node to a list of ``(neighbor, weight)`` tuples.
        start_points: Iterable of starting airport codes.
        destination: Airport code every traveller is heading to.

    Returns:
        A list with one tuple per start point:
        ``(start, path, computed_cost, direct_path, direct_cost)``.
        ``path`` and ``computed_cost`` come from ``a_star``. ``direct_path`` is
        ``[start, destination]`` and ``direct_cost`` the ``YCA_FARE`` returned
        by ``get_fares``; both are ``None`` when no such fare is available.
    """
    results = []
    for start in start_points:
        path, computed_cost = a_star(graph, start, destination)

        # Look for a fare listed directly between the two airports.
        direct_fares = get_fares(start, destination)
        if not direct_fares or 'YCA_FARE' not in direct_fares:
            direct_path, direct_cost = None, None
        else:
            direct_path = [start, destination]
            direct_cost = direct_fares['YCA_FARE']

        results.append((start, path, computed_cost, direct_path, direct_cost))

    return results

# Example run with start points
start_points = ['MSP', 'DCA']
# start_points = ['IAD', 'MIA', 'MDW', 'RDU'] # 629
# start_points = ['IAD', 'MIA', 'ORD', 'RDU'] # 559
# start_points = ['DCA', 'MIA', 'ORD', 'RDU'] # 464
# start_points = ['IAD', 'FLL', 'ORD', 'RDU'] # 543
# start_points = ['DCA', 'FLL', 'ORD', 'RDU'] # $1 difference! 434

# Cheapest common destination according to the path search
optimal_meeting_point_result, optimal_total_cost = optimal_meeting_point(graph, start_points)

# Per-traveller searched path plus the direct fare to that same destination
paths_results_with_direct_fares = print_paths_to_destination_with_direct_fares(graph, start_points, optimal_meeting_point_result)

# Add up the direct fares only (last field of each result tuple), skipping
# travellers for whom no direct fare exists. Each field gets its own name so the
# summed value does not depend on which of two identical names is bound last.
direct_fare_total_cost = sum(
    direct_cost
    for _, _, _, _, direct_cost in paths_results_with_direct_fares
    if direct_cost is not None
)

# Display both paths and fares
for start, path, computed_cost, direct_path, direct_cost in paths_results_with_direct_fares:
    print(f"From {start} to {optimal_meeting_point_result}:")
    print(f"  A* Path: {path} | A* Fare: ${computed_cost}")
    if direct_path:
        print(f"  Direct Path: {direct_path} | Direct Fare: ${direct_cost}")
    print()

# Print final total costs for both approaches
print(f"Total A* Optimal Fare: ${optimal_total_cost}")
print(f"Total YCA Direct Fare (if preferred): ${direct_fare_total_cost}")

No direct fares found between DCA and DCA.
From MSP to DCA:
  A* Path: ['MSP', 'ORD', 'DCA'] | A* Fare: $203
  Direct Path: ['MSP', 'DCA'] | Direct Fare: $573

From DCA to DCA:
  A* Path: ['DCA'] | A* Fare: $0

Total A* Optimal Fare: $203
Total YCA Direct Fare (if preferred): $573
