In [22]:
# Question:
# What is the shortest distance to fly between all fifty States?

# I'll be using the dataset: 
# https://www.kaggle.com/datasets/flashgordon/usa-airport-dataset

# Here is some info about the attributes present in the dataset:

# Origin_airport: Three letter airport code of the origin airport
# Destination_airport: Three letter airport code of the destination airport
# Origin_city: Origin city name
# Destination_city: Destination city name
# Passengers: Number of passengers transported from origin to destination
# Seats: Number of seats available on flights from origin to destination
# Flights: Number of flights between origin and destination (multiple records for one month, many with flights > 1)
# Distance: Distance (to nearest mile) flown between origin and destination
# Fly_date: The date (yyyymm) of flight
# Origin_population: Origin city's population as reported by US Census
# Destination_population: Destination city's population as reported by US Census

In [39]:
import pandas as pd
import random
import numpy as np

In [None]:
# Let's get the data
dfFlights = pd.read_csv("Airports2.csv")

# Just to make it easier let's just keep what we need
dfFlights = dfFlights[['Origin_city','Destination_city','Distance']].copy()

# Drop duplicates
dfFlights = dfFlights.drop_duplicates()

dfFlights.head(10)


In [None]:
# Now we'll change the city to just have the State instead.
# Get the state code list

dfFlights["Origin_state"]=''
dfFlights["Destination_state"]=''

stateLookup = [ ['Alabama','AL'], ['Alaska','AK'], ['Arizona','AZ'], ['Arkansas','AR'], ['California','CA'], ['Colorado','CO'], ['Connecticut','CT'], ['Delaware','DE'], ['District of Columbia','DC'], ['Florida','FL'], ['Georgia','GA'], ['Hawaii','HI'], ['Idaho','ID'], ['Illinois','IL'], ['Indiana','IN'], ['Iowa','IA'], ['Kansas','KS'], ['Kentucky','KY'], ['Louisiana','LA'], ['Maine','ME'], ['Maryland','MD'], ['Massachusetts','MA'], ['Michigan','MI'], ['Minnesota','MN'], ['Mississippi','MS'], ['Missouri','MO'], ['Montana','MT'], ['Nebraska','NE'], ['Nevada','NV'], ['New Hampshire','NH'], ['New Jersey','NJ'], ['New Mexico','NM'], ['New York','NY'], ['North Carolina','NC'], ['North Dakota','ND'], ['Ohio','OH'], ['Oklahoma','OK'], ['Oregon','OR'], ['Pennsylvania','PA'], ['Rhode Island','RI'], ['South Carolina','SC'], ['South Dakota','SD'], ['Tennessee','TN'], ['Texas','TX'], ['Utah','UT'], ['Vermont','VT'], ['Virginia','VA'], ['Washington','WA'], ['West Virginia','WV'], ['Wisconsin','WI'], ['Wyoming','WY'] ]

for state in stateLookup:
    dfFlights.loc[dfFlights['Origin_city'].str.contains(', ' + state[1]), 'Origin_state'] = state[0]
    dfFlights.loc[dfFlights['Destination_city'].str.contains(', ' + state[1]), 'Destination_state'] = state[0]

dfFlights.head(10)


In [211]:
# Pick a random starting airport

def LookForRandomPath():

    r = random.randrange(0,len(dfFlights)-1)

    currentCity = (dfFlights.iloc[r])

    statesVisited = []
    citiesVisited = []
    totalDistance = 0

    while len(statesVisited)!=50:

        dfPossibleCities = dfFlights[~dfFlights['Destination_state'].isin(statesVisited)]
        dfPossibleCities = dfPossibleCities.query("Origin_city==@currentCity.Origin_city and Distance!=0")
        
        if (len(dfPossibleCities))==0:
            return -1,[],[]
            
        nextCity = dfPossibleCities.sort_values("Distance").iloc[0].Destination_city
        nextState = dfPossibleCities.sort_values("Distance").iloc[0].Destination_state
        totalDistance += dfPossibleCities.sort_values("Distance").iloc[0].Distance

        citiesVisited.append(nextCity)
        statesVisited.append(nextState)
    
    return totalDistance, statesVisited, citiesVisited

In [219]:

best = 100000000
tries = 0
while True==True:

     totalDistance, statesVisited, citiesVisited = LookForRandomPath()
     
     tries = tries + 1
     if tries%500==0:
          print("Tries:",tries)

     if totalDistance < best and totalDistance!=-1:
          print(tries, totalDistance, statesVisited)
          best = totalDistance
          print("")
          #if tries!=-1:
          #     for s in statesVisited:
          #          print(s)


2 34969 ['Illinois', 'Indiana', 'Wisconsin', 'Michigan', 'Iowa', 'Ohio', 'Missouri', 'Minnesota', 'Kentucky', 'Tennessee', 'Pennsylvania', 'Nebraska', 'West Virginia', 'Kansas', 'South Dakota', 'New York', 'Alabama', 'Georgia', 'Arkansas', 'North Carolina', 'Virginia', 'Mississippi', 'North Dakota', 'South Carolina', 'Oklahoma', 'District of Columbia', 'Maryland', 'Delaware', 'Louisiana', 'New Jersey', 'Texas', 'Vermont', 'Connecticut', 'Florida', 'Colorado', 'Massachusetts', 'New Hampshire', 'Rhode Island', 'Wyoming', 'Maine', 'Montana', 'New Mexico', 'Idaho', 'Utah', 'Arizona', 'Nevada', 'Washington', 'Oregon', 'California', 'Alaska']

