In [1]:
# import libraries
import pandas as pd
import openpyxl 
from collections import Counter
import time

In [2]:
# function to determine whether a number is prime or not
def is_prime(num):
    if num < 2:
        return False
    for i in range(2, int(num**0.5) + 1):
        if num % i == 0:
            return False
    return True

In [3]:
# function to generate an array of size n of unique prime numbers
def generate_prime_numbers(n):
    primes = []
    num = 2
    while len(primes) < n:
        if is_prime(num):
            primes.append(num)
        num += 1
    return primes

In [4]:
# build airport frequency dictionary
def airport_freq_dict(excel_file, sheet_name):
    df = pd.read_excel(excel_file, sheet_name=sheet_name)
    airports_freq_dict = Counter(df["ORIGIN"])

    return airports_freq_dict

In [5]:
# function to build the lookup table of airports
# stores airport code with a prime number in a dictionary
def build_lookup_table(excel_sheet, sheet_name, airports_freq_dict):
    sorted_airport_list = list(airports_freq_dict.keys())
    
    lookup_table = {}
    prime_num = 2

    for airport in sorted_airport_list:
        while not is_prime(prime_num):
            prime_num += 1
        lookup_table[airport] = prime_num
        prime_num += 1
        
    return lookup_table

In [6]:
# function to search the excel file and find the distance between origin-destination pairs
def distance_between(df, orig, dest):
    row = df.loc[df['ORIGIN'] == orig]
    col = row.loc[row['DEST'] == dest]
    distance = col['DISTANCE_IN_MILES']
    return distance

In [7]:
# this is taking a long time because of nested for-loop

# ordered dict hashtable implementation
# (prime: distance) as (key:value)
#def build_dict_hashtable(excel_file, sheet_name, lookup_table):
#    route_hashtable = {}
    # for every airport in the lookup table with each other airport
#    for item_1 in lookup_table:
#        for item_2 in lookup_table:
            # get prime number for each and compute unique key to store the distance
#            key = lookup_table[item_1]*lookup_table[item_2]
#            route_hashtable[key] = distance_between(excel_file, sheet_name, item_1, item_2)

In [8]:
def build_dict_hashtable(df, lookup_table):
    route_hashtable = {}
    # for every airport in the lookup table with each other airport
    for key_1, value_1 in lookup_table.items():
        print(key_1)
        print(time.time())
        for key_2, value_2 in lookup_table.items():
            # get prime number for each and compute unique key to store the distance
            key = value_1*value_2
            route_hashtable[key] = distance_between(df, key_1, key_2)

In [9]:
# mock main method
# sample input
excel_file = "//Users/ellasobhani/GPT/Distance_of_All_Airports_20230606_133617.xlsx"
sheet_name = "Sheet1"
orig = "06A"
dest = "A43"
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# list of unique airports
#airports = airports_list(excel_file, sheet_name)
#n = len(airports)
#print(n)

In [None]:
# create a frequency dictionary of the airports and sort it by frequency in descending order
t0 = time.time()
airport_freq = airport_freq_dict(excel_file, sheet_name)
t1 = time.time()
print(t1-t0)
#print(airport_freq)

# assign each airport in the freq dictionary with a prime number
t2 = time.time()
lookup_table = build_lookup_table(excel_file, sheet_name, airport_freq)
t3 = time.time()
print(t3-t2)
#print(lookup_table)

#build dict hashtable
dict_hashtable = build_dict_hashtable(df, lookup_table)
#print(dict_hashtable)

19.645903825759888
0.015407323837280273
01A
1688744103.576898
06A
1688744137.2249181
09A
1688744170.824262
1AK
1688744204.524608


In [None]:
# query function
def query(hashtable, orig, dest):
    key = lookup_table[orig]*lookup_table[dest]
    return hashtable[key]

test_query = query(dict_hashtable, orig, dest)
print(test_query)