# Enter Trip Information

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes, so edits to the project code imported below
# (src.data_collection, src.genetic_algorithm) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Enter a trip name (a unique identifier for the specified points of interest)
# The name is embedded in the cached distance/duration CSV filename under data/
# and in the identifier used for saved results, so use a different name for
# each distinct set of points of interest.
TRIP_NAME = "east"

# Enter your points of interest in the list below (include your starting location)
# Each string is passed to the Google Maps query as-is and is later compared
# exactly against the 'Venue 1' / 'Venue 2' values in the cached CSV, so changing
# even one character of an entry makes the cache count as incomplete.
MY_POINTS_OF_INTEREST = [
    "San Francisco, California", # Starting Location
    "Yosemite Valley Visitor Center, Village Drive, Yosemite Valley, CA", # Yosemite NP
    "D L Bliss State Park, California 89, South Lake Tahoe, CA", # Lake Tahoe
    "Zion National Park Visitor Center, Zion – Mount Carmel Highway, Hurricane, UT", # Zion NP
    "Arches National Park Visitor Center, Moab, UT", # Arches NP
    "Monument Valley Navajo Tribal Park, Main Monument Valley Road, Oljato-Monument Valley, AZ", # Monument Valley
    "Island in the Sky Visitor Center, Grand View Point Road, Moab, UT", # Canyonlands NP
    "Bryce Canyon National Park Visitor Center, Utah 63, Bryce, UT", # Bryce Canyon NP
    "Moro Rock Trail, California", # Sequoia NP
    "Horseshoe Bend Parking Lot, Page, AZ", # Horseshoe Bend Trail
    "Grand Canyon Visitor Center, South Entrance Road, Grand Canyon Village, AZ", # Grand Canyon NP
    "Calf Creek Campground, Boulder, UT", # Grand Staircase-Escalante NM
    "Red Cliffs Recreation Area, Unnamed Road, Washington, UT", # Red Cliffs Recreation Nature Trail
    "Natural Bridges Visitor Center, Natural Bridge, Lake Powell, UT", # Natural Bridges NM
    "Kanarra Creek Trailhead, Kanarraville, UT", # Kanarra Creek Canyon Trail
    "San Simeon, CA", # South Big Sur Drive
    "Big Sur, CA", # Middle Big Sur Drive
    "Carmel-by-the-Sea, CA", # North Big Sur Drive
    "Castle Rock Entrance Station Parking Lot, Unnamed Road, Saratoga, CA", # Saratoga Gap Trail
]

# Collect Driving Distance/Duration Information

In [3]:
from src.data_collection import *
from config import GOOGLE_MAPS_API_KEY


import os

# Determine distance and duration filename based on specified trip name
distance_duration_filename = "data/my_{}_points_of_interest_distance_duration.csv".format(TRIP_NAME)

# Try to reuse the cached CSV for this trip. Only a missing file counts as a
# cache miss: any other read problem (e.g. a corrupt or hand-edited file) is
# raised to the user instead of silently triggering a fresh, billable round of
# Google Maps API queries that would also overwrite the file.
try:
    distance_duration_df = pd.read_csv(distance_duration_filename, index_col=0)
except FileNotFoundError:
    distance_duration_df = None
    print("No cached distance/duration data found at '{}'".format(distance_duration_filename))

# The cache is only usable if it covers every requested point of interest
if distance_duration_df is not None:

    # Create set of unique points of interest from df
    df_points_of_interest = set(pd.unique(distance_duration_df[['Venue 1', 'Venue 2']].values.ravel('K')))

    # Check if missing one or more of my points of interest in df
    missing_points_of_interest = set(MY_POINTS_OF_INTEREST) - df_points_of_interest
    if missing_points_of_interest:
        print("Missing one or more of my points of interest in '{}': {}".format(
            distance_duration_filename, sorted(missing_points_of_interest)))
        distance_duration_df = None

# Create a distance and duration df with all my points of interest and save to the specified filename
if distance_duration_df is None:

    # Make sure the output folder exists *before* querying, so a missing folder
    # cannot cause the API results to be lost when saving
    os.makedirs(os.path.dirname(distance_duration_filename), exist_ok=True)

    # Query Google Maps API for one-way driving distances and durations
    distance_duration_data = query_gmaps_api_for_one_way_driving_distance_and_duration(MY_POINTS_OF_INTEREST, GOOGLE_MAPS_API_KEY)

    # Create DataFrame of one-way distances and durations
    distance_duration_df = create_distance_and_duration_df(distance_duration_data)

    # Save DataFrame to CSV
    distance_duration_df.to_csv(distance_duration_filename)

# Preview distance and duration df (first five rows, ordered by distance)
distance_duration_df.head().sort_values('Distance (mi)', ascending=False)

Unnamed: 0,Venue 1,Venue 2,Distance (mi),Duration (s),Duration (hhmm)
3,"San Francisco, California","Arches National Park Visitor Center, Moab, UT",963,51219,14:13
4,"San Francisco, California","Monument Valley Navajo Tribal Park, Main Monum...",941,52195,14:29
2,"San Francisco, California","Zion National Park Visitor Center, Zion – Moun...",727,40019,11:06
1,"San Francisco, California","D L Bliss State Park, California 89, South Lak...",197,12542,3:29
0,"San Francisco, California","Yosemite Valley Visitor Center, Village Drive,...",191,14181,3:56


## *Optional: Display Full Name Squareform Distance/Duration Matrices*

In [3]:
# Extend the one-way table with the reverse direction (B to A as well as A to B)
_df = add_reverse_travel_information_to_distance_duration_df(distance_duration_df)

# Pivot into venue-by-venue tables keyed by full venue names;
# venue pairs without an entry are filled with 0 (or "0:00" for the hh:mm view)
distance_matrix = (
    _df.pivot(index='Venue 1', columns='Venue 2', values='Distance (mi)')
    .fillna(0)
    .astype(int)
)
duration_matrix = (
    _df.pivot(index='Venue 1', columns='Venue 2', values='Duration (s)')
    .fillna(0)
    .astype(int)
)
duration_matrix_hhmm = (
    _df.pivot(index='Venue 1', columns='Venue 2', values='Duration (hhmm)')
    .fillna("0:00")
)

In [None]:
# # Display distance matrix
# display(distance_matrix)

In [None]:
# # Display duration matrix
# display(duration_matrix)

In [None]:
# Display duration hhmm matrix
# (rows are indexed by 'Venue 1' and columns by 'Venue 2'; venue pairs with no
# entry in the data show as "0:00")
display(duration_matrix_hhmm)

## *Optional: Display Integer Name Squareform Distance/Duration Matrices*

In [4]:
# Extend the one-way table with the reverse direction (B to A as well as A to B)
_df = add_reverse_travel_information_to_distance_duration_df(distance_duration_df)

# Make 'Venue 1' categorical and record its integer code per row
_df['Venue 1'] = _df['Venue 1'].astype('category')
_venue_categories = _df['Venue 1'].cat.categories
_df['Venue 1 Codes'] = _df['Venue 1'].cat.codes

# Reuse the 'Venue 1' categories for 'Venue 2' so that a given venue gets the
# same integer code in both columns
_df['Venue 2'] = pd.Categorical(_df['Venue 2'], categories=_venue_categories)
_df['Venue 2 Codes'] = _df['Venue 2'].cat.codes

# Pivot into code-by-code tables; pairs without an entry are filled with 0 (or "0:00")
distance_matrix = (
    _df.pivot(index='Venue 1 Codes', columns='Venue 2 Codes', values='Distance (mi)')
    .fillna(0)
    .astype(int)
)
duration_matrix = (
    _df.pivot(index='Venue 1 Codes', columns='Venue 2 Codes', values='Duration (s)')
    .fillna(0)
    .astype(int)
)
duration_matrix_hhmm = (
    _df.pivot(index='Venue 1 Codes', columns='Venue 2 Codes', values='Duration (hhmm)')
    .fillna("0:00")
)

In [None]:
# # Preview new columns
# _df.sample(5)

In [None]:
# # Display distance matrix with code mappings
# display(distance_matrix)

# # Print dict of code: cat mappings for reference
# _ = dict(enumerate(_df['Venue 1'].cat.categories))
# for k, v in _.items():
#     print(k, ":", v)

In [None]:
# # Display duration matrix with code mappings
# display(duration_matrix)

# # Print dict of code: cat mappings for reference
# _ = dict(enumerate(_df['Venue 1'].cat.categories))
# for k, v in _.items():
#     print(k, ":", v)

In [None]:
# Display duration matrix hhmm with code mappings
display(duration_matrix_hhmm)

# Print the code : venue name legend for reference.
# Iterate the categories directly rather than binding a throwaway dict to `_`,
# because `_` is the name IPython uses for the most recent cell output.
for code, venue in enumerate(_df['Venue 1'].cat.categories):
    print(code, ":", venue)

# Optimize Road Trip via Genetic Algorithm

Thank you to [Randal S. Olson](http://www.randalolson.com/) for the genetic algorithm code used below (sourced from [this notebook](https://github.com/rhiever/Data-Analysis-and-Machine-Learning-Projects/blob/master/optimal-road-trip/Computing%20the%20optimal%20road%20trip%20across%20the%20U.S..ipynb) and explained further in [this blog post](http://www.randalolson.com/2015/03/08/computing-the-optimal-road-trip-across-the-u-s/)). All credit for the code goes to him; I have made only a few minor adjustments.

In [70]:
from src.genetic_algorithm import *

# Run the genetic algorithm over all points of interest, reading pairwise
# travel data from the cached distance/duration CSV
results = run_genetic_algorithm(
    MY_POINTS_OF_INTEREST,
    distance_duration_filename,
    generations=5000,
    population_size=100,
)

# Fill the HTML template with these results and write the page to ./tmp
update_results_html_file(
    "./src/results_template.html",
    "./tmp/results.html",
    results,
)

Generation 0 best: 6628 | Unique genomes: 100
('Big Sur, CA', 'San Francisco, California', 'Zion National Park Visitor Center, Zion – Mount Carmel Highway, Hurricane, UT', 'Horseshoe Bend Parking Lot, Page, AZ', 'Monument Valley Navajo Tribal Park, Main Monument Valley Road, Oljato-Monument Valley, AZ', 'Natural Bridges Visitor Center, Natural Bridge, Lake Powell, UT', 'Yosemite Valley Visitor Center, Village Drive, Yosemite Valley, CA', 'Moro Rock Trail, California', 'D L Bliss State Park, California 89, South Lake Tahoe, CA', 'Castle Rock Entrance Station Parking Lot, Unnamed Road, Saratoga, CA', 'Carmel-by-the-Sea, CA', 'Arches National Park Visitor Center, Moab, UT', 'Island in the Sky Visitor Center, Grand View Point Road, Moab, UT', 'Bryce Canyon National Park Visitor Center, Utah 63, Bryce, UT', 'Grand Canyon Visitor Center, South Entrance Road, Grand Canyon Village, AZ', 'San Simeon, CA', 'Kanarra Creek Trailhead, Kanarraville, UT', 'Calf Creek Campground, Boulder, UT', 'Red Cl

Generation 4000 best: 2915 | Unique genomes: 87
('Red Cliffs Recreation Area, Unnamed Road, Washington, UT', 'San Simeon, CA', 'Big Sur, CA', 'Carmel-by-the-Sea, CA', 'Castle Rock Entrance Station Parking Lot, Unnamed Road, Saratoga, CA', 'San Francisco, California', 'D L Bliss State Park, California 89, South Lake Tahoe, CA', 'Yosemite Valley Visitor Center, Village Drive, Yosemite Valley, CA', 'Moro Rock Trail, California', 'Grand Canyon Visitor Center, South Entrance Road, Grand Canyon Village, AZ', 'Horseshoe Bend Parking Lot, Page, AZ', 'Monument Valley Navajo Tribal Park, Main Monument Valley Road, Oljato-Monument Valley, AZ', 'Natural Bridges Visitor Center, Natural Bridge, Lake Powell, UT', 'Arches National Park Visitor Center, Moab, UT', 'Island in the Sky Visitor Center, Grand View Point Road, Moab, UT', 'Calf Creek Campground, Boulder, UT', 'Bryce Canyon National Park Visitor Center, Utah 63, Bryce, UT', 'Zion National Park Visitor Center, Zion – Mount Carmel Highway, Hurric

In [71]:
# View map of results (map opens in new tab)
import webbrowser
from pathlib import Path

# Use the standard-library browser launcher with a file:// URI instead of the
# macOS-only `open` shell command, so this cell also works on Linux and Windows.
results_map_uri = Path("./tmp/results.html").resolve().as_uri()
if not webbrowser.open_new_tab(results_map_uri):
    print("Could not launch a browser; open {} manually".format(results_map_uri))

In [72]:
# Build the results table from the cached distance/duration data and the
# genetic algorithm results, then report the summed distance and show the table
results_df = create_results_df(distance_duration_filename, results)
total_distance_mi = results_df['Distance (mi)'].sum()
print("Total driving distance {} miles ...".format(total_distance_mi))
display(results_df)

Total driving distance 2915 miles ...


Unnamed: 0,Venue 1,Venue 2,Duration (hhmm),Distance (mi)
0,"Red Cliffs Recreation Area, Unnamed Road, Wash...","San Simeon, CA",8:42,562
1,"San Simeon, CA","Big Sur, CA",2:21,80
2,"Big Sur, CA","Carmel-by-the-Sea, CA",1:03,26
3,"Carmel-by-the-Sea, CA","Castle Rock Entrance Station Parking Lot, Unna...",1:25,70
4,"Castle Rock Entrance Station Parking Lot, Unna...","San Francisco, California",1:09,55
5,"San Francisco, California","D L Bliss State Park, California 89, South Lak...",3:29,197
6,"D L Bliss State Park, California 89, South Lak...","Yosemite Valley Visitor Center, Village Drive,...",4:36,202
7,"Yosemite Valley Visitor Center, Village Drive,...","Moro Rock Trail, California",4:29,194
8,"Moro Rock Trail, California","Grand Canyon Visitor Center, South Entrance Ro...",10:23,619
9,"Grand Canyon Visitor Center, South Entrance Ro...","Horseshoe Bend Parking Lot, Page, AZ",2:24,132


## *Optional: Save Results Map and DataFrame Shown Above*

In [73]:
import uuid

# Build the results identifier as tripName_resultsTotalDistance_xyz, where xyz is
# the last three characters of a freshly generated random UUID
_total_distance = str(results_df['Distance (mi)'].sum())
_random_suffix = str(uuid.uuid4())[-3:]
unique_results_identifier = "{}_{}_{}".format(TRIP_NAME, _total_distance, _random_suffix)

In [74]:
# Show the generated identifier; it is embedded in the saved results filenames below
unique_results_identifier

'east_2915_a53'

In [75]:
import os

# Save results map HTML
# Create the destination folder first: os.rename raises FileNotFoundError when
# the target directory does not exist.
os.makedirs("./results", exist_ok=True)

# NOTE: this *moves* ./tmp/results.html, so the cell cannot be re-run until the
# genetic algorithm cell has regenerated that file.
os.rename("./tmp/results.html", "./results/{}_results_map.html".format(unique_results_identifier))

In [76]:
# Write the results table to a CSV in ./results named after the unique results identifier
results_csv_path = "./results/{}_results_distance_duration.csv".format(unique_results_identifier)
results_df.to_csv(results_csv_path)