# [Travelbrag](http://travelbrag.ca)
Andrea & Brett's travelogue.

In [30]:
import datetime
import pandas

# Reference table of cities; later cells read its city, country, latitude
# and longitude columns.
cities = pandas.read_csv("cities.csv")

# Trip log, with the arrival/departure columns parsed into datetimes on load.
travelogue = pandas.read_csv(
    "travelogue.csv",
    parse_dates=["arrived", "departed"],
)

today = datetime.date.today()

# Map-marker colour, keyed by the set of people who have been to a place.
colours = {
    frozenset({"Andrea", "Brett"}): "#00FA21",
    frozenset({"Andrea"}): "#F2FA00",
    frozenset({"Brett"}): "#005CFA",
}

# What was our last vacation?
A "vacation" is defined as any trip Andrea and I took together; the last vacation is the most recent of those.

In [31]:
# Joint trips are the rows whose travellers field is exactly "Andrea & Brett".
is_joint_trip = travelogue["travellers"] == "Andrea & Brett"
vacations = travelogue[is_joint_trip]

# Sorting by departure date leaves the most recent joint trip in the last row.
last_vacation = vacations.sort_values(by="departed").iloc[-1]

days_since = (today - last_vacation["departed"].date()).days
summary = "Our last vacation was {}, {} days ago in {}, {}."
print(summary.format(last_vacation["purpose"], days_since,
                     last_vacation["city"], last_vacation["country"]))

Our last vacation was Civic Holiday, 68 days ago in Victoria, CAN.


# Where have we been in the last 12 months?

In [32]:
import datetime

# Build the cutoff as a pandas Timestamp (midnight of that day) instead of a
# plain datetime.date: the "arrived" column is parsed as datetimes, and current
# pandas raises TypeError when a datetime column is ordered against a date.
year_ago = pandas.Timestamp(today) - datetime.timedelta(days=365)

# Trips that started within the last 365 days, oldest first.
travelogue[travelogue["arrived"] >= year_ago].sort_values(by="arrived")

Unnamed: 0,year,purpose,travellers,city,country,arrived,departed,photos,comment
147,2015,PyCon CA/PyData NYC,Andrea & Brett,Toronto,CAN,2015-11-06,2015-11-08,,
148,2015,PyCon CA/PyData NYC,Andrea & Brett,New York,USA,2015-11-08,2015-11-11,,
149,2015,PyCon CA/PyData NYC,Andrea & Brett,Union City,USA,2015-11-11,2015-11-15,,
150,2015,PyCon CA/PyData NYC,Andrea & Brett,New Haven,USA,2015-11-14,2015-11-14,,
145,2015,US Thanksgiving,Andrea & Brett,Meridian,USA,2015-11-23,2015-11-30,,
146,2015,US Thanksgiving,Andrea & Brett,Boise,USA,2015-11-28,2015-11-28,,
144,2015,MLADS,Brett,Redmond,USA,2015-12-06,2015-12-11,,
168,2016,5-year wedding anniversary,Andrea & Brett,Tofino,CAN,2016-02-12,2016-02-16,,
169,2016,5-year wedding anniversary,Andrea & Brett,Ucluelet,CAN,2016-02-15,2016-02-15,,
170,2016,Python Day,Brett,Redmond,USA,2016-04-24,2016-04-30,,


# Where have we visited?

In [33]:
# Display names for the three-letter country codes used in the travelogue.
full_country_names = {
    "CAN": "Canada",
    "USA": "America",
    "GBR": "Great Britain",
    "JPN": "Japan",
    "BEL": "Belgium",
    "CZE": "Czech Republic",
    "ESP": "Spain",
    "FRA": "France",
    "CHE": "Switzerland",
    "CUB": "Cuba",
    "ARG": "Argentina",
    "DOM": "Dominican Republic",
}

# Unique (city, country) pairs. drop_duplicates keeps the first occurrence, so
# both lists follow the row order of the travelogue.
locations = travelogue[['city', 'country']].drop_duplicates()
countries = locations['country'].drop_duplicates().tolist()
city_names = tuple((row[0], row[1]) for row in locations.values.tolist())

print("(In chronological order of first visit)")
print()
# Count the visited pairs, not the rows of the `cities` reference table.
print(len(city_names), "cities across", len(countries), "countries:")
for country in countries:
    # Fall back to the raw code when no display name is registered for it.
    print("   ", full_country_names.get(country, country))
    for city, city_country in city_names:
        if city_country == country:
            print("       ", city)

(In chronological order of first visit)

99 cities across 12 countries:
    America
        Anaheim
        Yosemite Valley
        Chicago
        Mammoth Lakes
        Washington, D.C.
        Las Vegas
        Montclair
        New York
        Atlantic City
        Addison
        Pittsburgh
        Seattle
        Rosemont
        Roseville
        Langley
        Meridian
        Charlottesville
        Mountain View
        San Francisco
        Raleigh
        Atlanta
        Portland
        Monterey
        Santa Clara
        Ann Arbor
        Redmond
        Boise
        Union City
        New Haven
        Austin
        Palo Alto
    Canada
        Sackville
        Summerside
        Yarmouth
        Annapolis Royal
        Amherst
        Digby
        Kentville
        Moncton
        Shediac
        Calgary
        Edmonton
        Victoria
        Toronto
        Vancouver
        Bowen Island
        Montréal
        Québec
        Fredericton
        Halifax
     

# Is the data valid?
Manually entering data into a spreadsheet is error-prone, so a few quick checks are useful for catching common mistakes.

In [34]:
import datetime
import re
import urllib.parse

# Sanity checks over the cities table and the travelogue. Every failed check
# raises ValueError; the success message is printed only if all of them pass.

# CITIES
# Latitude
# NOTE: a missing (NaN) value fails both comparisons, so it is not flagged here.
if any(lat < -90 or lat > 90 for lat in cities['latitude']):
    raise ValueError('malformed latitude')

# Longitude
if any(lng < -180 or lng > 180 for lng in cities['longitude']):
    raise ValueError('malformed longitude')

# TRAVELOGUE
# Year
if (travelogue['year'] < 1995).any() or (travelogue['year'] > datetime.date.today().year).any():
    raise ValueError('year not between 1995 and today')

# Travellers
if any(x not in {"Andrea", "Brett", "Andrea & Brett"} for x in travelogue['travellers']):
    raise ValueError('unrecognized travellers')

# City
if any(pandas.isnull(travelogue['city'])):
    raise ValueError('missing the city in the travelogue data')
elif not all(travelogue["city"].isin(cities["city"].values)):
    raise ValueError("city in travelogue but not in cities")

# Country
# A valid code is a three-character, all-uppercase string. The isinstance guard
# makes a missing value (NaN, a float) count as malformed instead of crashing
# len() with a TypeError.
bad_countries = [code for code in travelogue['country'].drop_duplicates()
                 if not isinstance(code, str) or len(code) != 3 or code.upper() != code]
if bad_countries:
    raise ValueError('malformed country: {!r}'.format(bad_countries))
elif not all(travelogue["country"].isin(cities["country"].values)):
    raise ValueError("country in travelogue but not in cities")
elif len(pandas.merge(travelogue, cities, on=["city", "country"])) != len(travelogue):
    # The inner join drops trips whose (city, country) pair has no match in
    # cities, so a changed row count means the two tables disagree.
    raise ValueError("city/country in travelogue not in cities data")

# Arrived/Departed
if (travelogue['arrived'] > travelogue['departed']).any():
    raise ValueError("arrival date passed departure date")

# Photos
# Only rows that actually have a photo link are checked.
photo_urls = travelogue['photos'].dropna()
for url in photo_urls:
    parsed_url = urllib.parse.urlparse(url)
    if not parsed_url.scheme or not parsed_url.netloc or not parsed_url.path:
        raise ValueError('malformed photo URL: {}'.format(url))

print('All travelogue data is valid!')

TypeError: tuple indices must be integers or slices, not str

# GeoJSON map output
A GeoJSON file is generated to allow for easy mapping of visited cities.

In [35]:
import json

def split_travellers(travellers):
    """Split a travellers field such as "Andrea & Brett" into a list of names.

    The field is cut on the exact separator " & "; a value without that
    separator comes back as a one-element list.
    """
    names = travellers.split(" & ")
    return names

# Attach coordinates to every trip by joining on the (city, country) pair.
complete_travelogue = pandas.merge(travelogue, cities, on=["city", "country"])

# Collapse the trips into one record per location, accumulating everyone who
# has been there and the latest departure date seen.
geo_data = {}
for trip in complete_travelogue.itertuples():
    location = (trip.city, trip.country)
    visitors = split_travellers(trip.travellers)
    known = geo_data.get(location)
    if known is None:
        geo_data[location] = {"coordinates": [trip.longitude, trip.latitude],
                              "travellers": set(visitors),
                              "last visit": trip.departed}
        continue
    known["travellers"].update(visitors)
    if known["last visit"] < trip.departed:
        known["last visit"] = trip.departed

# One GeoJSON point feature per location; the marker colour is looked up from
# the set of travellers who have visited it.
features = [
    {
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": record["coordinates"]},
        "properties": {
            "city": ", ".join(place),
            "last visit": record["last visit"].strftime("%Y-%m-%d"),
            "marker-color": colours[frozenset(record["travellers"])],
        },
    }
    for place, record in geo_data.items()
]
geojson = {"type": "FeatureCollection", "features": features}

with open("travelogue.geojson", "w", encoding="utf-8") as geojson_file:
    json.dump(geojson, geojson_file, sort_keys=True)

ValueError: can not merge DataFrame with instance of type <class 'tuple'>