In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as mpatches
from mpl_toolkits.basemap import Basemap
import zipcode
%matplotlib inline 

In [12]:
# Load the park table with the park names (first CSV column) as the index.
# DataFrame.from_csv was deprecated and later removed from pandas;
# read_csv(..., index_col=0) is the supported way to load the same table.
master = pd.read_csv("nps_all.csv", index_col=0)
master.head(7)

Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season
Abraham Lincoln Birthplace NHP,42748,,,,,
Acadia NP,4660,,,,,
Adams NHP,2169,,,,,
African Burial Ground NM,10007,,,,,
Agate Fossil Beds NM,69346,,,,,
Alibates Flint Quarries NM,79036,,,,,
Allegheny Portage Railroad NHS,16641,,,,,


## This CSV contains only the names and zip codes of all the national parks. The zip codes need to be reformatted as five-digit strings, because their leading zeroes have fallen off. The names of all the national parks were pulled from https://irma.nps.gov/Stats/SSRSReports/National%20Reports/Annual%20Recreation%20Visitation%20By%20Park%20(1979%20-%20Last%20Calendar%20Year). Zip codes were sourced from Google. 

In [13]:
#this function converts three-letter month strings (e.g. 'JAN') to month numbers (1-12)
def num_month(month):
    """Return the calendar number (1-12) for a three-letter month abbreviation.

    The lookup ignores letter case and surrounding whitespace, so 'JAN',
    'jan' and ' Jan ' all map to 1.

    Raises:
        ValueError: if ``month`` is not one of the twelve abbreviations.
    """
    months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
    # list.index raises ValueError for unknown labels, same as before.
    return months.index(str(month).strip().upper()) + 1

In [14]:
files = master.index

#joining up each park's data with the master table
for i, park in enumerate(files):
    #getting the file (read_csv with index_col=0 replaces the removed DataFrame.from_csv)
    part = pd.read_csv("tables/" + park + ".csv", index_col=0)
    part = part.drop('Textbox5', axis=1)
    #first row of the park table, taken by position (.iloc replaces the removed .ix);
    #the sort below expects that row to be labelled 'Average'
    avgs = pd.DataFrame(part.iloc[0].astype(float))
    #ranking the averages, highest first (sort_values replaces the removed DataFrame.sort)
    avgs = avgs.sort_values(by='Average', ascending=False)
    ranking = avgs.index
    #the four top-ranked months followed by the bottom-ranked one, as month numbers
    seasons = [num_month(ranking[pos]) for pos in (0, 1, 2, 3, -1)]
    #putting the best, 2nd best, so on months into the master:
    #positional columns 1-5 are the season columns that follow Zip (column 0)
    for offset, month in enumerate(seasons, start=1):
        master.iloc[i, offset] = month

master.head()
    



Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season
Abraham Lincoln Birthplace NHP,42748,7,6,8,5,1
Acadia NP,4660,8,7,9,6,12
Adams NHP,2169,7,8,6,9,1
African Burial Ground NM,10007,7,5,6,4,1
Agate Fossil Beds NM,69346,7,6,8,9,2


## All of the visitor information files are from https://irma.nps.gov/Stats/SSRSReports/Park%20Specific%20Reports/Recreation%20Visitors%20By%20Month%20(1979%20-%20Last%20Calendar%20Year)?Park=ACAD. 
## They were reformatted using Excel VBA to include averages. To find the "best season", the "2nd best season", and so on, I averaged each month from 2000 to 2015 for every national park. The month with the highest average is marked as the "best season."

In [15]:
#this function translates the best month to a corresponding color
def color_translate(color_num):
    """Return the color name assigned to month number ``color_num`` (1-12).

    Raises:
        IndexError: if ``color_num`` is outside 1-12. Without this check,
            0 and negative numbers would silently wrap around to colors at
            the end of the list instead of failing.
    """
    colors = ['red','coral','orange','gold','yellow','greenyellow','green','teal','blue','slateblue','blueviolet','darkmagenta']
    if not 1 <= color_num <= len(colors):
        raise IndexError("month number out of range (expected 1-12): %r" % (color_num,))
    return colors[color_num - 1]

# Quick check on the first park's best month (row 0, positional column 1);
# .iloc replaces the removed .ix indexer.
color_translate(int(master.iloc[0, 1]))

'green'

In [6]:
#giving every park the color that matches its best month
best_month = master['Best Season'].astype(int)
master['Best color'] = best_month.map(color_translate)
master.head()

Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season,Best color
Abraham Lincoln Birthplace NHP,42748,7,6,8,5,1,green
Acadia NP,4660,8,7,9,6,12,teal
Adams NHP,2169,7,8,6,9,1,green
African Burial Ground NM,10007,7,5,6,4,1,green
Agate Fossil Beds NM,69346,7,6,8,9,2,green


## Now each park has a color assigned to it that corresponds to its most popular season. I'll use this later when we plot the parks.

In [7]:
#zip codes become strings, right-aligned to width 5 with "0" as the fill character
zip_text = master['Zip'].astype(str)
master['Zip'] = zip_text.map('{:0>5}'.format)

master.head()

Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season,Best color
Abraham Lincoln Birthplace NHP,42748,7,6,8,5,1,green
Acadia NP,04660,8,7,9,6,12,teal
Adams NHP,02169,7,8,6,9,1,green
African Burial Ground NM,10007,7,5,6,4,1,green
Agate Fossil Beds NM,69346,7,6,8,9,2,green
Alibates Flint Quarries NM,79036,5,10,6,9,1,yellow
Allegheny Portage Railroad NHS,16641,7,8,6,5,2,green
Amistad NRA,78840,3,6,4,7,12,orange
Andersonville NHS,31711,5,4,3,6,1,yellow
Andrew Johnson NHS,37743,5,6,7,3,2,yellow


In [8]:
#latitude lookup for a zipcode object
def make_lat(zobject):
    """Return ``zobject.lat``, or 0.0 when ``zobject`` is None."""
    return 0.0 if zobject is None else zobject.lat

#longitude lookup for a zipcode object
def make_lon(zobject):
    """Return ``zobject.lon``, or 0.0 when ``zobject`` is None."""
    return 0.0 if zobject is None else zobject.lon


Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season,Best color,ZObjects,Lat,Lon
Abraham Lincoln Birthplace NHP,42748,7,6,8,5,1,green,<Zip: 42748>,37.56,-85.73
Acadia NP,04660,8,7,9,6,12,teal,<Zip: 04660>,44.31,-68.36
Adams NHP,02169,7,8,6,9,1,green,<Zip: 02169>,42.26,-71.00
African Burial Ground NM,10007,7,5,6,4,1,green,<Zip: 10007>,40.71,-73.99
Agate Fossil Beds NM,69346,7,6,8,9,2,green,<Zip: 69346>,42.68,-103.88
Alibates Flint Quarries NM,79036,5,10,6,9,1,yellow,<Zip: 79036>,35.64,-101.59
Allegheny Portage Railroad NHS,16641,7,8,6,5,2,green,<Zip: 16641>,40.48,-78.55
Amistad NRA,78840,3,6,4,7,12,orange,<Zip: 78840>,29.37,-100.89
Andersonville NHS,31711,5,4,3,6,1,yellow,<Zip: 31711>,32.19,-84.14
Andrew Johnson NHS,37743,5,6,7,3,2,yellow,<Zip: 37743>,36.16,-82.81


In [None]:
#looking up each zip code once, then deriving latitude and longitude from the result
zip_objects = master['Zip'].map(zipcode.isequal)
master['ZObjects'] = zip_objects
master['Lat'] = zip_objects.map(make_lat)
master['Lon'] = zip_objects.map(make_lon)
master.head()

## Almost all of the parks have latitude and longitude data now, and I'll use that when I plot the parks. Some zipcodes, however, are not available in the zipcode library, so I'll save this as it is and look those zipcodes up on my own. 

In [9]:
#parks whose Lat and Lon are both 0.0, i.e. the zipcode lookup returned nothing
no_coords = (master['Lat'] == 0.0) & (master['Lon'] == 0.0)
master[no_coords]

Unnamed: 0,Zip,Best Season,2nd Best Season,3rd Best Season,4th Best Season,Off Season,Best color,ZObjects,Lat,Lon
Antietam NB,21782,7,6,9,8,1,green,,0,0
Arlington House The RE Lee MEM,22211,5,4,6,7,1,yellow,,0,0
Assateague Island NS,21811,7,8,6,9,12,green,,0,0
Cape Hatteras NS,27915,7,8,6,9,2,green,,0,0
Cape Lookout NS,28531,7,8,6,9,2,green,,0,0
Carl Sandburg Home NHS,28731,10,7,5,4,2,slateblue,,0,0
Catoctin Mountain Park,21788,8,6,10,5,1,teal,,0,0
Charles Pinckney NHS,29464,4,3,9,7,12,gold,,0,0
Chattahoochee River NRA,30350,6,7,5,8,1,greenyellow,,0,0
Chesapeake & Ohio Canal NHP,20854,8,7,5,6,2,teal,,0,0


In [10]:
# Write the current state of the master table to Master.csv (relative path).
master.to_csv("Master.csv")