In [None]:
import pandas
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Read the turnstile export; the eleven column labels are assigned by hand.
df = pandas.read_csv("Saturday, April 02, 2016")
df.columns = [
    'C/A', 'UNIT', 'SCP', 'STATION', 'LINENAME', 'DIVISION',
    'DATE', 'TIME', 'DESC', 'IN', 'OUT',
]
# Narrow the frame to the six fields that are kept from here on.
df = df[['UNIT', 'SCP', 'DATE', 'TIME', 'IN', 'OUT']]
df.head()

In [None]:
# Create a coordinates look up table (UNIT -> [LAT, LON]); the COORD column
# itself is added to df further down in this cell.

# geocoded.csv is read without a header row: column 0 is used as the unit id
# and columns 5 and 6 are labelled LAT and LON.
# NOTE(review): the LAT/LON order of columns 5 and 6 is taken from the labels
# assigned here - confirm against the file.
geocode = pandas.read_csv('geocoded.csv', header=None)
geocode = geocode.drop_duplicates(0)  # keep the first row seen for each unit
geocode = geocode[[0,5,6]]
geocode.columns = ['UNIT', 'LAT', 'LON']

# Read the named columns directly. The previous row[1] / row[2] lookups relied
# on integer keys falling back to positions on a string-labelled row, which
# newer pandas releases deprecate; zip over the columns also avoids iterrows().
geocode_mapping = {
    unit: [lat, lon]
    for unit, lat, lon in zip(geocode['UNIT'], geocode['LAT'], geocode['LON'])
}

def map(unit, mapping=None):
    """Return the coordinates stored for ``unit``, or NaN when there are none.

    The name shadows the builtin ``map`` for the rest of the notebook; it is
    kept because the ``df['UNIT'].apply(map)`` call refers to it.

    Parameters
    ----------
    unit : hashable
        Key to look up.
    mapping : dict, optional
        Lookup table to use. Defaults to the module-level ``geocode_mapping``.

    Returns
    -------
    The value stored under ``unit``; ``np.nan`` if the key is missing or is
    not hashable.
    """
    if mapping is None:
        # Resolved outside the try block so that a missing lookup table raises
        # NameError instead of silently turning every row into NaN.
        mapping = geocode_mapping
    try:
        return mapping[unit]
    except (KeyError, TypeError):
        # KeyError: unit has no entry; TypeError: unit cannot be a dict key.
        return np.nan

# Look up every row's UNIT with map() and store the result in a new COORD
# column; units that map() cannot resolve get NaN.
df['COORD'] = df['UNIT'].apply(map)

In [None]:
# Preview the first five rows, now including the COORD column.
df.head()

In [None]:
# Join the DATE and TIME text columns with a space and parse the result into
# a single DATETIME column.

import datetime
df["DATETIME"] = pandas.to_datetime(df["DATE"] + ' ' + df["TIME"])

In [None]:
def classify_day(time):
    """Label a date-like value as "Weekday" or "Weekend".

    ``time`` must provide ``isoweekday()``. ISO weekdays 1 through 5 count as
    "Weekday"; every other value is "Weekend".
    """
    return "Weekday" if 1 <= time.isoweekday() <= 5 else "Weekend"

# Label each reading "Weekday"/"Weekend" from its parsed timestamp. The source
# has to be DATETIME: df has no DAY column until this assignment creates it,
# so reading df['DAY'] here raised KeyError on a fresh kernel.
df["DAY"] = df['DATETIME'].apply(classify_day)

In [None]:
def classify_time(time):
    """Bucket a time-like value into "Morning", "Evening" or None.

    ``time`` must expose an ``hour`` attribute.

    * "Morning": hour from 5 to 9, both ends included.
    * "Evening": hour strictly between 17 and 22.
    * None for any other hour.
    """
    hour = time.hour
    if hour >= 5 and hour <= 9:
        return "Morning"
    if hour > 17 and hour < 22:
        return "Evening"
    return None
# Tag every reading with its time-of-day bucket ("Morning", "Evening" or None)
# by passing each DATETIME value to classify_time().
df["M_E"] = df['DATETIME'].apply(classify_time)

In [None]:
# Preview the first five rows with the derived columns in place.
df.head(5)

In [None]:
def _summarize_units(frame, day, m_e):
    """Total the entries and exits of every UNIT for one DAY / M_E selection.

    Parameters
    ----------
    frame : pandas.DataFrame
        Needs the columns UNIT, DAY, M_E, IN, OUT and COORD.
    day : str
        Value of the DAY column to keep (e.g. 'Weekday').
    m_e : str
        Value of the M_E column to keep (e.g. "Morning" or "Evening").

    Returns
    -------
    pandas.DataFrame
        One row per unit with the columns UNIT, IN, OUT and COORDS.
    """
    records = []
    # Group by the plain column name: a one-element list makes newer pandas
    # hand back 1-tuples as keys, which would end up in the UNIT column.
    for unit, group in frame.groupby('UNIT'):
        window = group[(group['DAY'] == day) & (group['M_E'] == m_e)]

        # IN and OUT are running counters, so the traffic is the difference
        # between consecutive readings.
        # NOTE(review): readings are differenced per UNIT only and after the
        # time-of-day filter, so neighbouring rows may come from different SCP
        # values or different days - confirm this is intended.
        entries = np.diff(window['IN'].values)
        exits = np.diff(window['OUT'].values)

        # Discard negative steps and jumps of 1e4 or more in either counter.
        plausible = (entries >= 0) & (entries < 1e4) & (exits >= 0) & (exits < 1e4)

        # The coordinate is taken from the unit's first row, filtered or not.
        records.append((unit, entries[plausible].sum(),
                        exits[plausible].sum(), group['COORD'].iloc[0]))

        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("%s STATION FINISHED" % unit)

    # Build the frame once instead of appending a row per iteration.
    return pandas.DataFrame(records, columns=['UNIT', 'IN', 'OUT', 'COORDS'])


# Selection used for this run. The plotting cell further down reads one file
# per time of day, so this cell has to be run once for each M_E value.
DAY = 'Weekday'
M_E = "Evening"

masterDF = _summarize_units(df, DAY, M_E)
masterDF.to_csv("SaturdayApril022016-%s-%s"%(DAY, M_E))
    


In [None]:
%matplotlib inline
from mpl_toolkits.basemap import Basemap
plt.figure(figsize=(12,12))


def _parse_coords(value):
    """Turn one COORDS cell into a list of two floats.

    After the CSV round trip the cell is text such as "[40.75, -73.99]";
    a list or tuple is accepted as well.
    """
    if isinstance(value, (list, tuple)):
        return [float(v) for v in value]
    return [float(part) for part in str(value).strip('[]() ').split(',')]


# One panel per summary file. The file names follow the
# "SaturdayApril022016-<DAY>-<M_E>" pattern used when the summaries are
# written, so the morning file ends in "Morning" (not "Mornings").
panels = [("Weekday Morning", "SaturdayApril022016-Weekday-Morning"),
          ("Weekday Evening", "SaturdayApril022016-Weekday-Evening")]

for i, (title, path) in enumerate(panels):
    masterDF = pandas.read_csv(path)

    # Subplot indices are 1-based, enumerate() is 0-based.
    plt.subplot(2, 1, i + 1)
    # A wider alternative box is lon -74.2..-73.7, lat 40.5..41.
    my_map = Basemap(projection='merc', resolution = 'l',
        llcrnrlon=-74.1, llcrnrlat=40.7,
        urcrnrlon=-73.9, urcrnrlat=40.8)

    my_map.drawcoastlines()
    my_map.drawcountries()
    my_map.drawmapboundary()

    masterDF = masterDF.dropna(subset=['COORDS', 'IN', 'OUT'])

    # Colour encodes IN minus OUT; marker size is total traffic scaled so the
    # busiest unit gets size 60.
    net_flow = masterDF['IN'] - masterDF['OUT']
    traffic = masterDF['IN'] + masterDF['OUT']
    sizes = 60 * traffic / traffic.max()

    # COORDS comes back from the CSV as text and must be parsed before it can
    # be indexed; astype(list) left the strings untouched.
    # NOTE(review): the [LAT, LON] element order follows the labels given to
    # the geocode columns earlier in the notebook - confirm against the data.
    coords = [_parse_coords(c) for c in masterDF['COORDS']]
    lat = [coord[0] for coord in coords]
    lon = [coord[1] for coord in coords]
    x, y = my_map(lon, lat)  # longitude first, same argument order as before

    points = plt.scatter(x, y, c=net_flow, cmap=plt.get_cmap("seismic"), s=sizes)
    # Each panel has its own colour range, so each gets its own colorbar.
    plt.colorbar(points)
    plt.title(title)

plt.show()

