In [1]:
import os
import pickle
import sys
import timeit

import numpy as np
import pandas as pd
import pandas.io.sql as pdsql
import psycopg2
from IPython.display import clear_output

from mpl_toolkits.basemap import Basemap

import matplotlib as mpl
import pylab as pl
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import LineCollection, PatchCollection

import imageio

#%matplotlib inline
# Font keyword arguments reused (via **axis_font) when labelling axes.
axis_font = dict(size='30')
# Enlarge the tick labels on both axes for every figure created afterwards.
mpl.rcParams.update({'xtick.labelsize': 30, 'ytick.labelsize': 30})

In [3]:
# Load the pickled pickup DataFrame. Switch DATA_PICKLE to one of the
# commented alternatives to visualize only yellow or only green taxis.
DATA_PICKLE = "./intermediate_files/dataframes_visualize_ct/data_raw_ct.p"
#DATA_PICKLE = "./intermediate_files/dataframes_visualize_ct/data_raw_ct_yellow.p"
#DATA_PICKLE = "./intermediate_files/dataframes_visualize_ct/data_raw_ct_green.p"

# NOTE(security): pickle.load can execute arbitrary code; only load pickles
# that were produced by this project.
# The `with` block guarantees the file handle is closed after loading.
with open(DATA_PICKLE, "rb") as pickle_file:
    df = pickle.load(pickle_file)

# Read the census-tract attribute table from the local PostgreSQL database.
# The password can be supplied through the NYC_TAXI_DB_PASSWORD environment
# variable; the literal fallback only preserves the previous behaviour.
# TODO(review): remove the hard-coded fallback once the variable is set everywhere.
conn = psycopg2.connect(dbname='nyc_taxi', user='postgres', host='localhost',
                        password=os.environ.get('NYC_TAXI_DB_PASSWORD', 'organon'))
try:
    ct = pdsql.read_sql("SELECT gid, boro_name, shape_area, boro_ct201 FROM ct", conn, coerce_float=True, params=None)
finally:
    # Release the connection even if the query fails.
    conn.close()

In [4]:
# Census tracts are identified both by boro_ct201 and by gid;
# build a lookup from the former to the latter.
ct2gid = dict(zip(ct.boro_ct201, ct.gid))

In [5]:
# The hourly pickup per square mile for each CT averaged over 365 days of 2014
#
# Result: density_hourly[gid - 1, h] is the mean of df['count'] over the rows
# with pickup_gid == gid and pickup_hour == h, divided by the tract's
# shape_area converted with sqft2sqmi. Cells with no matching rows are NaN.
sqft2sqmi = 3.58701e-8              # 1 / 5280**2; presumably shape_area is in square feet
n_tracts = 2166                     # gids run from 1 to n_tracts inclusive
tract_gids = np.arange(1, n_tracts + 1)
hours = np.arange(24)

# Fail fast (as the per-gid lookup used to) if a tract has no row in `ct`.
missing_gids = sorted(set(tract_gids.tolist()) - set(ct.gid))
if missing_gids:
    raise IndexError("gids missing from ct: {0}".format(missing_gids))

# One grouped pass over the data instead of one boolean mask per
# (tract, hour) pair; reindex forces the full (n_tracts, 24) grid so that
# combinations absent from df become NaN rows/columns.
mean_count = (
    df.groupby(['pickup_gid', 'pickup_hour'])['count']
      .mean()
      .unstack('pickup_hour')
      .reindex(index=tract_gids, columns=hours)
)

# First shape_area per gid, aligned with the rows of mean_count.
tract_area = (
    ct.drop_duplicates('gid')
      .set_index('gid')['shape_area']
      .reindex(tract_gids)
)

density_hourly = (mean_count.div(tract_area, axis=0) / sqft2sqmi).values.astype('float64')

print("gid {0} computed".format(n_tracts))

gid 2166 computed


In [6]:
# Plot the 24-hour density map
#
# For every hour of the day, draw the census-tract shapes coloured by
# density_hourly (normalized by the overall maximum) and save the figure as
# density_<hour>.jpg in the working directory.

# nanmax: tract/hour cells without data are NaN and must not poison the scale.
density_hourly_max = np.nanmax(density_hourly)
west, south, east, north = -74.15, 40.50, -73.65, 40.95

cm = mpl.cm.jet
# Colorbar ticks as fractions of the maximum density.
cbar_ticks = np.arange(0.0, 1.01, 0.2)

for hour in range(24):
    m = Basemap(llcrnrlon=west, llcrnrlat=south, urcrnrlon=east, urcrnrlat=north,
             resolution='i', projection='merc', lat_0=(south+north)/2, lon_0=(west+east)/2)
    fig = plt.figure(figsize=(30,30))
    ax = fig.add_subplot(111)
    ax.set_title('h={0}'.format(hour), fontsize=35)

    # Draws the tract boundaries on the current axes and exposes the shapes
    # and their attributes as m.nyc / m.nyc_info.
    m.readshapefile('./datasets/ct_2010/geo_export_670644cc-4c81-49e9-9d6d-fb574f649fff', 'nyc', linewidth=1, zorder=1)

    # One filled polygon per shape; its colour is the tract's density for
    # this hour as a fraction of the overall maximum.
    patches = []
    for info, shape in zip(m.nyc_info, m.nyc):
        fraction = density_hourly[ct2gid[info['boro_ct201']]-1, hour] / density_hourly_max
        patches.append(Polygon(np.array(shape), closed=True, facecolor=cm(fraction)))

    p = PatchCollection(patches, match_original=True, alpha=0.6, linewidths=1., zorder=2)

    ax.add_collection(p)

    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.12, 0.02, 0.78])
    a = np.array([[0.0,1.0]])

    # Throw-away figure whose image only serves as the mappable for the
    # colorbar drawn on `fig`.
    aux_fig = pl.figure(figsize=(9, 1.5))
    img = pl.imshow(a, cmap=cm, alpha=0.6)
    pl.gca().set_visible(False)
    cbar = fig.colorbar(img, ticks=cbar_ticks, orientation='vertical', cax =cbar_ax)
    # Relabel the 0..1 ticks in real units.
    cbar.ax.set_yticklabels((cbar_ticks * density_hourly_max).astype('int'))
    cbar.ax.set_ylabel('pickup/hr/mi$^2$', **axis_font)

    fig.savefig('density_{0}.jpg'.format(hour), dpi=50, bbox_inches='tight')

    # Close both figures explicitly: a bare plt.close() only closes the
    # current (auxiliary) figure and leaves the large map figure open.
    plt.close(aux_fig)
    plt.close(fig)



In [7]:
# Generate gif file
# Read the 24 hourly frames in order and write them out as a 1-fps animation.
frame_paths = ['density_{0}.jpg'.format(hour) for hour in range(24)]
images = [imageio.imread(frame_path) for frame_path in frame_paths]
imageio.mimsave('./figures/density_movie_ct.gif', images, fps=1)
#imageio.mimsave('./figures/density_movie_ct_yellow.gif', images, fps=1)
#imageio.mimsave('./figures/density_movie_ct_green.gif', images, fps=1)