In [None]:
import numpy as np
import pandas as pd
import pickle
import pandas.io.sql as pdsql
import psycopg2
import timeit
import sys
from IPython.display import clear_output

from mpl_toolkits.basemap import Basemap

import matplotlib as mpl
import pylab as pl
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import LineCollection, PatchCollection

import imageio

#%matplotlib inline
# Shared font settings: axis labels (via axis_font) and tick labels all use size 30.
axis_font = dict(size='30')
mpl.rcParams.update({'xtick.labelsize': 30, 'ytick.labelsize': 30})

In [None]:
# Load the pickled raw pickup counts. The context manager closes the file
# handle even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("./intermediate_files/dataframes_visualize_nta/data_raw.p", "rb") as raw_file:
    df = pickle.load(raw_file)

# Read the NTA geometry metadata and the per-day info from PostgreSQL.
# NOTE(review): the database password is hard-coded in the connection string;
# consider moving it to an environment variable or a ~/.pgpass file.
conn = psycopg2.connect("dbname='nyc_taxi' user='postgres' host='localhost' password='organon'")
try:
    nta = pdsql.read_sql("SELECT gid, shape_area, ntacode, ntaname FROM nta", conn, coerce_float=True, params=None)
    day_info = pdsql.read_sql("SELECT doy, holiday FROM day_info", conn, coerce_float=True, params=None)
finally:
    # Always release the connection, even when one of the queries raises.
    conn.close()

In [None]:
# Lookup table from NTA code to its gid, pairing the two columns row by row.
ntacode2gid = dict(zip(nta.ntacode.values, nta.gid.values))

In [None]:
doys_test = [20, 100, 313] # The DOYs to test
gids_test = [26, 27, 37, 52, 53, 54, 59, 71, 72, 88, 89, 90, 103, 114, 115, 118, 128,
            129, 135, 136, 153, 154, 181, 182, 183, 184] # The NTAs to test

# The true hourly pickup density for each test NTA on each test day,
# indexed as density_hourly[nta_index, hour, doy_index].
# Factor applied to nta.shape_area below (presumably sq ft -> sq mi; confirm the units of shape_area).
sqft2sqmi=3.58701e-8
# Zero-initialised (instead of np.empty) so no slot ever holds uninitialised memory.
density_hourly = np.zeros((len(gids_test), 24, len(doys_test)), dtype='float64') # The true densities
for i, gid in enumerate(gids_test):
    # The area depends only on the NTA, so look it up once per NTA.
    area = nta.shape_area[nta.gid==gid].values[0]
    rows_gid = df[df.pickup_gid==gid]
    for j, doy in enumerate(doys_test):
        rows_day = rows_gid[rows_gid.pickup_doy==doy]
        for h in range(24):
            # Reduce the matching rows to a scalar: an (hour, NTA, day) cell with no
            # row contributes 0 instead of raising on an empty Series.
            # NOTE(review): assumes a missing row means zero pickups and that
            # duplicate rows, if any, should be added together - confirm against
            # how data_raw.p is built.
            matched = rows_day['count'][rows_day.pickup_hour==h]
            density_hourly[i, h, j] = matched.sum() / area / sqft2sqmi

In [None]:
# Predicted counts per test day, loaded from one text file per DOY.
counts_pred = {}
for doy in doys_test:
    counts_pred[doy] = np.loadtxt('./intermediate_files/dataframes_test/data_excerpt_{0}.txt'.format(doy))

# The predicted densities, indexed as [nta_index, hour, doy_index].
density_hourly_pred = np.empty((len(gids_test), 24, len(doys_test)), dtype='float64')
hour_offsets = np.arange(24)
for day_idx, doy in enumerate(doys_test):
    day_counts = counts_pred[doy]
    for nta_idx, gid in enumerate(gids_test):
        nta_area = nta.shape_area[nta.gid==gid].values[0]
        # The 24 hourly values of an NTA are read from positions (gid-1)*24 .. (gid-1)*24+23.
        hourly_counts = day_counts[(gid-1)*24 + hour_offsets]
        density_hourly_pred[nta_idx, :, day_idx] = hourly_counts / nta_area / sqft2sqmi

In [None]:
# Plot the 24-hour density maps (true and predicted) for every test day
west, south, east, north = -74.15, 40.50, -73.65, 40.95
gid2idx = {gid: i for i, gid in enumerate(gids_test)}


def _save_density_map(densities, hour, doy, density_max, out_path):
    """Render the NTA density map for one hour and save it to ``out_path``.

    Parameters
    ----------
    densities : 2-D array indexed as [nta_index, hour] (nta_index follows gids_test).
    hour : hour of day to draw; also shown in the figure title.
    doy : day of year being drawn (kept for the caller's bookkeeping; the
        file name is fully determined by ``out_path``).
    density_max : value mapped to the top of the colour scale and used to
        label the colour bar.
    out_path : file name passed to ``fig.savefig``.
    """
    # Avoid dividing by zero when every density is 0: all patches then get the
    # lowest colour instead of NaN-derived ones.
    scale = density_max if density_max > 0 else 1.0
    cm = mpl.cm.jet

    # A fresh Basemap is built for every figure, as in the original cell.
    m = Basemap(llcrnrlon=west, llcrnrlat=south, urcrnrlon=east, urcrnrlat=north,
                resolution='i', projection='merc', lat_0=(south+north)/2, lon_0=(west+east)/2)
    fig = plt.figure(figsize=(30,30))
    try:
        ax = fig.add_subplot(111)
        ax.set_title('h={0}'.format(hour), fontsize=35)

        # Draw the NTA boundaries on this figure's axes explicitly rather than
        # relying on pyplot's "current axes".
        m.readshapefile('./datasets/nta_2010/geo_export_d2bec86b-a3e8-4047-b785-24286f7718ab', 'nyc',
                        linewidth=1, zorder=1, ax=ax)

        patches = []
        for info, shape in zip(m.nyc_info, m.nyc):
            gid = ntacode2gid[info['ntacode']]
            if gid in gid2idx:
                # `closed` is passed by keyword: newer matplotlib releases reject it positionally.
                patches.append(Polygon(np.array(shape), closed=True,
                                       facecolor=cm(densities[gid2idx[gid], hour] / scale)))

        ax.add_collection(PatchCollection(patches, match_original=True, alpha=0.6, linewidths=1., zorder=2))

        fig.subplots_adjust(right=0.8)
        cbar_ax = fig.add_axes([0.85, 0.12, 0.02, 0.78])

        # Build the colour bar from a stand-alone ScalarMappable on a 0..1 scale
        # instead of an imshow in a throwaway figure; that extra figure used to
        # become the "current" one, so plt.close() closed it and leaked `fig`.
        mappable = mpl.cm.ScalarMappable(norm=mpl.colors.Normalize(vmin=0.0, vmax=1.0), cmap=cm)
        mappable.set_array([])  # required by older matplotlib versions before drawing a colorbar
        ticks = np.arange(0.0, 1.01, 0.2)
        cbar = fig.colorbar(mappable, ticks=ticks, orientation='vertical', cax=cbar_ax, alpha=0.6)
        # Tick positions are on the normalised 0..1 scale; label them in density units.
        cbar.ax.set_yticklabels((ticks * density_max).astype('int'))
        cbar.ax.set_ylabel('pickup/hr/mi$^2$', **axis_font)

        fig.savefig(out_path, dpi=50, bbox_inches='tight')
    finally:
        # Close exactly this figure so the 30x30 canvases do not accumulate in memory.
        plt.close(fig)


for j, doy in enumerate(doys_test):
    density_hourly_test = density_hourly[:, :, j]
    density_hourly_pred_test = density_hourly_pred[:, :, j]
    # One shared maximum per day so the true and predicted maps use the same colour scale.
    density_hourly_max = max([np.max(density_hourly_test), np.max(density_hourly_pred_test)])

    for hour in range(24):
        _save_density_map(density_hourly_test, hour, doy, density_hourly_max,
                          'density_true_{0}_{1}.jpg'.format(hour, doy))

    for hour in range(24):
        _save_density_map(density_hourly_pred_test, hour, doy, density_hourly_max,
                          'density_pred_{0}_{1}.jpg'.format(hour, doy))

In [None]:
# Generate gif files: one animation of the true maps and one of the predicted
# maps per test day, each with 24 hourly frames.
for doy in doys_test:
    images = [imageio.imread('density_true_{0}_{1}.jpg'.format(hour, doy)) for hour in range(24)]
    images_pred = [imageio.imread('density_pred_{0}_{1}.jpg'.format(hour, doy)) for hour in range(24)]

    # Write each GIF once, after all 24 frames are loaded; previously both files
    # were rewritten on every hour with a partial frame list.
    imageio.mimsave('figures/density_by_nta_pred/density_true_doy={0}.gif'.format(doy), images, fps=1)
    imageio.mimsave('figures/density_by_nta_pred/density_pred_doy={0}.gif'.format(doy), images_pred, fps=1)