In [1]:
import os
from os import path
from collections import defaultdict

import pandas as pd
import numpy as np
import scipy.ndimage.filters

import matplotlib
import matplotlib.colors
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap, rgb_to_hsv, hsv_to_rgb

import seaborn as sns
%matplotlib inline

from PIL import Image, ImageDraw
import folium

In [4]:
def get_kernel(kernel_size, blur=1/20, halo=.001):
    """
    Create an (n*2+1)x(n*2+1) numpy array, radially symmetric around its middle pixel.
    Output can be used as the kernel for convolution.

    kernel_size: n, the kernel radius in pixels (must be >= 1).
    blur: scales the exponential fall-off; larger values decay more slowly.
    halo: weight of a faint linear term that reaches zero at distance n from the center.

    Raises ValueError if kernel_size is smaller than 1.
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be at least 1")

    # generate x and y grids (indices 0 .. 2n along both axes)
    x, y = np.mgrid[0:kernel_size*2+1, 0:kernel_size*2+1]

    # The middle of a 0-based grid with 2n+1 samples is index n. Using n+1 would
    # make the kernel lopsided and shift every convolved image by one pixel.
    center = kernel_size
    r = np.sqrt((x - center)**2 + (y - center)**2)  # distance from center

    # now compute the kernel. This function is a bit arbitrary.
    # exponential core + linear halo that fades out at r == kernel_size
    kernel = np.exp(-r/kernel_size/blur) + (1 - r/kernel_size).clip(0)*halo
    return kernel

In [5]:
def add_lines(image_array, xys, width=1, weights=None):
    """
    Add a set of lines (xys) to an existing image_array and return a blurred copy.

    image_array: 2-D array the lines are accumulated into. It is modified IN PLACE
        (the drawn lines are added to it); expected to be a float array.
    xys: iterable of line coordinates, each passed as-is to PIL's ImageDraw.line.
    width: width of lines; also determines the blur kernel size (width*4).
    weights: optional sequence of multipliers for lines, matched to xys by position.

    Returns a new array: image_array convolved with get_kernel(width*4).
    """

    if weights is not None:
        # Index strictly by position. A pandas Series with a non-integer index
        # would otherwise be looked up by label first.
        weights = np.asarray(weights)

    # PIL sizes are (width, height), numpy shapes are (rows, columns)
    canvas_size = (image_array.shape[1], image_array.shape[0])

    for i, xy in enumerate(xys):  # loop over lines
        # create a new gray scale image
        image = Image.new("L", canvas_size)

        # draw the line with intensity 200
        ImageDraw.Draw(image).line(xy, 200, width=width)

        # convert to array
        new_image_array = np.asarray(image, dtype=np.uint8).astype(float)

        # apply weights if provided
        if weights is not None:
            new_image_array *= weights[i]

        # add to existing array
        image_array += new_image_array

    # convolve image (scipy.ndimage.filters is a deprecated alias namespace)
    new_image_array = scipy.ndimage.convolve(image_array, get_kernel(width*4))
    return new_image_array

In [6]:
def to_image(array, hue=.62):
    """
    Converts an array of floats to an array of RGBA values using a colormap.

    array: non-negative intensities; they are log-compressed and normalised to
        their maximum before the colormap is applied.
    hue: HSV hue shared by every color of the colormap.

    Returns a uint8 array of shape array.shape + (4,). An input without any
    signal (maximum of 0) yields an all-zero, fully transparent image.
    """

    # apply saturation function (log1p(x) == log(x + 1))
    image_data = np.log1p(np.asarray(array, dtype=float))

    peak = image_data.max()
    if peak == 0:
        # Normalising would divide 0 by 0; return the empty image directly.
        return np.zeros(image_data.shape + (4,), dtype=np.uint8)

    # create colormap, change these values to adjust to look of your plot
    # each pair is (saturation, value) for one color stop at the given hue
    saturation_values = [[0, 0], [1, .68], [.78, .87], [0, 1]]
    colors = [hsv_to_rgb([hue, x, y]) for x, y in saturation_values]
    cmap = LinearSegmentedColormap.from_list("my_colormap", colors)

    # apply colormap
    out = cmap(image_data/peak)

    # convert to 8-bit unsigned integer
    out = (out*255).astype(np.uint8)
    return out

In [8]:
def get_nyc_bikedata(years, months, data_dir='NYC-citibike/'):
    """
    Load monthly Citi Bike trip CSVs into a single DataFrame.

    years, months: iterables of ints; every (year, month) combination is looked up
        as '<data_dir>/<YYYY><MM>-citibike-tripdata.csv'.
    data_dir: directory containing the CSV files.

    Files that do not exist are reported and skipped. The 'starttime' and
    'stoptime' columns are parsed as datetimes, and the result gets a fresh
    0..N-1 index so that row labels are unique across files.

    Raises ValueError if none of the requested files exist.
    """
    container = []
    for year in years:
        for month in months:
            fn = path.join(data_dir, f'{year}{month:02d}-citibike-tripdata.csv')
            if not path.isfile(fn):
                # only announce files that are actually read
                print('missing', fn)
                continue
            print('loading', fn)
            container.append(pd.read_csv(fn, parse_dates=['starttime', 'stoptime']))

    if not container:
        raise ValueError(f'no trip data files found in {data_dir!r} for years={list(years)}, months={list(months)}')

    df = pd.concat(container, axis=0, ignore_index=True)
    return df

# Load January-April 2019, then swap the spaces in the CSV headers for
# underscores so the columns can be used in query() expressions.
nyc = get_nyc_bikedata(years=[2019], months=[1, 2, 3, 4])
nyc.columns = [column_name.replace(' ', '_') for column_name in nyc.columns]

loading NYC-citibike/201901-citibike-tripdata.csv
loading NYC-citibike/201902-citibike-tripdata.csv
loading NYC-citibike/201903-citibike-tripdata.csv
loading NYC-citibike/201904-citibike-tripdata.csv


In [9]:
# hour of day at which each trip started, taken from the parsed 'starttime' column
nyc['hour'] = nyc['starttime'].dt.hour

### Each trip is drawn as a line on the map. Overlapping trips add up, so brighter lines show denser traffic.

In [35]:
# Bounding box of all start stations. These module-level values are read by
# latlon_to_pixel and plot_map.
start_latitudes = nyc['start_station_latitude']
start_longitudes = nyc['start_station_longitude']
min_lat, max_lat = start_latitudes.min(), start_latitudes.max()
min_lon, max_lon = start_longitudes.min(), start_longitudes.max()
def latlon_to_pixel(lat, lon, image_shape):
    """
    Convert latitude/longitude in degrees to (pixel_y, pixel_x) image coordinates.

    Uses the module-level bounding box (min_lat, max_lat, min_lon, max_lon):
    the longitude range is stretched across the image width and max_lat maps
    to row 0.

    image_shape: (rows, columns) of the target image; only the column count is used.

    Returns (pixel_y, pixel_x) as floats. Points outside the bounding box map to
    coordinates outside the image.
    """
    # longitude to pixel conversion (fit data to image): pixels per degree of longitude
    delta_x = image_shape[1]/(max_lon-min_lon)

    # latitude to pixel conversion (maintain aspect ratio).
    # One fixed reference latitude (min_lat) keeps rows linear in latitude and
    # matches how plot_map derives the overlay bounds. A per-point cosine would
    # not line up with those bounds.
    delta_y = delta_x/np.cos(np.radians(min_lat))
    pixel_y = (max_lat-lat)*delta_y
    pixel_x = (lon-min_lon)*delta_x
    return (pixel_y,pixel_x)


def row_to_pixel(row,image_shape):
    """
    Translate one trip (a row with start/end station coordinates) into the
    pixel coordinates of its two endpoints.

    Returns (start_x, start_y, end_x, end_y).
    """
    start = latlon_to_pixel(row["start_station_latitude"],
                            row["start_station_longitude"], image_shape)
    end = latlon_to_pixel(row["end_station_latitude"],
                          row["end_station_longitude"], image_shape)
    # latlon_to_pixel yields (y, x); the result is ordered x first
    return (start[1], start[0], end[1], end[0])

In [29]:
def get_image_data(reso, paths, min_count=0, max_count=None):
    """
    Render the paths whose trip_count lies in [min_count, max_count] as an image.

    reso: shape (rows, columns) of the image to create.
    paths: DataFrame with a 'trip_count' column plus the start/end station
        coordinate columns read by row_to_pixel.
    min_count, max_count: inclusive trip_count limits; max_count=None means
        the largest trip_count in paths.

    Returns the float array produced by add_lines, with every line weighted by
    its own trip_count.
    """
    image_data = np.zeros(reso)

    if max_count is None:
        max_count = paths['trip_count'].max()

    # between() is inclusive on both ends
    selected = paths[paths['trip_count'].between(min_count, max_count)]
    xys = [row_to_pixel(row, image_data.shape) for _, row in selected.iterrows()]

    # The weights must come from the SAME filtered rows as the lines. Passing
    # the unfiltered column would pair each line with another path's count.
    weights = selected['trip_count'].to_numpy()

    # draw the lines
    image_data = add_lines(image_data, xys, weights=weights, width = 1)
    return image_data

#### Add an alpha channel and overlay the trip paths on the map

In [34]:
def add_alpha(image_data):
    """
    Turn brightness into transparency: the HSV Value of every pixel becomes its
    alpha, and the color itself is pushed to full Value wherever it was non-zero.
    The result blends nicely with a black background.

    image_data: array whose last axis holds at least R, G, B in the 0-255 range;
        the output has the same shape as image_data and its channel 3 is alpha.
    """
    # work in HSV on RGB scaled to 0..1
    rgb_unit = image_data[:, :, :3].astype(float) / 255
    hsv = rgb_to_hsv(rgb_unit)

    # remember the original brightness before it is overwritten below
    brightness = hsv[:, :, 2].copy()

    # set value of hsv image to either 0 or 1
    hsv[:, :, 2] = np.where(brightness > 0, 1, 0)

    # assemble the output: recolored RGB plus brightness as alpha
    result = np.zeros(image_data.shape)
    result[:, :, :3] = hsv_to_rgb(hsv)
    result[:, :, 3] = brightness
    return result

In [46]:
def plot_map(paths, thresholds=(5, 15, 25)):
    """
    Build a folium map with one toggleable image overlay per trip-count band.

    paths: DataFrame accepted by get_image_data; trip_count is assumed to hold
        whole numbers (as produced by uniq_paths).
    thresholds: ascending lower limits of the bands. Band i covers
        thresholds[i] <= trip_count < thresholds[i+1]; the last band is open-ended.

    Relies on the module-level reso, min_lat, max_lat, min_lon and max_lon.
    Returns the folium.Map.
    """
    folium_map = folium.Map(location=[40.738, -73.98],
                            zoom_start=13,
                            tiles="CartoDB dark_matter",
                            width='65%')

    thresholds = list(thresholds)

    # Geographic extent of the raster. Its width spans the longitude range and
    # its height follows from the aspect ratio at min_lat. This is the same for
    # every layer, so compute it once.
    delta_lat = (max_lon-min_lon)/reso[1]*reso[0]*np.cos(min_lat/360*2*np.pi)
    bounds = [(max_lat-delta_lat,min_lon),(max_lat,max_lon)]

    for lower, upper in zip(thresholds, thresholds[1:] + [None]):
        # get_image_data treats both limits as inclusive. Stop one count below
        # the next threshold so a path is never drawn in two layers.
        max_count = None if upper is None else upper - 1
        image_data = get_image_data(reso, paths, lower, max_count)
        if upper is None:
            name = "{} <= Trip Counts <= max".format(lower)
        else:
            name = "{} <= Trip Counts < {}".format(lower, upper)

        map_overlay = add_alpha(to_image(image_data*10))
        img = folium.raster_layers.ImageOverlay(map_overlay,
                                   bounds = bounds,
                                   name = name)

        img.add_to(folium_map)

    # layer switcher for the overlays added above
    folium.LayerControl().add_to(folium_map)

    return folium_map

# Unique Paths 

In [23]:
# Mean coordinates of every start station, indexed by start_station_id
station_coordinate_columns = ['start_station_latitude', 'start_station_longitude']
locations = nyc.groupby('start_station_id')[station_coordinate_columns].mean()

In [31]:
# Raster size of the overlay images as (rows, columns), i.e. height x width in pixels
reso = (1800, 800)

In [1]:
def uniq_paths(df, min_trips=5, station_locations=None):
    """
    Count the trips for every (start station, end station) pair and attach the
    coordinates of both stations.

    df: trips with 'start_station_id' and 'end_station_id' columns. Not modified.
    min_trips: pairs with fewer trips than this are dropped.
    station_locations: DataFrame indexed by station id whose first column is the
        latitude and second column the longitude. Defaults to the module-level
        `locations`. Not modified.

    Returns a DataFrame indexed by the (start, end) tuple ('path_id') with the
    columns trip_count, start_station_id, end_station_id,
    start_station_latitude, start_station_longitude, end_station_latitude and
    end_station_longitude. Round trips (start == end) are removed; stations
    missing from station_locations get NaN coordinates.
    """
    if station_locations is None:
        station_locations = locations

    # Build the pair ids in a separate Series so the caller's frame, which is
    # often a slice of the full data, is left untouched.
    path_ids = pd.Series(list(zip(df['start_station_id'], df['end_station_id'])),
                         dtype=object, name='path_id')
    trip_counts = path_ids.groupby(path_ids).size()

    paths = trip_counts[trip_counts >= min_trips].to_frame('trip_count')
    paths['start_station_id'] = [pair[0] for pair in paths.index]
    paths['end_station_id'] = [pair[1] for pair in paths.index]
    paths = paths.query('start_station_id != end_station_id')

    # Join the coordinates once per endpoint, using renamed copies so the
    # shared station table keeps its own column names.
    for endpoint in ('start', 'end'):
        coords = station_locations.copy()
        coords.columns = [f'{endpoint}_station_latitude', f'{endpoint}_station_longitude']
        paths = paths.join(coords, on=f'{endpoint}_station_id')
    return paths

In [21]:
# Trips starting on or after 2019-04-01 whose start time of day lies between
# 08:00 and 09:00.
april_trips = nyc.query('starttime >= "2019-04-01 00:00:00"')
# between_time only works on a DatetimeIndex. The loaded frame has a plain
# integer index, so index by start time first (keeping the column as well).
if not isinstance(april_trips.index, pd.DatetimeIndex):
    april_trips = april_trips.set_index('starttime', drop=False)
am89 = april_trips.between_time('08:00', '09:00')

In [24]:
# trip counts per (start station, end station) pair for the morning trips
am89_paths = uniq_paths(am89)

In [43]:
# Build the map for the morning paths; the bare name on the last line makes
# the notebook display it.
am89_map = plot_map(am89_paths)
am89_map

In [45]:
# Same pipeline for the evening: trips starting on or after 2019-04-01 whose
# start time of day lies between 17:00 and 18:00.
april_trips = nyc.query('starttime >= "2019-04-01 00:00:00"')
# between_time only works on a DatetimeIndex. The loaded frame has a plain
# integer index, so index by start time first (keeping the column as well).
if not isinstance(april_trips.index, pd.DatetimeIndex):
    april_trips = april_trips.set_index('starttime', drop=False)
pm56 = april_trips.between_time('17:00', '18:00')
pm56_paths = uniq_paths(pm56)
pm56_map = plot_map(pm56_paths)
pm56_map

In [50]:
# three busiest morning paths
am89_paths.sort_values(by='trip_count', ascending=False).iloc[:3]

Unnamed: 0_level_0,trip_count,start_station_id,end_station_id,start_station_latitude,start_station_longitude,end_station_latitude,end_station_longitude
path_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"(432.0, 3263.0)",205,432.0,3263.0,40.726218,-73.983799,40.729515,-73.990753
"(3430.0, 3086.0)",147,3430.0,3086.0,40.719079,-73.942237,40.715143,-73.944507
"(3118.0, 3119.0)",115,3118.0,3119.0,40.73555,-73.95284,40.742327,-73.954117


In [51]:
# three busiest evening paths
pm56_paths.sort_values(by='trip_count', ascending=False).iloc[:3]

Unnamed: 0_level_0,trip_count,start_station_id,end_station_id,start_station_latitude,start_station_longitude,end_station_latitude,end_station_longitude
path_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"(3664.0, 327.0)",130,3664.0,327.0,40.720195,-74.010301,40.715338,-74.016584
"(363.0, 3002.0)",129,363.0,3002.0,40.708347,-74.017134,40.711512,-74.015756
"(458.0, 3255.0)",115,458.0,3255.0,40.751396,-74.005226,40.750585,-73.994685


In [55]:
# Look up the name of station 432, the start of the busiest morning path
first_station_names = nyc.groupby('start_station_id')['start_station_name'].first()
first_station_names.loc[432]

'E 7 St & Avenue A'