In [1]:
from IPython.display import Image
from IPython.core.display import HTML 
# Embed the remotely hosted PNG; as the last expression of the cell the
# Image object is rendered as the cell's output.
Image(url= 'https://i.imgur.com/xw1WPVA.png')

In [3]:
from IPython.display import Image
from IPython.core.display import HTML 
# Embed the remotely hosted JPEG; as the last expression of the cell the
# Image object is rendered as the cell's output.
Image(url='https://i.imgur.com/44JDaMS.jpg')

## Traffic data animation: net departure data throughout the day.
#### The individual frames were assembled into an animation with `ffmpeg` and exported as mp4 or GIF.

In [22]:
import IPython.display
from IPython.display import Image

# Show the net-departure animation (a remotely hosted GIF) inline.
display(Image(url='https://i.imgur.com/UUIfkD0.gif'))

# Visualizing individual trips.

In [2]:
# Show the individual-trips animation (a remotely hosted GIF) inline.
display(Image(url='https://i.imgur.com/aGnjuzP.gif'))
# Disabled alternative, kept for reference: embed the mp4 rendering of the
# animation with an HTML5 <video> element instead of the GIF.
# from IPython.display import HTML

# HTML("""
# <div align="middle">
# <video width="80%" controls>
#       <source src="https://i.imgur.com/YxOYyPn.mp4" type="video/mp4">
# </video></div>""")

In [1]:
import os
from os import path
import time
import datetime
import io
import multiprocessing as mp
from collections import defaultdict

import pandas as pd
import numpy as np
import seaborn as sns
import scipy.ndimage.filters


import PIL
from PIL import Image, ImageDraw, ImageFont
import matplotlib
import matplotlib.colors
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap, rgb_to_hsv, hsv_to_rgb

import folium
from folium import plugins

import selenium

from map_utils import *
%matplotlib inline

In [2]:
def get_nyc_bikedata(years, months):
    """Load monthly Citi Bike trip CSVs and concatenate them into one DataFrame.

    For every combination of ``years`` and ``months`` the file
    ``NYC-citibike/<YYYY><MM>-citibike-tripdata.csv`` is looked up.  Files
    that do not exist are reported and skipped.

    Parameters
    ----------
    years : iterable of int
        Calendar years to load, e.g. ``[2019]``.
    months : iterable of int
        Month numbers (1-12) to load for each year.

    Returns
    -------
    pandas.DataFrame
        The rows of all files that were found, in (year, month) iteration
        order, with ``starttime`` and ``stoptime`` parsed as datetimes.

    Raises
    ------
    ValueError
        If none of the requested files exists.
    """
    container = []
    missing = []
    for year in years:
        for month in months:
            fn = path.join('NYC-citibike/', f'{year}{month:02d}-citibike-tripdata.csv')
            if not path.isfile(fn):
                # Do not claim to be "loading" a file that is not there.
                print('skipping (file not found)', fn)
                missing.append(fn)
                continue
            print('loading', fn)
            container.append(pd.read_csv(fn, parse_dates=['starttime', 'stoptime']))
    if not container:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say which files were expected.
        raise ValueError('no trip data files found; looked for: ' + ', '.join(missing))
    return pd.concat(container, axis=0)

# Load January through April 2019.
nyc = get_nyc_bikedata([2019], [1, 2, 3, 4])
# Replace spaces in the column names with underscores.
nyc.columns = [column.replace(' ', '_') for column in nyc.columns]
# Index the trips by their start time, keeping 'starttime' as a column too.
nyc = nyc.set_index('starttime', drop=False)
# Station lookup keyed by start_station_id: the first value seen for each of
# the remaining 'start_' columns (station name, lat/lon).
stations = (
    nyc.filter(like='start_')
       .groupby('start_station_id')
       .first()
)

loading NYC-citibike/201901-citibike-tripdata.csv
loading NYC-citibike/201902-citibike-tripdata.csv
loading NYC-citibike/201903-citibike-tripdata.csv
loading NYC-citibike/201904-citibike-tripdata.csv


In [3]:
def get_trip_counts(df):
    """Count departures and arrivals per station for the trips in ``df``.

    Only stations that occur both as a start and as an end station are kept
    (inner join).  The result is indexed by station id, has the columns
    ``departure_counts`` and ``arrival_counts``, and is joined with the
    module-level ``stations`` table.
    """
    departures = df.groupby('start_station_id').size().rename('departure_counts')
    arrivals = df.groupby('end_station_id').size().rename('arrival_counts')
    counts = pd.concat([departures, arrivals], axis=1, join='inner')
    return counts.join(stations)

In [4]:
# Row filter in DataFrame.query() syntax: keep only trips that started on or
# after 1 April 2019.
date_query = 'starttime >= "2019-04-01 00:00:00"'
# Disabled example: additionally restrict the trips to a time-of-day window
# and compute the per-station counts for that window.
# time_query = ('09:00', '10:00')

# trip_data = nyc.query(date_query).between_time(*time_query)

# trip_counts = get_trip_counts(trip_data)

In [5]:
def plot_station_counts(trip_counts, zoom_start=13):
    """Draw one circle marker per station on a dark folium map.

    ``trip_counts`` needs the columns ``departure_counts``,
    ``arrival_counts``, ``start_station_name``, ``start_station_latitude``
    and ``start_station_longitude``; its index is shown as the station id.
    The circle radius is ``abs(net departures) / 25``.  Stations with more
    departures than arrivals are drawn in tangerine, all others in teal.
    Returns the ``folium.Map``.
    """
    station_map = folium.Map(location=[40.738, -73.98],
                             zoom_start=zoom_start,
                             tiles='CartoDB dark_matter',
                             width='100%')

    # popup shown when a marker is clicked
    popup_template = "Station ID: {}<br>{}<br> total departures: {}<br> total arrivals: {}<br> net departures: {}"

    # one circle marker per row of the data
    for station_id, station in trip_counts.iterrows():
        departures = station['departure_counts']
        arrivals = station['arrival_counts']
        net_departures = departures - arrivals

        # tangerine for a net outflow, teal otherwise
        color = '#E37222' if net_departures > 0 else '#0A8A9F'

        marker = folium.CircleMarker(
            location=(station['start_station_latitude'],
                      station['start_station_longitude']),
            radius=abs(net_departures) / 25,
            color=color,
            popup=popup_template.format(station_id,
                                        station['start_station_name'],
                                        departures,
                                        arrivals,
                                        net_departures),
            fill=True)
        marker.add_to(station_map)
    return station_map

#### Gif to visualize the trip counts throughout the day

In [6]:
def interpo_trip_counts(df1, df2, float_hour):
    """Interpolate the trip counts for a fractional hour (e.g. 9.5).

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Trip counts of the hour before and the hour after, as returned by
        ``get_trip_counts`` (indexed by station id).
    float_hour : float
        Time of day in hours; its fractional part is the weight of ``df2``.

    Returns
    -------
    pandas.DataFrame
        Linear blend of the numeric columns.  A station that is missing from
        either input gets 0 for its counts.  The station attributes (name,
        latitude, longitude) are taken from the module-level ``stations``
        table.
    """
    def interpolate(df1, df2, w):
        df = df1 * (1 - w) + df2 * w
        return df.fillna(0)
    prev_hour = df1.drop(columns=['start_station_name'])
    next_hour = df2.drop(columns=['start_station_name'])
    mid = interpolate(prev_hour, next_hour, float_hour % 1)
    # fillna(0) above also zeroes the *coordinates* of stations that appear
    # in only one of the two hours, which would place them at lat/lon (0, 0).
    # Restore every station attribute from the lookup table instead of
    # restoring the name only.
    for column in stations.columns:
        mid[column] = stations.loc[mid.index, column]
    return mid

In [7]:
def save_frame(i, frame, float_hour, save_path):
    """Render a map to PNG, stamp the time of day and a title on it, and save it.

    Parameters
    ----------
    i : int
        Frame number, used for the zero-padded file name ``frame_<i>.png``.
    frame : object
        Map object providing ``_to_png()`` (a folium map in this notebook).
    float_hour : float
        Time of day in fractional hours, e.g. ``9.5`` for 09:30.
    save_path : str
        Directory the PNG file is written to.

    Returns
    -------
    PIL.Image.Image
        The annotated image that was written to disk.
    """
    # generate the png file as a byte array
    png = frame._to_png()

    # String indicating the time of day.  Round to whole minutes before
    # splitting: float_hour comes from a float range, so a value meant as
    # 6h20 can be a hair below it and truncation would print 06:19.
    total_minutes = int(round(float_hour * 60))
    hour, minutes = divmod(total_minutes, 60)
    # noon and later is 'pm' (the hour itself stays on a 24-hour clock)
    suffix = 'pm' if hour >= 12 else 'am'

    # create a PIL image object
    image = Image.open(io.BytesIO(png))
    draw = ImageDraw.Draw(image)

    font = ImageFont.truetype("Roboto-Light.ttf", 30)
    # draw time of day text to bottom left
    draw.text((20, image.height - 50),
              "time: {:0>2}:{:0>2}{}".format(hour, minutes, suffix),
              fill=(255, 255, 255),
              font=font)

    # draw title to top right
    draw.text((image.width - 400, 20),
              "Net Departure vs Time of Day",
              fill=(255, 255, 255),
              font=font)

    # write to a png file
    filename = os.path.join(save_path, "frame_{:0>5}.png".format(i))
    image.save(filename, "PNG")
    return image

In [8]:
def frame_worker(i, float_hour, save_path):
    """Render and save animation frame ``i`` for the time of day ``float_hour``.

    Blends the cached counts of the two surrounding full hours (read from
    the module-level ``trip_counts_by_hour``), plots them at zoom level 14
    and writes the annotated PNG into ``save_path``.
    """
    hour_before = trip_counts_by_hour[int(float_hour)]
    hour_after = trip_counts_by_hour[int(float_hour+1)]
    station_counts = interpo_trip_counts(hour_before, hour_after, float_hour)
    station_map = plot_station_counts(station_counts, zoom_start=14)
    save_frame(i, station_map, float_hour, save_path)

In [9]:
# Cache the per-station trip counts of every one-hour window from 06:00 to
# midnight; the animation frames interpolate between neighbouring hours.
trip_counts_by_hour = {}  # THIS IS THE DATA FOR THE VIDEO
trips_in_range = nyc.query(date_query)  # loop-invariant: apply the date filter once
for hour in range(6, 24):
    # Both bounds are full 'HH:MM' strings.  The window starting at 23:00
    # ends at midnight, i.e. '00:00' -- there is no hour 24.
    time_query = (f'{hour:02d}:00', f'{(hour + 1) % 24:02d}:00')
    trip_counts_by_hour[hour] = get_trip_counts(trips_in_range.between_time(*time_query))
del trips_in_range  # free the filtered copy once the cache is built

dir_name = 'trip_counts_frames'
# exist_ok avoids the check-then-create race and makes the cell re-runnable
os.makedirs(dir_name, exist_ok=True)

In [None]:

#### parallelize the tasks with multiprocessing 
# One task per animation frame: (frame number, time of day as a fractional
# hour, output directory).  The hours run from 6 up to (excluding) 23 in
# steps of 1/6 hour, i.e. one frame per 10 minutes.
TASKS = [(i, float_hour, dir_name) for i, float_hour in enumerate(np.arange(6, 23, 1/6))]
# starmap unpacks each tuple into frame_worker(i, float_hour, save_path) and
# blocks until every task has finished.
with mp.Pool(processes=8) as p:
    p.starmap(frame_worker, TASKS)
    p.close()
    p.join()

#### Re-render dropouts

In [283]:
# dropouts = [66, 77, 101]
# float_hours = np.arange(6, 23, 1/6)
# for i in dropouts:
#     frame_worker(i, float_hours[i], save_path='trip_counts_frames')

#### generate output.mp4

In [None]:
# Encode the frames (read at 10 frames/s) as H.264 video.  -y overwrites an
# existing depFrames.mp4 instead of stopping at ffmpeg's overwrite prompt,
# so the cell can be re-run.
! ffmpeg -y -r 10 -i trip_counts_frames/frame_%05d.png -c:v libx264 -vf fps=25 -crf 17 -pix_fmt yuv420p depFrames.mp4

#### generate GIF

In [None]:
# Step 1: generate a colour palette (palette.png) from the frames.
! ffmpeg -y  -t 3 -i trip_counts_frames/frame_%05d.png -vf fps=10,scale=320:-1:flags=lanczos,palettegen palette.png
# Step 2: build the GIF using that palette.  -y (as in step 1) overwrites an
# existing depFrames.gif instead of stopping at the overwrite prompt.
! ffmpeg -y -r 10  -i trip_counts_frames/frame_%05d.png -i palette.png -filter_complex "fps=10,scale=720:-1:flags=lanczos[x];[x][1:v]paletteuse" depFrames.gif

## Individual Trips


In [10]:
def path_progress(trips, frame_time):
    """Fraction of each trip that has elapsed at ``frame_time``.

    Computed as the seconds since ``starttime`` divided by ``tripduration``;
    for a trip that is under way this lies between 0 and 1.
    Returns a Series aligned with ``trips``.
    """
    elapsed = frame_time - trips['starttime']
    elapsed_seconds = elapsed.dt.total_seconds()
    return elapsed_seconds / trips['tripduration']

In [11]:
def current_position(trips, progress):
    """Latitude and longitude of each trip at the given ``progress``.

    Every trip is assumed to follow the straight line from its start
    station to its end station; ``progress`` 0 is the start and 1 the end.
    Returns the tuple ``(latitude, longitude)``.
    """
    def _along(start_column, end_column):
        # linear interpolation: start + progress * (end - start)
        origin = trips[start_column]
        return origin + progress * (trips[end_column] - origin)

    current_latitude = _along("start_station_latitude", 'end_station_latitude')
    current_longitude = _along("start_station_longitude", 'end_station_longitude')
    return current_latitude, current_longitude

In [12]:
def get_active_trips(frame_time, trips, image_shape, line_len=.1):
    """Return line segments (pixel coordinates) and weights for the trips
    that have started but not yet finished at ``frame_time``.

    Each segment runs from the position ``line_len`` of progress ago
    (clipped to the start of the trip) to the current position, as a tuple
    ``(start_x, start_y, end_x, end_y)``.  The weight is 1 until a trip is
    99% complete and then falls linearly to 0.  Pixel conversion uses the
    module-level ``bounds``.
    """
    in_progress = (trips['starttime'] <= frame_time) & (trips['stoptime'] >= frame_time)
    active_trips = trips[in_progress].copy()

    progress = path_progress(active_trips, frame_time)
    tail_progress = np.clip(progress-line_len, 0, 1)

    # head = where the bike is now, tail = where it was `line_len` earlier
    head_latitude, head_longitude = current_position(active_trips, progress)
    tail_latitude, tail_longitude = current_position(active_trips, tail_progress)

    tail_y, tail_x = latlon_to_pixel(tail_latitude, tail_longitude, image_shape, bounds)
    head_y, head_x = latlon_to_pixel(head_latitude, head_longitude, image_shape, bounds)

    segments = list(zip(tail_x, tail_y, head_x, head_y))
    fade = np.clip((1 - progress.values)*100, 0, 1)

    return segments, fade

In [14]:
def get_image_map(frame_time, trips):
    """Build the map image of all trips under way at ``frame_time``.

    Two layers are drawn onto an 1800 x 800 array: a short, wide, heavily
    weighted segment at each trip's current location, then a thinner line
    covering the path travelled so far.  The array is converted to a map
    with ``create_image_map`` using the module-level ``bounds``.
    """
    image_data = np.zeros((900*2,400*2))

    # (line_len, weight multiplier, line width):
    # first the current locations, then the paths of the trips
    layers = ((.01, 20, 4), (1, 10, 2))
    for line_len, gain, width in layers:
        xys, weights = get_active_trips(frame_time, trips, image_data.shape, line_len=line_len)
        image_data = add_lines(image_data, xys, weights=weights*gain, width=width)

    # generate and return the folium map.
    return create_image_map(image_data, bounds)

In [15]:
def path_frame_worker(inputs):
    """Render, annotate and save one frame of the individual-trips animation.

    Parameters
    ----------
    inputs : tuple
        ``(i, frame_time, save_path, trips)``: the frame number, the
        timestamp to render, the output directory and the trips DataFrame.
        Packed into a single argument so the function can be used with
        ``Pool.map``-style calls.

    Returns
    -------
    PIL.Image.Image
        The annotated image, also written to ``<save_path>/frame_<i>.png``.
    """
    i, frame_time, save_path, trips = inputs
    frame = get_image_map(frame_time, trips)
    png = frame._to_png()
    image = Image.open(io.BytesIO(png))
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype("Roboto-Light.ttf", 30)

    # draw weekday and datetime; the weekday is derived from frame_time so
    # the label stays correct for any date (it was hard-coded to "Monday")
    weekday = frame_time.strftime('%A')
    draw.text((40, image.height - 95), weekday, fill=(255, 255, 255), font=font)
    draw.text((20, image.height - 50),
              "Date&Time: {}".format(frame_time),
              fill=(255, 255, 255), font=font)

    # draw title
    draw.text((image.width - 450, 20),
              "Paths of Individual Bike Trips",
              fill=(255, 255, 255), font=font)

    image.save(os.path.join(save_path, "frame_{:0>5}.png".format(i)), "png")
    return image

In [16]:
# Bounds used when converting lat/lon to pixel coordinates
# (get_min_max presumably comes from map_utils -- confirm).
bounds = get_min_max(nyc)
# All trips that started on 2019-04-15.  nyc is indexed by starttime; use
# .loc for the partial-date lookup, because selecting rows with
# nyc['2019-04-15'] was deprecated and later removed in pandas.
subset = nyc.loc['2019-04-15']
dir_name = "path_frames"
# exist_ok avoids the check-then-create race and makes the cell re-runnable
os.makedirs(dir_name, exist_ok=True)

# 120 frames, one every 30 seconds starting at 17:30, i.e. one hour of data
start_time = pd.to_datetime('2019-04-15 17:30:00')
frame_times = [start_time + datetime.timedelta(seconds=i*30) for i in range(120)]

In [17]:
# Submit one asynchronous job per frame.  Each job is a map_async over a
# one-element iterable, so its result is a one-item list holding the image
# returned by path_frame_worker.
with mp.Pool(processes=8) as pool:
    p_res = [pool.map_async(path_frame_worker, ((i, frame_time, dir_name, subset),))
            for i, frame_time in enumerate(frame_times)]
    # no further submissions; wait until all workers have finished
    pool.close()
    pool.join()
    
    # res.get() re-raises any exception raised in a worker, so this loop
    # surfaces failed frames as well as unexpected return types
    for res in p_res:  # check exit status
            assert type(res.get()[0]) == PIL.PngImagePlugin.PngImageFile

In [None]:
# Encode the frames (read at 10 frames/s) as H.264 video.  -y overwrites an
# existing pathFrames.mp4 instead of stopping at ffmpeg's overwrite prompt,
# so the cell can be re-run.
! ffmpeg -y -r 10 -i path_frames/frame_%05d.png -c:v libx264 -vf fps=25 -crf 17 -pix_fmt yuv420p pathFrames.mp4

# Gif

In [None]:
# Step 1: generate a colour palette (path_palette.png) from the frames.
! ffmpeg -y  -t 3 -i path_frames/frame_%05d.png -vf fps=10,scale=320:-1:flags=lanczos,palettegen path_palette.png
# Step 2: build the GIF using that palette.  -y (as in step 1) overwrites an
# existing pathFrames.gif instead of stopping at the overwrite prompt.
! ffmpeg -y -r 10  -i path_frames/frame_%05d.png -i path_palette.png -filter_complex "fps=10,scale=720:-1:flags=lanczos[x];[x][1:v]paletteuse" pathFrames.gif