In [1]:
import sqlite3
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import matplotlib.pyplot as plt
import folium
from collections import Counter
import numpy as np
import os
os.environ["PATH"] += os.pathsep + "."
from folium import plugins
import scipy.ndimage.filters
from selenium import webdriver
from folium.utilities import write_png
import io
import time

In [2]:
# Open the SQLite database, keep a cursor on the same connection, and load
# the complete trip and station tables into DataFrames.
conn = sqlite3.connect("SFdatabase.sqlite")
curr = conn.cursor()
trip_df_orig, station_df_orig = (
    pd.read_sql_query(query, conn)
    for query in ("SELECT * FROM trip", "SELECT * FROM station")
)

In [3]:
# Attach start- and end-station coordinates to every trip in ONE query.
# The station table is joined twice (once per end of the trip) so each output
# row is built from a single trip record. Running two independent JOIN
# queries and pasting the results side by side would rely on both queries
# returning rows in the same order, which SQL does not guarantee without an
# ORDER BY, and would misalign rows if any station id failed to match.
# Column order is: start_lat, start_long, <all trip columns>, end_lat, end_long.
trip_df = pd.read_sql_query(
    """
    SELECT start_station.lat  AS start_lat,
           start_station.long AS start_long,
           trip.*,
           end_station.lat    AS end_lat,
           end_station.long   AS end_long
    FROM trip
    JOIN station AS start_station ON trip.start_station_id = start_station.id
    JOIN station AS end_station   ON trip.end_station_id = end_station.id
    """,
    conn,
)

In [17]:
# Parse the timestamp columns, then derive the hour of day for each trip end.
trip_df["start_date"] = pd.to_datetime(trip_df["start_date"])
trip_df["end_date"] = pd.to_datetime(trip_df["end_date"])
# The .dt accessor extracts the hour for the whole column at once instead of
# calling a Python lambda on every row.
trip_df["start_hour"] = trip_df["start_date"].dt.hour
trip_df["end_hour"] = trip_df["end_date"].dt.hour

In [5]:
# Cache the joined trip table on disk (the DataFrame index is written too).
trip_df.to_csv(path_or_buf="main_data_set.csv")

In [6]:
# Reload the cached trip table from disk.
# NOTE(review): the leftover output under this cell looks like the tail of a
# pandas mixed-dtype warning, which comes from chunked type inference.
# low_memory=False makes pandas infer each column's dtype from the whole file
# so every column gets one consistent type - confirm the warning disappears.
trip_df = pd.read_csv("main_data_set.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Discard the trip attributes that the per-station hourly counts do not use.
unused_trip_columns = ['duration', 'id', 'bike_id', 'subscription_type', 'zip_code']
start_end = trip_df.drop(columns=unused_trip_columns)

In [8]:
# Split the trip table into a departures view and an arrivals view.
# The labels to remove are taken from label slices of start_end, so this step
# depends on the column order of start_end.
end_side_columns = start_end.loc[:, 'end_date':'end_long'].columns
start_side_columns = start_end.loc[:, 'start_lat':'start_station_id'].columns

# Departures: indexed by start station, without the end-side columns.
start = (
    start_end.set_index('start_station_id')
    .drop(columns=end_side_columns)
    .drop(columns=['end_hour', 'start_date'])
)
# Arrivals: indexed by end station, without the start-side columns.
end = (
    start_end.set_index('end_station_id')
    .drop(columns=start_side_columns)
    .drop(columns=['start_hour', 'end_date'])
)

In [10]:
def _hourly_trip_counts(trips, hour_column, station_ids):
    """Count trips per (station, hour of day) as a station x 24-hour table.

    Parameters
    ----------
    trips : DataFrame indexed by station id, with an hour column.
    hour_column : name of the column holding the hour of day.
    station_ids : station ids to use as the rows of the result, in order.

    Returns
    -------
    DataFrame with one row per entry of ``station_ids`` and columns 0..23.
    Station/hour combinations without trips are 0. Values are floats so that
    downstream text formatting matches tables that contained missing values.
    """
    # One grouped pass over all trips instead of a full boolean scan of the
    # trip table for every station.
    per_station_hour = trips.groupby(
        [trips.index.values, trips[hour_column].values]
    ).size()
    return (
        per_station_hour.unstack(fill_value=0)
        .reindex(index=station_ids, columns=range(24), fill_value=0)
        .astype(float)
    )


station_ids = station_df_orig['id'].tolist()
new_start_df = _hourly_trip_counts(start, 'start_hour', station_ids)
new_end_df = _hourly_trip_counts(end, 'end_hour', station_ids)

# Net departures per station and hour: trips started minus trips ended.
hours_df = new_start_df.subtract(new_end_df, fill_value=0)

In [12]:
# Put the station attributes (indexed by station id) next to the hourly
# net-departure columns.
station_info = station_df_orig.set_index('id')
final = pd.concat([station_info, hours_df], axis="columns")

In [13]:
def plot_station_counts(final, hour, radius_scale=80):
    """Draw one circle marker per station for the given hour.

    Parameters
    ----------
    final : DataFrame with 'name', 'lat' and 'long' columns plus a column
        labelled ``int(hour)`` holding the net departures for that hour.
    hour : hour to plot; it is truncated with ``int`` to find the column.
    radius_scale : divisor turning a net-departure magnitude into a marker
        radius. Defaults to 80.

    Returns
    -------
    The folium map with one CircleMarker added per row of ``final``.
    """
    hour_column = int(hour)

    # generate a new map
    folium_map = folium.Map(location=[37.788324, -122.4],
                            zoom_start=13.5,
                            tiles="cartodbpositron",
                            width='100%')

    # for each row in the data, add a circle marker
    for _, row in final.iterrows():
        net_departure = row[hour_column]

        # popup message that is shown on click
        popup_text = "{}<br> net departures: {}".format(row['name'], net_departure)

        # The radius encodes the magnitude only; the sign is carried by the
        # color. Without abs() every station with net arrivals would be
        # given a negative radius.
        radius = abs(net_departure) / radius_scale

        # red for net departures, turquoise otherwise
        color = "#DD0000" if net_departure > 0 else "#00CFE2"

        # add marker to the map
        folium.CircleMarker(location=(row["lat"], row["long"]),
                            radius=radius,
                            color=color,
                            popup=popup_text,
                            fill=True).add_to(folium_map)

    return folium_map

In [20]:
# Show the net-departure map for the hour-16 column.
plot_station_counts(final, hour=16)

In [15]:
def interpolate(df1, df2, x):
    """Blend two frames linearly: weight ``1 - x`` on df1 and ``x`` on df2.

    Any NaN in the blended result is replaced by 0.
    """
    weight_first = 1 - x
    blended = df1 * weight_first + df2 * x
    return blended.replace(to_replace=np.nan, value=0)
  

def get_trip_counts_by_minute(hour, data):
    """Interpolate the hourly columns of ``data`` to a fractional hour.

    Parameters
    ----------
    hour : time of day as a number; the integer part selects the hourly
        column and the fractional part is the weight of the following hour.
    data : DataFrame with 'name', 'lat' and 'long' columns plus one column
        per hour, labelled by integer hour.

    Returns
    -------
    DataFrame with columns ``[int(hour), 'name', 'lat', 'long']`` where the
    first column holds the interpolated values.
    """
    current_hour = int(hour)
    # Hour of day is cyclic: the hour after 23 is 0. Without the wrap any
    # time in [23, 24) would look up a non-existent column 24.
    next_hour = (current_hour + 1) % 24

    blended = interpolate(data[current_hour], data[next_hour], hour % 1)

    df = pd.DataFrame(blended)
    df["name"] = data["name"]
    df['lat'] = data['lat']
    df['long'] = data['long']
    df.columns = [current_hour, 'name', 'lat', 'long']
    return df

In [16]:
def go_arrivals_frame(i, hour_of_day, save_path,
                      chromedriver_path="C://Users//Daud//AppData//Local//Google//Chrome//chromedriver.exe",
                      delay=2.5):
    """Render one captioned animation frame for a time of day.

    The map for ``hour_of_day`` is saved as HTML, opened in Chrome through
    selenium, captured as a screenshot, captioned with the time and a title,
    and written to ``save_path`` as ``frame_<i>.png``.

    Parameters
    ----------
    i : frame number used in the output file name (zero-padded to 5 digits).
    hour_of_day : time of day as a number; the fractional part gives minutes.
    save_path : directory for the frame; it is created if it does not exist.
    chromedriver_path : location of the chromedriver executable. Defaults to
        the path this notebook was written with.
    delay : seconds to wait after loading the page before the screenshot.

    Returns
    -------
    The captioned PIL image.
    """
    # Local import so the function does not depend on the notebook's import cell.
    from pathlib import Path

    # Make sure the output directory exists before doing the slow work.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    # create the map object
    data = get_trip_counts_by_minute(hour_of_day, final)
    my_frame = plot_station_counts(data, hour_of_day)

    # Save the map as an HTML file and build a well-formed file:// URL for it
    # (correct on Windows and for paths containing special characters).
    fn = 'testmap.html'
    my_frame.save(fn)
    tmpurl = Path(fn).resolve().as_uri()

    # Open a browser window...
    # NOTE(review): newer selenium releases may not accept the driver path as
    # a positional argument - confirm against the installed version.
    browser = webdriver.Chrome(chromedriver_path)
    try:
        # ...that displays the map...
        browser.get(tmpurl)
        # Give the map tiles some time to load
        time.sleep(delay)
        # Grab the screenshot
        browser.save_screenshot('map.png')
    finally:
        # Always close the browser, even if loading or the screenshot failed.
        browser.quit()

    # now add a caption to the image to indicate the time-of-day.
    hour = int(hour_of_day)
    minutes = int((hour_of_day % 1) * 60)

    # create a PIL image object
    image = Image.open('map.png')
    draw = ImageDraw.Draw(image)

    # load a font
    font = ImageFont.truetype("arial.ttf", 25)

    # draw time of day text
    draw.text((20, image.height - 50),
              "time: {:0>2}:{:0>2}h".format(hour, minutes),
              fill="black",
              font=font)

    # draw title
    draw.text((image.width - 400, 20),
              "Net Arrivals vs Time of Day",
              fill="black",
              font=font)

    # write to a png file
    filename = os.path.join(save_path, "frame_{:0>5}.png".format(i))
    image.save(filename, "PNG")
    return image

def get_counts_per_hr(hour,data):
    """Count rows per station for trips starting and ending in ``hour``.

    Reads the module-level ``start`` and ``end`` frames; ``data`` is accepted
    but not used. Returns ``(start_counts, end_counts)``, each grouped by
    its station id.
    """
    departures = start.loc[start['start_hour'] == hour]
    arrivals = end.loc[end['end_hour'] == hour]
    return (
        departures.groupby("start_station_id").count(),
        arrivals.groupby("end_station_id").count(),
    )
    
    
# Per-station start and end counts for hour 10.
s_counts, e_counts = get_counts_per_hr(hour=10, data=final)

# Render one frame for every 0.2-hour step from 6 up to (not including) 23.
frame_hours = np.arange(6, 23, .2)
for i, hour in enumerate(frame_hours):
    go_arrivals_frame(i, hour, save_path="frames")

# Stand-alone map with a single default circle marker at its center.
map_center = (37.774814, -122.418954)
folium_map = folium.Map(
    location=list(map_center),
    zoom_start=13,
    tiles="cartodbpositron",
)
marker = folium.CircleMarker(location=list(map_center))
marker.add_to(folium_map)