# Entradas y Salidas de bicicletas a estaciones

Se pretende visualizar cómo es el comportamiento de las estaciones según la cantidad de bicicletas que se enganchan y desenganchan en cada una, teniendo en cuenta el día de la semana y el horario.

In [None]:
import json
import pandas as pd
from PIL import Image, ImageDraw
import geopandas as gpd
from shapely.geometry import Point
import geopandas as gpd
import folium
import psycopg2
# Connection settings are read from the environment so that no credential is
# stored in the notebook. Host, port, database and user fall back to the values
# this notebook used before; the password deliberately has no fallback, so a
# missing BICIMAD_DB_PASSWORD raises KeyError here.
# NOTE(review): the password that used to be hard-coded on this line should be
# rotated, since it has been committed with the notebook.
import os

conn = psycopg2.connect(
    host=os.environ.get(
        "BICIMAD_DB_HOST",
        "postgre-sqltest.cpdeokpzufj1.us-west-2.rds.amazonaws.com",
    ),
    port=int(os.environ.get("BICIMAD_DB_PORT", "5432")),
    dbname=os.environ.get("BICIMAD_DB_NAME", "postgres"),
    user=os.environ.get("BICIMAD_DB_USER", "xseed"),
    password=os.environ["BICIMAD_DB_PASSWORD"],
)
# CRS definition (EPSG:4326); not referenced by the cells visible in this part
# of the notebook.
crs = {'init': 'epsg:4326'}
import numpy as np
import scipy.ndimage.filters
import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.colors import LinearSegmentedColormap, rgb_to_hsv, hsv_to_rgb

Consulta sobre la base de datos, NO teniendo en cuenta los movimientos realizados por los trabajadores de Bicimad. Esos se analizarán aparte.

In [None]:
# Bounds of the analysis window; both are compared (inclusively) against
# bike_movement.unplug_hourtime in the query below.
fecha_origen = '2018-06-21 08:00:00'
fecha_destino = '2018-06-28 23:00:00'

# One row per trip, joined twice against `station` to attach the departure and
# the arrival station. Rows with user_type_code = 3 are filtered out.
query = '''
select user_type_code, travel_time, idunplug_station,age_range_code,idplug_station,unplug_hourtime,
s.code_station as code_station_departure, s.latitude as latitude_departure, s.longitude as longitude_departure, s.name as name_departure,
s2.code_station as code_station_arrival, s2.latitude as latitude_arrival, s2.longitude as longitude_arrival, s2.name as name_arrival
from bike_movement bm
join station s on bm.idunplug_station = s.id
join station s2 on bm.idplug_station = s2.id
where bm.unplug_hourtime >= '{0}'
and bm.unplug_hourtime  <= '{1}' and bm.user_type_code <> 3
'''.format(fecha_origen, fecha_destino)

# Load the whole result set into a dataframe through the open connection.
data = pd.read_sql(query, conn)

Transformación de columnas

In [None]:
# Compute the time the bike was plugged back in from the unplug time plus the
# trip duration (travel_time is interpreted as seconds).
# The unplug time is converted once and reused, so the hour extraction below
# also works when the column arrives as strings rather than datetimes.
_unplug_time = pd.to_datetime(data["unplug_hourtime"])
data["plug_hourtime"] = _unplug_time + pd.to_timedelta(data["travel_time"], unit="s")
# Hour of day of the departure and of the arrival, used to bucket the trips.
data["hour"] = _unplug_time.dt.hour
data["hour_arrival"] = data["plug_hourtime"].dt.hour

En este primer ejemplo se quiere visualizar cómo es la migración de las bicicletas y si hay alguna relación entre la hora del día y las salidas o arribos de las bicis a las estaciones.

Selecciono la hora del día que quiero visualizar

In [None]:
def get_trip_counts_by_hour(selected_hour):
    """Count departures and arrivals per station for one hour of the day.

    Reads the module-level ``data`` dataframe. Departures are the trips whose
    ``hour`` equals *selected_hour*; arrivals are the trips whose
    ``hour_arrival`` equals *selected_hour*.

    Args:
        selected_hour: Hour of day to select (compared with ``==`` against the
            ``hour`` and ``hour_arrival`` columns).

    Returns:
        A dataframe indexed by station code with the columns
        ``Departure Count``, ``latitude_departure``, ``longitude_departure``,
        ``name_departure`` and ``Arrival Count``. It has one row for every
        station that appears as a departure station anywhere in ``data``;
        a station without departures or arrivals in the selected hour gets a
        count of 0 instead of being dropped or left as NaN.
    """
    # Coordinates and name of every known departure station (first row seen).
    locations = data.groupby("code_station_departure")[
        ["latitude_departure", "longitude_departure", "name_departure"]
    ].first()

    # size() counts rows per station, independent of nulls in other columns.
    departure_counts = (
        data.loc[data["hour"] == selected_hour]
        .groupby("code_station_departure")
        .size()
        .rename("Departure Count")
    )
    arrival_counts = (
        data.loc[data["hour_arrival"] == selected_hour]
        .groupby("code_station_arrival")
        .size()
        .rename("Arrival Count")
        # Same index name as the station table so the join keeps it.
        .rename_axis("code_station_departure")
    )

    # Start from the station table so stations that only had arrivals (or no
    # activity at all) in this hour are kept, then zero-fill the gaps.
    trip_counts = locations.join(departure_counts).join(arrival_counts)
    count_columns = ["Departure Count", "Arrival Count"]
    trip_counts[count_columns] = trip_counts[count_columns].fillna(0).astype(int)

    # Keep the column order callers have always received.
    return trip_counts[["Departure Count",
                        "latitude_departure",
                        "longitude_departure",
                        "name_departure",
                        "Arrival Count"]]
    

Asignamos un color diferente según el balance entre salidas y arribos a la estación en esa hora. Si hay más salidas que arribos, el color es naranja; de lo contrario, se utiliza azul. El radio depende del valor absoluto del balance: cuanto mayor el número, mayor el radio.

In [None]:
def plot_station_counts(trip_counts):
    """Draw one circle per station on a folium map, sized by its net balance.

    Args:
        trip_counts: Dataframe with one row per station and the columns
            ``Departure Count``, ``Arrival Count``, ``latitude_departure``,
            ``longitude_departure`` and ``name_departure``.

    Returns:
        The ``folium.Map`` with a ``CircleMarker`` added for every row. The
        marker is tangerine when departures exceed arrivals and teal otherwise;
        its radius is ``abs(departures - arrivals) / 20``.
    """
    # A missing count (NaN) would turn the balance, the radius and the popup
    # text into NaN, so treat it as "no trips".
    counts = trip_counts.fillna({"Departure Count": 0, "Arrival Count": 0})

    folium_map = folium.Map(location=[40.4, -3.7], zoom_start=13,
                            tiles="CartoDB dark_matter")
    popup_template = "{}<br> Total Salidas: {}<br> Total Llegadas: {}<br> Balance: {}"

    for _, row in counts.iterrows():
        net_departures = row["Departure Count"] - row["Arrival Count"]
        popup_text = popup_template.format(row["name_departure"],
                                           row["Departure Count"],
                                           row["Arrival Count"],
                                           net_departures)

        radius = abs(net_departures / 20)
        if net_departures > 0:
            color = "#E37222"  # tangerine
        else:
            color = "#0A8A9F"  # teal

        folium.CircleMarker(location=(row["latitude_departure"],
                                      row["longitude_departure"]),
                            radius=radius,
                            color=color,
                            popup=popup_text,
                            fill=True).add_to(folium_map)

    return folium_map

In [None]:
# View the station behaviour at 9 AM: build the per-station departure/arrival
# counts for hour 9 and draw them on the map.
trip_counts = get_trip_counts_by_hour(9)
plot_station_counts(trip_counts)

In [None]:
# Same view for hour 20 (8 PM), to compare against the 9 AM map.
trip_counts = get_trip_counts_by_hour(20)
plot_station_counts(trip_counts)

Se puede visualizar cómo hay regiones que varían su balance de entradas/salidas según la hora del día. Muchas de las que tienen mayor cantidad de partidas durante la mañana son de las que tienen más arribos al finalizar el día. Las del centro (que son las estaciones más utilizadas) mantienen una demanda constante en cualquier horario del día.

# Visualización animada según hora del día

In [None]:


    # Coordinates and name per (departure hour, departure station), taken from
    # the first trip seen in each group.
    locations = data.groupby(["hour", "code_station_departure"]).first()
    # and select only the three columns we are interested in
    locations = locations.loc[:, ["latitude_departure",
                                  "longitude_departure",
                                  "name_departure"]]

    # Departures per (departure hour, departure station); size() counts rows.
    departure_counts = (
        data.groupby(["hour", "code_station_departure"])
        .size()
        .to_frame("Departure Count")
    )

    # Arrivals are bucketed by the hour the bike was plugged in (hour_arrival),
    # matching get_trip_counts_by_hour. The index levels are renamed so they
    # line up with the departure index for the join below.
    arrival_counts = (
        data.groupby(["hour_arrival", "code_station_arrival"])
        .size()
        .to_frame("Arrival Count")
        .rename_axis(["hour", "code_station_departure"])
    )

    # One row per (hour, station) that had at least one departure; a station
    # with no arrivals in that hour gets 0 rather than NaN.
    trip_counts_by_hour = departure_counts.join(locations).join(arrival_counts)
    trip_counts_by_hour["Arrival Count"] = (
        trip_counts_by_hour["Arrival Count"].fillna(0).astype(int)
    )

    trip_counts_by_hour = trip_counts_by_hour.reset_index()
    trip_counts_by_hour


In [None]:
import os
# Append the current directory to PATH so executables placed next to the
# notebook can be found by name.
# NOTE(review): presumably needed for the browser driver used by the PNG
# export further down — confirm.
os.environ["PATH"] += os.pathsep + "."

In [None]:
def interpolate(df1, df2, x):
    """Blend two dataframes linearly.

    *df1* is weighted by ``1 - x`` and *df2* by ``x``; the two parts are added
    and every NaN in the sum is replaced by 0.
    """
    start_part = df1 * (1 - x)
    end_part = df2 * x
    blended = start_part + end_part
    return blended.replace(np.nan, 0)
  

def get_trip_counts_by_minute(float_hour):
    """Return per-station counts for any time of day, interpolated from the
    two surrounding hourly buckets.

    Args:
        float_hour: Time of day as a fractional hour (e.g. ``9.5`` for 09:30).
            The hour after 23 wraps around to 0.

    Returns:
        A dataframe indexed by station with the columns
        ``latitude_departure``, ``longitude_departure``, ``Departure Count``,
        ``Arrival Count`` and ``name_departure``. Only the two count columns
        are interpolated; coordinates and name are copied unchanged.
    """
    count_columns = ["Departure Count", "Arrival Count"]
    station_columns = ["latitude_departure",
                       "longitude_departure",
                       "name_departure"]

    start_hour = int(float_hour) % 24
    df1 = get_trip_counts_by_hour(start_hour)
    # Wrap around midnight instead of asking for the non-existent hour 24.
    df2 = get_trip_counts_by_hour((start_hour + 1) % 24)

    # Align both hours on the same set of stations; a station absent from one
    # hour (or with a missing count) contributes 0 trips for that hour.
    stations = df1.index.union(df2.index)
    counts_start = df1[count_columns].reindex(stations).fillna(0)
    counts_end = df2[count_columns].reindex(stations).fillna(0)
    counts = interpolate(counts_start, counts_end, float_hour % 1)

    # Coordinates and names are not interpolated: take them from the first
    # hour and fall back to the second for stations only present there, so no
    # station ends up at latitude/longitude 0.
    station_info = df1[station_columns].combine_first(df2[station_columns])

    df = station_info[["latitude_departure", "longitude_departure"]].join(counts)
    df["name_departure"] = station_info["name_departure"]
    return df

In [None]:
# Interpolated counts for 09:30 (between the hour-9 and hour-10 buckets),
# drawn on the map.
data2 = get_trip_counts_by_minute(9.5)
plot_station_counts(data2)

In [None]:
def go_arrivals_frame(i, hour_of_day, save_path):
    """Render one animation frame for a given time of day and save it as PNG.

    Args:
        i: Frame number, used in the output file name (``frame_00027.png``).
        hour_of_day: Time of day as a fractional hour (e.g. ``9.5`` for 09:30).
        save_path: Directory the frame is written to; created if missing.

    Returns:
        The captioned ``PIL`` image that was written to disk.
    """
    # Function-scope imports: neither name is imported by the cells above,
    # so the function would otherwise fail with NameError.
    import io
    from PIL import ImageFont

    # create the map object (named so it does not shadow the global `data`)
    frame_counts = get_trip_counts_by_minute(hour_of_day)
    my_frame = plot_station_counts(frame_counts)

    # generate the png file as a byte array
    # NOTE(review): _to_png is a private folium method — confirm it is still
    # available in the installed version.
    png = my_frame._to_png()

    # Caption time. Round to whole minutes first: truncating the fractional
    # part of a float hour can show one minute less than intended.
    total_minutes = int(round(hour_of_day * 60))
    hour, minutes = divmod(total_minutes, 60)

    # create a PIL image object
    image = Image.open(io.BytesIO(png))
    draw = ImageDraw.Draw(image)

    # load a font (the file must be resolvable by PIL)
    font = ImageFont.truetype("Roboto-Light.ttf", 30)

    # draw time of day text
    draw.text((20, image.height - 50),
              "time: {:0>2}:{:0>2}h".format(hour, minutes),
              fill=(255, 255, 255),
              font=font)

    # draw title
    draw.text((image.width - 400, 20),
              "Net Arrivals vs Time of Day",
              fill=(255, 255, 255),
              font=font)

    # write to a png file, making sure the target directory exists
    os.makedirs(save_path, exist_ok=True)
    filename = os.path.join(save_path, "frame_{:0>5}.png".format(i))
    image.save(filename, "PNG")
    return image

In [None]:
# Times of day to render: from 6 h up to (not including) 23 h in 0.2 h steps.
arrival_times = np.arange(6, 23, .2)
# Indices into arrival_times of the frames to (re)generate.
frames_to_redo = [27, 41, 74, 100, 105]
for i in frames_to_redo:
    # arange(6, 23, .2) yields roughly 85 steps, so indices such as 100 and
    # 105 are out of range; report and skip them instead of raising IndexError
    # part-way through the run.
    if i >= len(arrival_times):
        print("Skipping frame {}: only {} time steps available".format(
            i, len(arrival_times)))
        continue
    hour = arrival_times[i]
    go_arrivals_frame(i, hour, "frames")

# Relaciones entre estaciones

Se pretende visualizar ahora cómo se relacionan las estaciones según la hora del día, o sea, cómo es la relación origen-destino en los movimientos de las bicicletas.