In [1]:
import numpy as np
import pandas as pd
import folium
import branca.colormap as cm
import geopandas as gpd
from geopandas.tools import sjoin
from shapely.geometry import Point, Polygon

# Import data and convert to geopandas df

In [2]:
# Load the processed trips table from the project's data folder
trips_csv_path = '../data/processed/trips.csv'
trips = pd.read_csv(trips_csv_path)

## Create geopandas df for start locations

In [3]:
# Point geometries from the start coordinates (x = start_lng, y = start_lat)
start_points = gpd.points_from_xy(trips['start_lng'], trips['start_lat'])

# .copy() gives an independent frame for the later start-location analysis
geo_trips_start = gpd.GeoDataFrame(trips, geometry=start_points).copy()

geo_trips_start.head(2)

Unnamed: 0,bike,bike_type,identification,start_time,end_time,duration_sec,start_lng,start_lat,end_lng,end_lat,start_place,end_place,geometry
0,20507,29,14776184,2019-04-21 17:34:00,2019-04-21 17:47:00,780.0,8.884911,53.078923,8.899906,53.078711,2985.0,0.0,POINT (8.88491 53.07892)
1,20507,29,16155376,2019-05-23 15:33:00,2019-05-23 15:40:00,420.0,8.884911,53.078923,8.876828,53.078004,2985.0,0.0,POINT (8.88491 53.07892)


# Point geometries from the end coordinates (x = end_lng, y = end_lat)
end_points = gpd.points_from_xy(trips['end_lng'], trips['end_lat'])

# .copy() keeps this frame independent, exactly as is done for geo_trips_start
geo_trips_end = gpd.GeoDataFrame(trips, geometry=end_points).copy()
geo_trips_end.head(2)

## Import geodata of Bremen (PLZ based)
https://public.opendatasoft.com/explore/dataset/postleitzahlen-deutschland/table/?refine.note=Bremen

In [4]:
# Read the PLZ polygons of Bremen from the downloaded GeoJSON file
plz_geojson_path = '../data/external/plz_bremen.geojson'
plz_df = gpd.read_file(plz_geojson_path)

plz_df.head(2)

Unnamed: 0,note,plz,geometry
0,Bremen,28199,"POLYGON ((8.76410 53.06703, 8.77230 53.06924, ..."
1,Bremen,28309,"POLYGON ((8.86527 53.03539, 8.86535 53.03602, ..."


## Function to get center of geo data set

In [5]:
def get_center(gpd_df):
    """Return the mean centroid of a geo data set as ``[y, x]``.

    The centroids of all geometries in ``gpd_df`` are averaged per axis.
    The y value comes first because the result is passed to
    ``folium.Map(location=...)``; for lon/lat data that is ``[lat, lon]``.
    """
    centroids = gpd_df.centroid
    mean_y = centroids.y.mean()
    mean_x = centroids.x.mean()
    return [mean_y, mean_x]

## Visualization of plz areas of Bremen

Note: PLZ 28329 comprises two disjoint areas. This does not matter here, since the task is only to count trip starts and ends per PLZ.

In [6]:
# Base map centred on the mean centroid of the PLZ polygons
plz_bremen = folium.Map(location=get_center(plz_df), zoom_start=11)

# Light basemap tiles, the PLZ outlines, and finally the layer switcher
for map_layer in (
    folium.TileLayer('CartoDB positron', name="Light Map", control=False),
    folium.Choropleth(geo_data=plz_df),
    folium.LayerControl(),
):
    map_layer.add_to(plz_bremen)

plz_bremen

# Create new data frame with plz, the corresponding geometry and number of trips started and ended in this area

## Starts

In [7]:
def get_trips_per_plz_df(trips_df, plz_df):
    """Count how many trip points fall into each PLZ area.

    Parameters
    ----------
    trips_df : geopandas.GeoDataFrame
        Trip points; every row counts as one trip and only the
        ``geometry`` column is used.
    plz_df : geopandas.GeoDataFrame
        PLZ areas with a ``plz`` column and their geometries.

    Returns
    -------
    geopandas.GeoDataFrame
        ``plz_df`` with an additional integer ``trips_count`` column.
        Areas that contain no trip get a count of 0.

    Notes
    -----
    ``trips_df`` is not modified. Earlier versions added a helper
    ``count`` column to it, which triggered a SettingWithCopyWarning when
    a filtered subset was passed in.
    """
    # Join against a geometry-only copy so the caller's frame stays untouched
    points = trips_df[['geometry']].copy()

    # NOTE(review): when the points carry no CRS they are assumed to be in the
    # same coordinate system as the PLZ polygons (the join treats the raw
    # coordinates as comparable anyway); declaring it avoids the CRS-mismatch
    # warning raised by sjoin.
    if points.crs is None and plz_df.crs is not None:
        points.crs = plz_df.crs

    # One joined row per (PLZ area, trip point) pair that intersects
    joined = gpd.sjoin(plz_df, points)

    counts = (
        joined.groupby('plz')
        .size()
        .rename('trips_count')
        .reset_index()
    )

    # Left merge keeps every PLZ area, including those without any trip
    trips_per_plz_df = plz_df.merge(counts, how='left', on='plz')
    trips_per_plz_df['trips_count'] = (
        trips_per_plz_df['trips_count'].fillna(0).astype(int)
    )

    return trips_per_plz_df

In [8]:
# Trip starts per PLZ area over all available data
plz_starts = get_trips_per_plz_df(trips_df=geo_trips_start, plz_df=plz_df)

  "(%s != %s)" % (left_df.crs, right_df.crs)


In [9]:
def visualize_trips_per_plz_1(plz_starts, aliases, legend_name):
    """Draw a folium choropleth of ``trips_count`` per PLZ with hover tooltips.

    Parameters
    ----------
    plz_starts : geopandas.GeoDataFrame
        PLZ areas with ``plz``, ``trips_count`` and ``geometry`` columns.
    aliases : list of str
        Tooltip labels for the ``plz`` and ``trips_count`` fields, in that
        order.
    legend_name : str
        Caption shown next to the colour legend.

    Returns
    -------
    folium.Map
        The map with the choropleth layer and a transparent tooltip layer.
    """
    m = folium.Map(location=get_center(plz_starts), zoom_start=11)
    folium.TileLayer('CartoDB positron', name="Light Map", control=False).add_to(m)

    # Colour classes are left at folium's defaults. For quantile-based classes,
    # compute break points such as
    #   plz_starts['trips_count'].quantile((0, 0.6, 0.75, 0.9, 0.98, 1)).tolist()
    # and hand them to Choropleth (``bins``; ``threshold_scale`` in old folium).
    folium.Choropleth(
        geo_data=plz_starts,
        name='choropleth',
        data=plz_starts,
        columns=['plz', 'trips_count'],
        key_on="feature.properties.plz",
        fill_color='YlGnBu',
        fill_opacity=1,
        line_opacity=0.5,
        legend_name=legend_name
    ).add_to(m)

    folium.LayerControl().add_to(m)

    def style_function(feature):
        # Invisible by default: this layer only exists to carry the tooltips
        return {'fillColor': '#ffffff',
                'color': '#ffffff',
                'fillOpacity': 0,
                'weight': 0.1}

    def highlight_function(feature):
        # Darken the area under the mouse pointer
        return {'fillColor': '#000000',
                'color': '#000000',
                'fillOpacity': 0.5,
                'weight': 0.1}

    popup = folium.features.GeoJson(
        plz_starts,
        style_function=style_function,
        control=False,
        highlight_function=highlight_function,
        tooltip=folium.features.GeoJsonTooltip(
            fields=['plz', 'trips_count'],
            aliases=aliases,
            style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
        )
    )

    m.add_child(popup)
    # Keep the tooltip layer above the choropleth so hovering keeps working
    m.keep_in_front(popup)

    return m

In [10]:
def visualize_trips_per_plz_2(plz_starts, aliases, caption="number of starts in 2019"):
    """Draw PLZ areas coloured by ``trips_count`` on a linear colour scale.

    Parameters
    ----------
    plz_starts : geopandas.GeoDataFrame
        PLZ areas with ``plz``, ``trips_count`` and ``geometry`` columns.
    aliases : list of str
        Tooltip labels for the ``plz`` and ``trips_count`` fields, in that
        order.
    caption : str, optional
        Text shown next to the colour legend. Defaults to the full-year
        caption; pass a different text for other time ranges.

    Returns
    -------
    folium.Map
        The map with the coloured PLZ layer and the colour legend.
    """
    # Upper end of the colour scale; fall back to 1 when the maximum is
    # zero or NaN so the scale never collapses to an empty range.
    max_count = plz_starts.trips_count.max()
    if not max_count > 0:
        max_count = 1
    colormap = cm.linear.YlGnBu_09.scale(0, max_count)
    colormap.caption = caption

    mymap = folium.Map(location=get_center(plz_starts), zoom_start=11, tiles=None)

    folium.TileLayer('CartoDB positron', name="Light Map", control=False).add_to(mymap)

    def style_function(feature):
        count = feature['properties']['trips_count']
        # Areas without a count arrive as null/NaN; colour them like zero
        # trips instead of failing inside the colormap lookup.
        if count is None or count != count:
            count = 0
        return {"weight": 0.5,
                'color': 'black',
                'fillColor': colormap(count),
                'fillOpacity': 1}

    def highlight_function(feature):
        # Darken the area under the mouse pointer
        return {'fillColor': '#000000',
                'color': '#000000',
                'fillOpacity': 0.50,
                'weight': 0.1}

    trips_count = folium.features.GeoJson(
        plz_starts,
        style_function=style_function,
        control=False,
        highlight_function=highlight_function,
        tooltip=folium.features.GeoJsonTooltip(
            fields=['plz', 'trips_count'],
            aliases=aliases,
            style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
            sticky=True
        )
    )
    colormap.add_to(mymap)
    mymap.add_child(trips_count)

    return mymap

## Visualizations of trips per plz of all available data of the year 2019

In [11]:
# Full-year map, so the tooltip label refers to all of 2019 (not only August)
visualize_trips_per_plz_2(plz_starts, ['Postleitzahl: ', 'Number of starts in 2019: '])

### Save to reports

In [None]:
# Full-year map, so the tooltip label refers to all of 2019 (not only August)
starts_2019_map = visualize_trips_per_plz_2(plz_starts, ['Postleitzahl: ', 'Number of starts in 2019: '])
starts_2019_map.save('../reports/figures/starts_per_plz_2019.html')

## Visualizations of trips per plz of all available data of August 2019

In [12]:
# Parse the start timestamps so that date components can be used for filtering
parsed_start_times = pd.to_datetime(geo_trips_start['start_time'])
geo_trips_start['start_time'] = parsed_start_times

In [13]:
# Keep only trips that started in August (month 8). .copy() makes the subset
# an independent frame, so later column assignments on it do not trigger
# pandas' SettingWithCopyWarning.
started_in_august = geo_trips_start['start_time'].dt.month == 8
geo_trips_start_august = geo_trips_start[started_in_august].copy()

In [14]:
# Trip starts per PLZ area, restricted to the August subset
plz_starts_august = get_trips_per_plz_df(trips_df=geo_trips_start_august, plz_df=plz_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
  "(%s != %s)" % (left_df.crs, right_df.crs)


### Fill null values with 0

In [15]:
# PLZ areas without any start in August have no count; treat them as 0 trips
plz_starts_august['trips_count'] = plz_starts_august['trips_count'].fillna(0)

In [16]:
# Tooltip labels for the PLZ and its number of starts
august_aliases = ['Postleitzahl: ', 'Number of starts in august 2019: ']
visualize_trips_per_plz_2(plz_starts_august, august_aliases)

### Save to reports

In [None]:
# Render the August map and write it to the reports folder as standalone HTML
august_aliases = ['Postleitzahl: ', 'Number of starts in august 2019: ']
august_map = visualize_trips_per_plz_2(plz_starts_august, august_aliases)
august_map.save('../reports/figures/starts_per_plz_august_2019.html')