# Importing libraries

In [None]:
import json
import pickle

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
from pyproj import Proj, transform
from shapely.geometry import Point, Polygon

# Read data

In [None]:
def clean_plr(df):
    """Normalise the ``PLR_ID`` column to zero-padded strings.

    Every value is coerced with ``int`` and then rendered as text,
    left-padded with zeros to eight characters. All rows end up as ``str``:
    the earlier version only converted 7-digit IDs and left 8-digit IDs as
    ``int``, which produced a mixed-type key column.

    Args:
        df: DataFrame with a ``PLR_ID`` column whose values ``int()`` accepts.
            The column is overwritten in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    df['PLR_ID'] = df['PLR_ID'].apply(lambda plr: str(int(plr)).zfill(8))
    return df

In [None]:
def read_data(path):
    """Load a CSV keyed by ``PLR_ID`` and join it onto the PLR frame.

    The CSV at ``path`` is read and its ``PLR_ID`` column is normalised with
    ``clean_plr``. The result is left-joined onto the module-level
    ``gdf_plr`` frame on ``PLR_ID``, and any missing values in the joined
    frame are replaced with 0.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The joined frame with missing values filled with 0.
    """
    csv_frame = clean_plr(pd.read_csv(path))

    # Left join keeps every row of the LOR frame, matched or not
    merged = gdf_plr.merge(csv_frame, how='left', on="PLR_ID")
    merged.fillna(0, inplace=True)
    return merged

LOR file

In [None]:
def read_lor():
    """Read the PLR shapefile and return it with a cleaned ``PLR_ID`` column.

    The file is loaded from ``../raw_data/LOR_shpfiles/lor_plr.shp`` with
    geopandas and passed through ``clean_plr``. The original note describes
    PLR as the smallest level, 542 sub-districts.
    """
    plr_frame = gpd.read_file("../raw_data/LOR_shpfiles/lor_plr.shp")
    return clean_plr(plr_frame)

In [None]:
# Module-level PLR frame; read_data() and get_geojson() read this global.
gdf_plr = read_lor()

CSV file

In [None]:
def round_up(df, num):
    """Round every float column of ``df`` to ``num`` decimal places.

    Despite the name, values are rounded with the built-in ``round`` (to the
    nearest value), not upwards. Columns are processed one at a time with
    ``Series.apply`` rather than ``DataFrame.applymap``, which pandas
    deprecated in 2.1.

    Args:
        df: DataFrame to modify in place. Only columns selected by
            ``select_dtypes(include=float)`` are touched.
        num: Number of decimal digits passed to ``round``.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in df.select_dtypes(include=float).columns:
        df[column] = df[column].apply(lambda value: round(value, num))
    return df

In [None]:
def round_int(df):
    """Convert every float column of ``df`` to integers.

    Each value is passed through the built-in ``int``, so fractions are
    truncated toward zero rather than rounded. Values ``int`` cannot convert
    (for example NaN) raise. Columns are processed one at a time with
    ``Series.apply`` rather than ``DataFrame.applymap``, which pandas
    deprecated in 2.1.

    Args:
        df: DataFrame to modify in place. Only columns selected by
            ``select_dtypes(include=float)`` are touched.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in df.select_dtypes(include=float).columns:
        df[column] = df[column].apply(int)
    return df

In [None]:
def read_all():
    """Load the six CSV datasets and normalise their float columns.

    Each file is read with ``read_data``. The accident count, yearly accident
    and theft frames are converted with ``round_int``; the hourly accident,
    location and hourly location frames are rounded to 3 digits with
    ``round_up``. The ``hour`` column of the hourly accident frame is also
    cast to ``int16``.

    Returns:
        Tuple ``(df_accident, df_accident_year, df_accident_hour, df_theft,
        df_location, df_hour_mean)``.
    """
    sources = (
        '../data/accident_counts.csv',
        '../data/accident_counts_animation.csv',
        '../data/accident_animation_min.csv',
        '../data/theft_counts.csv',
        '../data/nextbike_location_change_mean.csv',
        '../data/nextbike_location_animation_mean.csv',
    )
    # Read everything first, in the listed order, before any post-processing
    accident, accident_year, accident_hour, theft, location, hour_mean = (
        read_data(csv_path) for csv_path in sources
    )

    decimals = 3
    accident = round_int(accident)
    accident_year = round_int(accident_year)
    accident_hour = round_up(accident_hour, decimals)
    accident_hour['hour'] = accident_hour['hour'].astype('int16')
    theft = round_int(theft)
    location = round_up(location, decimals)
    hour_mean = round_up(hour_mean, decimals)

    return accident, accident_year, accident_hour, theft, location, hour_mean

In [None]:
# Load all datasets once; the map cells below read these module-level frames.
df_accident, df_accident_year, df_accident_hour, df_theft, df_location, df_hour_mean = read_all()

UTM to WGS84 conversion in order to make GeoJSON

In [None]:
def convert_polygon(df):
    """Re-project the polygons of ``df`` from UTM to lon/lat and export them.

    Each cell in the third column (positional index 2) is read through its
    string form, expected to look like ``POLYGON ((x y, x y, ...))``. It is
    then overwritten in place with a ``Polygon`` built from the converted
    coordinates. Finally the frame is written to ``../data/plr.geojson`` with
    the GeoJSON driver. Nothing is returned.

    NOTE(review): the text parsing presumably only copes with single-ring
    ``POLYGON`` values; confirm the input never holds holes or multipolygons.
    """
    utm33 = Proj(proj='utm', zone=33, ellps='WGS84', preserve_units=False)
    geometry_col = 2

    for row in range(len(df)):
        # Reduce the text form to a flat token list: POLYGON x0 y0 x1 y1 ...
        tokens = (
            str(df.iloc[row, geometry_col])
            .replace('((', '')
            .replace('))', '')
            .replace(',', '')
            .split()
        )
        tokens.remove('POLYGON')
        numbers = [float(tok) for tok in tokens]
        utm_points = np.array(numbers).reshape((len(numbers) // 2, 2))

        # inverse=True maps projected x/y back to lon/lat, one point at a time
        wgs_points = [
            list(utm33(easting, northing, inverse=True))
            for easting, northing in utm_points
        ]
        df.iloc[row, geometry_col] = Polygon(wgs_points)

    df.to_file('../data/plr.geojson', driver='GeoJSON')

In [None]:
#Run once to get GeoJSON file
#convert_polygon(gdf_plr)

In [None]:
def read_geojson():
    """Load ``../data/plr.geojson`` and return the parsed JSON object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes. It is read as UTF-8 rather than with the platform
    default encoding.

    Returns:
        The object produced by ``json.load`` for the file's content.
    """
    with open('../data/plr.geojson', encoding='utf-8') as f:
        return json.load(f)

In [None]:
def get_geojson(area):
    """Return the PLR GeoJSON with a ``PLR_ID`` key injected into features.

    The GeoJSON is reloaded through ``read_geojson`` on every call. Feature
    ``k`` is paired with row ``k`` of the module-level ``gdf_plr`` frame,
    whose first column supplies the id.

    Args:
        area: ``1`` tags every feature (full area). ``0`` tags only features
            whose id does not start with one of the excluded 3-character
            prefixes (limited area for the bike-sharing location maps).

    Returns:
        The GeoJSON dict, or ``None`` after printing a message when ``area``
        is neither 0 nor 1.
    """
    geojson = read_geojson()

    # 3-character PLR_ID prefixes left untagged in the limited (area == 0) view
    excluded_prefixes = {
        '032', '033', '034', '035', '042',
        '051', '052', '053', '054', '062',
        '063', '064', '075', '076', '082',
        '083', '084', '092', '093',
        '094', '095', '115', '101', '102',
        '103', '111', '112', '113', '114',
        '104', '121', '122', '124', '125', '126',
    }

    # The id is injected after loading; it is not read from the file itself
    if area == 1:
        for idx, feature in enumerate(geojson['features']):
            feature['PLR_ID'] = gdf_plr.iloc[idx, 0]
        return geojson

    if area == 0:
        for idx, feature in enumerate(geojson['features']):
            plr_id = gdf_plr.iloc[idx, 0]
            if str(plr_id)[:3] not in excluded_prefixes:
                feature['PLR_ID'] = plr_id
        return geojson

    print('Failed. Please enter 0 or 1')
    return None

## Exploring Bezirke GeoJSON

In [None]:
# Load the Bezirke boundaries. The with-block closes the file once parsing is
# done, and the file is read as UTF-8 rather than the platform default.
with open('../raw_data/bezirksgrenzen.geojson', encoding='utf-8') as f:
    geojson_berlin = json.load(f)

# Copy each feature's 'Gemeinde_schluessel' property, as a string, into a
# top-level 'Bezirk' key on that feature.
for feature in geojson_berlin['features']:
    feature['Bezirk'] = str(feature['properties']['Gemeinde_schluessel'])

    
    
#['Gemeinde_schluessel']
#['Gemeinde_schluessel']

# Mapping

## Bike Sharing Location Distribution

### Hourly Distribution

In [None]:
# Display names for data columns; passed as ``labels=`` to each map figure.
labels = {
    'value': 'Relative % (avg. = 0)',
    'avg': '% of bikes',
    'theft_count': 'count',
    'hourly_accident': 'Relative % (min. = 0)',
}

In [None]:
def map_hourly_location(df):
    """Draw, pickle and show the hourly bike-location choropleth.

    Builds a ``choropleth_mapbox`` figure over the limited-area GeoJSON
    (``get_geojson(0)``), coloured by the ``value`` column on a fixed
    [-0.5, 0.5] range and animated over ``hour``. The figure is pickled to
    ``../pickle/map_hourly_location.pkl`` and then displayed.

    Args:
        df: Frame providing the ``PLR_ID``, ``PLR_NAME``, ``value`` and
            ``hour`` columns.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    fig = px.choropleth_mapbox(df, geojson=get_geojson(0),
                               featureidkey='PLR_ID', locations='PLR_ID',
                               color='value',
                               range_color=[-0.5, 0.5],
                               animation_frame="hour",
                               color_continuous_midpoint=0,
                               hover_name='PLR_NAME',
                               color_continuous_scale="RdBu_r",
                               mapbox_style="open-street-map",
                               zoom=10, opacity=0.8,
                               center={'lat': 52.52, 'lon': 13.405},
                               labels=labels,
                               )
    # Frame duration of the first animation button (presumably "play")
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1500
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # Save as pickle; the with-block guarantees the file is flushed and closed
    with open('../pickle/map_hourly_location.pkl', 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)
    return fig.show()

In [None]:
# Show the hourly location map; also rewrites ../pickle/map_hourly_location.pkl
map_hourly_location(df_hour_mean)

In [None]:
#Pickle loading test
#file = open('../pickle/map_hourly_location.pkl', 'rb')
#object_file = pickle.load(file)
#file.close()
#object_file.show()

### Average Distribution

In [None]:
def map_location_average(df):
    """Draw, pickle and show the average bike-location choropleth.

    Builds a ``choropleth_mapbox`` figure over the full-area GeoJSON
    (``get_geojson(1)``), coloured by the ``avg`` column on a range from 0 to
    that column's maximum. The figure is pickled to
    ``../pickle/map_avg_location.pkl`` and then displayed.

    Args:
        df: Frame providing the ``PLR_ID``, ``PLR_NAME`` and ``avg`` columns.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    fig = px.choropleth_mapbox(df, geojson=get_geojson(1),
                               featureidkey='PLR_ID', locations='PLR_ID',
                               color='avg',
                               range_color=[0, df['avg'].max()],
                               color_continuous_midpoint=0,
                               hover_name='PLR_NAME',
                               color_continuous_scale="OrRd",
                               mapbox_style="open-street-map",
                               zoom=9, opacity=0.6,
                               center={'lat': 52.52, 'lon': 13.405},
                               labels=labels,
                               )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # Save as pickle; the with-block guarantees the file is flushed and closed
    with open('../pickle/map_avg_location.pkl', 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)
    return fig.show()

In [None]:
# Show the average location map; also rewrites ../pickle/map_avg_location.pkl
map_location_average(df_location)

## Bike Accident Animation

### Hourly Distribution

In [None]:
def map_hourly_accident(df):
    """Draw, pickle and show the hourly bike-accident choropleth.

    Builds a ``choropleth_mapbox`` figure over the full-area GeoJSON
    (``get_geojson(1)``), coloured by the ``hourly_accident`` column on a
    fixed [0, 500] range and animated over ``hour``. The figure is pickled to
    ``../pickle/map_hourly_accident.pkl`` and then displayed.

    Args:
        df: Frame providing the ``PLR_ID``, ``PLR_NAME``, ``hourly_accident``
            and ``hour`` columns.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    fig = px.choropleth_mapbox(df, geojson=get_geojson(1),
                               featureidkey='PLR_ID', locations='PLR_ID',
                               color='hourly_accident',
                               range_color=[0, 500],
                               animation_frame="hour",
                               color_continuous_midpoint=0,
                               hover_name='PLR_NAME',
                               color_continuous_scale="OrRd",
                               mapbox_style="open-street-map",
                               zoom=9, opacity=0.8,
                               center={'lat': 52.52, 'lon': 13.405},
                               labels=labels,
                               )
    # Frame duration of the first animation button (presumably "play")
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1500
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # Save as pickle; the with-block guarantees the file is flushed and closed
    with open('../pickle/map_hourly_accident.pkl', 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)
    return fig.show()

In [None]:
#import matplotlib.pyplot as plt
#df_accident_hour[df_accident_hour['PLR_ID']=='01100206'].value
#plt.hist(df_accident_hour.value)

In [None]:
# Show the hourly accident map; also rewrites ../pickle/map_hourly_accident.pkl
map_hourly_accident(df_accident_hour)

### Yearly Distribution

In [None]:
def map_yearly_accident(df):
    """Draw, pickle and show the per-year bike-accident choropleth.

    Builds a ``choropleth_mapbox`` figure over the full-area GeoJSON
    (``get_geojson(1)``), coloured by the ``count`` column on a fixed
    [0, 100] range and animated over ``year``. The figure is pickled to
    ``../pickle/map_yearly_accident.pkl`` and then displayed.

    Args:
        df: Frame providing the ``PLR_ID``, ``PLR_NAME``, ``count`` and
            ``year`` columns.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    fig = px.choropleth_mapbox(df, geojson=get_geojson(1),
                               featureidkey='PLR_ID', locations='PLR_ID',
                               color='count',
                               range_color=[0, 100],
                               animation_frame="year",
                               hover_name='PLR_NAME',
                               color_continuous_scale="OrRd",
                               mapbox_style="open-street-map",
                               zoom=9, opacity=0.6,
                               center={'lat': 52.52, 'lon': 13.405},
                               labels=labels,
                               )
    # Frame duration of the first animation button (presumably "play")
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # Save as pickle; the with-block guarantees the file is flushed and closed
    with open('../pickle/map_yearly_accident.pkl', 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)
    return fig.show()

In [None]:
# Show the yearly accident map; also rewrites ../pickle/map_yearly_accident.pkl
map_yearly_accident(df_accident_year)

## Bike Theft

### Total Theft

In [None]:
def map_theft(df):
    """Draw, pickle and show the total bike-theft choropleth.

    Builds a ``choropleth_mapbox`` figure over the full-area GeoJSON
    (``get_geojson(1)``), coloured by the ``theft_count`` column on a fixed
    [0, 200] range. The figure is pickled to ``../pickle/map_total_theft.pkl``
    and then displayed.

    Args:
        df: Frame providing the ``PLR_ID``, ``PLR_NAME`` and ``theft_count``
            columns.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    fig = px.choropleth_mapbox(df, geojson=get_geojson(1),
                               featureidkey='PLR_ID', locations='PLR_ID',
                               color='theft_count',
                               range_color=[0, 200],
                               color_continuous_midpoint=0,
                               hover_name='PLR_NAME',
                               color_continuous_scale="OrRd",
                               mapbox_style="open-street-map",
                               zoom=9, opacity=0.6,
                               center={'lat': 52.52, 'lon': 13.405},
                               labels=labels,
                               )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # Save as pickle; the with-block guarantees the file is flushed and closed
    with open('../pickle/map_total_theft.pkl', 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)
    return fig.show()

In [None]:
# Show the total theft map; also rewrites ../pickle/map_total_theft.pkl
map_theft(df_theft)