# Semester Project - Nextbike
## Task 2 - Visualization

### a) For the summer month (i.e., June, August, or September) with most trips, visualize the number of started trips per PLZ region (you’ll have to find geo data for that yourselves!) in a map.

In [None]:
#import relevant libraries for visualization
#Requirement: execute conda install -c conda-forge geopy
import pandas as pd
import numpy as np
import geopy
import geopandas as gpd
import folium
from folium import plugins
from folium.plugins import HeatMapWithTime
from folium.plugins import MarkerCluster
#import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle



# import timeit to measure execution times
import timeit

#imports of folium for map visualization
import folium
from folium import plugins
from folium.plugins import HeatMap

#import Matplotlib Ticker to style axes of figures
import matplotlib.ticker as ticker

import warnings
warnings.filterwarnings('ignore')


#### Get the geodata of Dortmund

Source of geodata: https://www.suche-postleitzahl.org/plz-karte-erstellen

In [None]:
# get geodata of germany (postal codes and their areas/polygons)
districts_germany = gpd.read_file("../../data/external/germany_postalcodes.geojson")
districts_germany.head()

In [None]:
# Keep only the postal-code areas whose "note" mentions Dortmund.
# .copy() makes the result an independent frame, so the column assignments
# below do not write into a slice of districts_germany (SettingWithCopyWarning).
# na=False treats areas without a note as "no match" instead of producing a
# NaN entry in the mask, which cannot be used for boolean indexing.
districts_dortmund = districts_germany[
    districts_germany["note"].str.contains("Dortmund", na=False)
].copy()

# Centre of every district, stored for later analysis. The centroids are
# computed once and reused for both coordinate columns.
# NOTE(review): the centroids are taken in the CRS of the source file
# (presumably geographic EPSG:4326) - good enough to place a point per
# district, not for exact geometry. Confirm if precision matters.
district_centroids = districts_dortmund["geometry"].centroid
districts_dortmund["longitude"] = district_centroids.x
districts_dortmund["latitude"] = district_centroids.y

districts_dortmund

#### Get the trip data (from Task 1)

In [None]:
trips = pd.read_csv("../../data/processed/dortmund_trips.csv", index_col = 0)

In [None]:
def make_point(row):
    """Return the start position of a rental as ``Point(longitude, latitude)``.

    ``row`` is one record of the trips table; its ``longitude_start`` and
    ``latitude_start`` attributes are read. Longitude is passed first because
    it is the x coordinate of the point.
    """
    x_coord = row.longitude_start
    y_coord = row.latitude_start
    return Point(x_coord, y_coord)

##### Convert start positions of trip data to geographical points

In [None]:
# Go through every row and build a point from its start longitude/latitude.
trips["geometry"] = trips.apply(make_point, axis=1)
# No CRS is assigned here: `trips` is still a plain pandas DataFrame, so
# `trips.crs = ...` would only create an ordinary Python attribute (pandas
# warns about it) that geopandas never reads. The CRS is declared when the
# GeoDataFrame is built from this frame.

In [None]:
# additional column for aggregations
trips["count"] = 1

In [None]:
trips.head()

In [None]:
# Convert the trips table to a GeoDataFrame so it can later be joined
# spatially with the district polygons of Dortmund. The start coordinates are
# longitude/latitude values, declared here as EPSG:4326. The CRS is passed as
# an authority string because the {'init': 'epsg:4326'} dict form is
# deprecated in current geopandas/pyproj releases.
geo_trips = gpd.GeoDataFrame(trips, crs="EPSG:4326", geometry=trips.geometry)

#### Calculate amount of trips per station

##### Group the data per station

In [None]:
# Number of rentals for every distinct combination of start latitude, start
# longitude and station name (the helper column "count" is counted per group).
trips_per_station = (
    trips
    .groupby(["latitude_start", "longitude_start", "p_name_start"])[["count"]]
    .count()
)

In [None]:
trips_per_station

Some stations are listed multiple times, so we have to fix this

In [None]:
trips_per_station.reset_index(inplace=True)

In [None]:
# One row per station name: rentals of all coordinate variants of a station
# are summed, and its position is the mean of those coordinates.
trips_per_station = trips_per_station.groupby("p_name_start").agg(
    count=("count", "sum"),
    latitude_start=("latitude_start", "mean"),
    longitude_start=("longitude_start", "mean"),
)

In [None]:
trips_per_station.reset_index(inplace=True)

In [None]:
trips_per_station

#### Visualize the stations and their amount of rentals

In [None]:
stations_map = folium.Map(location = [51.5, 7.5], zoom_start=11)

# draw the borders of Dortmund and its districts
folium.Choropleth(
    geo_data=districts_dortmund,
    fill_color = 'grey',
    fill_opacity=0.4,
    ).add_to(stations_map)

# Draw an interactive (clickable) circle for each station. The radius grows
# linearly with the number of rentals that started at the station.
for index, row in trips_per_station.iterrows():
    station_rentals = row['count']
    station_name = row['p_name_start']
    station_info = "Name: {}\n\nAmount of rentals: {}\n".format(station_name,station_rentals)

    folium.Circle(
        location=[row['latitude_start'],row['longitude_start']],
        popup=station_info,
        radius=station_rentals*0.009,
        color='red',
        fill=True,
        # 'red' instead of '#red': a leading '#' announces a hex code, so
        # '#red' is not a valid colour and the fill was not reliably red.
        fill_color='red'
    ).add_to(stations_map)

display(stations_map)

This map shows each station in Dortmund as a red circle. The bigger the circle, the higher the demand.
Click on a circle for more information.

#### Get the summer month with the most trips

In [None]:
# Rentals per month: after count() every column holds the number of non-null
# entries of that column within the month.
trips_per_month = trips.groupby("month").count()

# The task asks for the busiest *summer* month (June, August or September),
# so the maximum is searched among those months only instead of across the
# whole year.
summer_months = [6, 8, 9]
summer_trips = trips_per_month.loc[
    trips_per_month.index.isin(summer_months), 'datetime_start'
]

# idxmax returns the index label, i.e. the month number with the most trips
maxValue = summer_trips.idxmax()

print("Month number", maxValue, "has the highes value")


June has the most trips

In [None]:
# Trips of June (month 6), reduced to the two columns needed for the spatial
# join with the districts: the start point and the per-trip counter.
geo_trips_june = geo_trips[geo_trips["month"] == 6][['geometry', 'count']]

In [None]:
# Spatial left join: every June trip keeps its row and receives the columns of
# the Dortmund district whose POLYGON contains the trip's start POINT. Trips
# that lie in no district get NaN in the district columns.
# `predicate` is the current name of the former `op` keyword, which geopandas
# deprecated in 0.10 and removed in 1.0.
trips_with_postalcode_june = gpd.sjoin(
    geo_trips_june, districts_dortmund, how='left', predicate='within'
)

In [None]:
trips_with_postalcode_june

This dataframe shows the trips with their geographical location and their postalcode of the start-station 

##### Calculate the amount of trips per postal code

In [None]:
trips_with_postalcode_june = trips_with_postalcode_june.drop(columns = ["index_right", "note", "qkm", "einwohner"])

In [None]:
trips_with_postalcode_june.head()

In [None]:
# Number of June trips per postal code (non-null "count" entries per group).
amount_per_postalcode = (
    trips_with_postalcode_june
    .groupby("plz")[["count"]]
    .count()
)

In [None]:
amount_per_postalcode

In [None]:
# Attach the rental count per postal code to the district geometries.
# how='left' keeps every Dortmund district, also those without any rental
# (their "count" is NaN); indicator=True adds a "_merge" column that tells
# whether a district found a matching count ("both") or not ("left_only").
district_with_count = districts_dortmund.merge(amount_per_postalcode, on='plz', how='left', indicator=True)

In [None]:
district_with_count

This dataframe shows the geo data/postalcodes of dortmund with the respective amount of rentals

Some rows show left_only/NaN: no rentals were started in these districts, or there are no stations in them.

#### Visualize the amount of rentals per postalcode for the month June in Dortmund

In [None]:
district_with_count["count"] = district_with_count["count"].fillna(0)

In [None]:
# Colour classes of the choropleth: the bin edges are the 0 %, 10 %, ..., 100 %
# quantiles of the rental counts, so every colour covers roughly the same
# number of districts.
# NOTE(review): if many districts share the same count (e.g. 0), neighbouring
# quantiles are equal and the bin edges repeat - confirm folium handles that.
quan = np.arange(0,1.1,0.1)
bins = list(district_with_count['count'].quantile(quan))
districts_map = folium.Map(location = [51.514244, 7.468429], zoom_start=10.5)

# Colour every district by its rental count; the same frame provides both the
# geometries (geo_data) and the values (data), linked through the postal code.
folium.Choropleth(
    geo_data=district_with_count,
    data=district_with_count,
    columns=['plz','count'],
    key_on='properties.plz',
    fill_color='RdYlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    line_weight=1,
    line_color='black',
    legend_name='Number of bookings per postcode',
    bins=bins    
    ).add_to(districts_map)

# Second layer on top of the choropleth: shows postal code and rental amount
# as a tooltip when the mouse hovers over a district.
folium.GeoJson(
    district_with_count,
    style_function=lambda feature:{"color":"black", "weight":1,},
    highlight_function=lambda x: {"weight":2, "color":"black","fillOpacity":0.5},
    tooltip=folium.features.GeoJsonTooltip(fields=['count', 'plz'], aliases=['Amount of rentals:', 'Postcal code:'])   
    ).add_to(districts_map)

districts_map

# The district in the south-west is displayed in dark green because the University of Dortmund is located within this district.

This map shows each district/postal code in Dortmund with its demand. The demand levels can be distinguished by color.
Hover over the districts for more information.

### Visualize daily heatmap 

### Get the postalcode for each trip

In [None]:
# Spatial left join over all trips: a trip receives the columns of the
# Dortmund district whose POLYGON contains its start POINT; trips outside all
# districts keep NaN in the district columns.
# `predicate` is the current name of the former `op` keyword, which geopandas
# deprecated in 0.10 and removed in 1.0.
trips_with_postalcode = gpd.sjoin(
    geo_trips, districts_dortmund, how='left', predicate='within'
)

In [None]:
# drop unnecessary columns
trips_with_postalcode = trips_with_postalcode.drop(columns = ["datetime_start", "b_number", "p_name_start", "datetime_end", "latitude_end", "p_name_end", "longitude_end", "trip_duration", "distance", "weekday", "weekend", "day", "index_right", "note", "qkm", "einwohner"])
trips_with_postalcode.head()

In [None]:
# Split the trips into four periods of the day by their hour:
#   fife_nine   05-09 h    ten_three  10-15 h
#   four_eight  16-20 h    nine_four  21-04 h
# Hour 5 belongs to the morning period. The previous bounds (hour > 5 for the
# morning, hour < 6 for the night) put it into the night slice, which
# contradicted both the variable names and the labels used for the heatmap
# slider ('5:00 - 9:00', '21:00 - 4:00').
fife_nine = trips_with_postalcode.loc[(trips_with_postalcode.hour >= 5) & (trips_with_postalcode.hour < 10)]
ten_three = trips_with_postalcode.loc[(trips_with_postalcode.hour >= 10) & (trips_with_postalcode.hour < 16)]
four_eight = trips_with_postalcode.loc[(trips_with_postalcode.hour >= 16) & (trips_with_postalcode.hour < 21)]
nine_four = trips_with_postalcode.loc[(trips_with_postalcode.hour < 5) | (trips_with_postalcode.hour >= 21)]

In [None]:
# Rentals per start position (identified by its coordinates) for each period.
# After count() every remaining column holds the number of non-null entries
# of that column in the group.
fife_nine, ten_three, four_eight, nine_four = (
    period.groupby(['latitude_start', 'longitude_start']).count()
    for period in (fife_nine, ten_three, four_eight, nine_four)
)

In [None]:
# Turn the group keys (start coordinates) back into ordinary columns ...
fife_nine = fife_nine.reset_index()
ten_three = ten_three.reset_index()
four_eight = four_eight.reset_index()
nine_four = nine_four.reset_index()

# ... and build one list of [latitude, longitude, weight] entries per period
# for the heatmap slider. The weight is taken from the "hour" column, which
# holds the rental count of the position after the aggregation.
time_data = [
    period[['latitude_start', 'longitude_start', 'hour']].values.tolist()
    for period in (fife_nine, ten_three, four_eight, nine_four)
]

In [None]:
# Heatmap with a time slider: one frame per period of the day.
heatmap_daily = folium.Map(location = [51.5135872,7.4652981], zoom_start=11.2)

# `index` holds the slider labels; they are matched to the frames of
# time_data by position, so both lists must be in the same order.
# NOTE(review): the labels are free text - verify that they describe the hour
# ranges used when the periods were filtered.
plugins.HeatMapWithTime(time_data,
                        index=['5:00 - 9:00','10:00 - 15:00','16:00 - 20:00','21:00 - 4:00'],
                        auto_play=True,
                        radius=30,
                        overlay=False,
                        use_local_extrema=True).add_to(heatmap_daily)

# District borders of Dortmund as a faint layer for orientation.
folium.Choropleth(
    geo_data=districts_dortmund,
    fill_opacity=0.1,
    line_opacity=1,).add_to(heatmap_daily)

display(heatmap_daily)

This heatmap shows the distribution of the demand (per station) at different times of day.
Use the slider to select a time period.

### Visualize monthly heatmap

This is not done per station (as before), but per postal code.
However, we had to map each district to its center, because otherwise it could not be visualized. The heat therefore moves from the center of one district to the center of another.

In [None]:
# One frame per calendar month, selected by the month number.
trips_jan = trips_with_postalcode[trips_with_postalcode["month"] == 1]
trips_feb = trips_with_postalcode[trips_with_postalcode["month"] == 2]
trips_mar = trips_with_postalcode[trips_with_postalcode["month"] == 3]
trips_apr = trips_with_postalcode[trips_with_postalcode["month"] == 4]
trips_may = trips_with_postalcode[trips_with_postalcode["month"] == 5]
trips_jun = trips_with_postalcode[trips_with_postalcode["month"] == 6]

# There is no frame for month 7: July does not exist in the data.

trips_aug = trips_with_postalcode[trips_with_postalcode["month"] == 8]
trips_sep = trips_with_postalcode[trips_with_postalcode["month"] == 9]
trips_oct = trips_with_postalcode[trips_with_postalcode["month"] == 10]
trips_nov = trips_with_postalcode[trips_with_postalcode["month"] == 11]
trips_dec = trips_with_postalcode[trips_with_postalcode["month"] == 12]

In [None]:
# Aggregate the rentals per (mapped) district for every month: rentals are
# summed, and the district is represented by its longitude/latitude columns
# (constant within a postal code, so the mean returns that value).
district_aggregation = {'count': 'sum', 'longitude': 'mean', 'latitude': 'mean'}

trips_jan = trips_jan.groupby("plz").agg(district_aggregation)
trips_feb = trips_feb.groupby("plz").agg(district_aggregation)
trips_mar = trips_mar.groupby("plz").agg(district_aggregation)
trips_apr = trips_apr.groupby("plz").agg(district_aggregation)
trips_may = trips_may.groupby("plz").agg(district_aggregation)
# June and August are aggregated from their own frames. A copy-paste slip
# previously built June from the May result and August from the June result,
# so both months showed May's numbers in the heatmap.
trips_jun = trips_jun.groupby("plz").agg(district_aggregation)

trips_aug = trips_aug.groupby("plz").agg(district_aggregation)
trips_sep = trips_sep.groupby("plz").agg(district_aggregation)
trips_oct = trips_oct.groupby("plz").agg(district_aggregation)
trips_nov = trips_nov.groupby("plz").agg(district_aggregation)
trips_dec = trips_dec.groupby("plz").agg(district_aggregation)

In [None]:
# One list of [latitude, longitude, count] entries per month, in calendar
# order (without July), as input for the heatmap with time slider.
data = [
    monthly[['latitude', 'longitude', 'count']].values.tolist()
    for monthly in (
        trips_jan, trips_feb, trips_mar, trips_apr, trips_may, trips_jun,
        trips_aug, trips_sep, trips_oct, trips_nov, trips_dec,
    )
]

    

In [None]:
# Heatmap with a time slider: one frame per month.
heatmap_monthy_per_district = folium.Map(location = [51.5135872,7.4652981], zoom_start=11.2)

# `index` holds the slider labels; they are matched to the frames of `data`
# by position, so the month order (without July) must be the same in both.
plugins.HeatMapWithTime(data,
                        index=['Jan','Feb','Mar', 'Apr', 'May', 'Jun', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
                        auto_play=True,
                        radius=30,
                        overlay=False,
                        use_local_extrema=True).add_to(heatmap_monthy_per_district)

# District borders of Dortmund as a faint layer for orientation.
folium.Choropleth(
    geo_data=districts_dortmund,
    fill_opacity=0.1,
    line_opacity=1,).add_to(heatmap_monthy_per_district)

display(heatmap_monthy_per_district)

### b) For one moment in time, visualize the number of bikes at fixed stations meaningfully.

In [None]:
# Load the initial dataset and drop the "non-stations" (rows with p_number 0).
# .copy() turns the filtered result into an independent frame, so the column
# assignment below does not write into a slice (SettingWithCopyWarning).
df = pd.read_csv("../../data/internal/dortmund.csv", index_col = 0)
df = df[df['p_number'] != 0].copy()
# Plain column assignment replaces the column and therefore its dtype.
# `df.loc[:, 'datetime'] = ...` writes into the existing column instead and,
# with recent pandas versions, can leave it as an object column.
df['datetime'] = pd.to_datetime(df['datetime'])

In [None]:
df.head()

#### Create dataframe with bike numbers of every station

##### Create a column for every station in the dataset

In [None]:
stationlist = df['p_name'].unique()

In [None]:
# Create a dataframe with one row per hour of the observation period.
# np.arange excludes its stop value, so the stop is 2020-01-01 01:00. That
# keeps every hour of 2019-12-31 (the moment visualised later lies on that
# day) plus 2020-01-01 00:00, the bucket that events after 23:00 on the last
# day are rounded up to. With the former stop '2019-12-31' the grid ended at
# 2019-12-30 23:00 and the last day only appeared through rows appended
# while filling the table.
station_bikenumbers = pd.DataFrame(
    np.arange(
        np.datetime64('2019-01-20T00'),
        np.datetime64('2020-01-01T01'),
        dtype='datetime64[h]',
    ),
    columns = ['time'],
)

In [None]:
station_bikenumbers['time'] = station_bikenumbers['time'].dt.strftime('%Y-%m-%d-%H')
station_bikenumbers

In [None]:
# Add one column per station, initially empty (NaN) for every hour.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
for station in stationlist:
    station_bikenumbers[station] = np.nan

In [None]:
station_bikenumbers = station_bikenumbers.set_index('time')

In [None]:
station_bikenumbers

#### Fill the dataframe by going through the eventlog

In [None]:
# Replay the event log row by row. Each event is assigned to the hour bucket
# obtained by rounding its timestamp up to the full hour, and the station's
# bike count is written into that cell. A later event for the same station
# and bucket overwrites the earlier one.
for index, row in df.iterrows():
    event_time = pd.to_datetime(row.get('datetime'))
    timestamp = event_time.ceil('H').strftime('%Y-%m-%d-%H')
    bikes_available = pd.to_numeric(row.get('p_bikes'))
    station_bikenumbers.at[timestamp, row.get('p_name')] = bikes_available

In [None]:
station_bikenumbers

####  Fill NaN values with last available value

In [None]:
# Carry the last known bike count of every station forward through the hours
# without an event. ffill() replaces fillna(method='ffill'), which is
# deprecated in current pandas.
station_bikenumbers.ffill(inplace=True)
# Hours before the first event of a station are still empty; they are set to
# the number 0.0. The former string '0.0' turned the affected columns into
# object columns and broke the numeric use of the values (int(...), * 20)
# in the map cells.
station_bikenumbers.fillna(value=0.0, inplace=True)
station_bikenumbers

This matrix shows the amount of available bikes at each station at different times

#### Generate a station list with geo coordinates

In [None]:
# Station list with coordinates: the per-station table without the rental
# count, with the "_start" suffixes removed from the column names.
stations = (
    trips_per_station
    .drop(columns=["count"])
    .rename(columns={'p_name_start':'p_name', 'latitude_start':'latitude', 'longitude_start': 'longitude'})
)
stations

#### Visualization of one moment

In [None]:
data = pd.DataFrame(station_bikenumbers.loc['2019-12-31-11'])


In [None]:
data.reset_index(inplace=True)

In [None]:
data = data.rename(columns={'index': 'p_name', '2019-12-31-11':'NumberOfBikes'})
data

In [None]:
bikenumber_one_moment = pd.merge(data, stations, on = "p_name")

In [None]:
bikenumber_one_moment

In [None]:
bikenumer_per_stations_map = folium.Map(location = [51.5, 7.5], zoom_start=11)

# draw the borders of Dortmund and its districts
folium.Choropleth(
    geo_data=districts_dortmund,
    fill_color = 'grey',
    fill_opacity=0.4,
    ).add_to(bikenumer_per_stations_map)

# One clickable circle per station; the radius grows linearly with the number
# of bikes available at the selected moment.
for index, row in bikenumber_one_moment.iterrows():
    bikenumber = int(row['NumberOfBikes'])
    station_name = row['p_name']
    station_info = "Name: {}\n\nNumber of bikes: {}\n".format(station_name,bikenumber)

    folium.Circle(
      location=[row['latitude'],row['longitude']],
      popup=station_info,
      radius=row['NumberOfBikes']*20,
      color='red',
      fill=True,
      # 'red' instead of '#red': a leading '#' announces a hex code, so
      # '#red' is not a valid colour and the fill was not reliably red.
      fill_color='red'
    ).add_to(bikenumer_per_stations_map)

display(bikenumer_per_stations_map)

In [None]:
# Marker variant of the bike-number map: one standard marker per station
# whose popup shows the station name and its number of bikes.
bikenumer_per_stations_map = folium.Map(location=[51.5, 7.5], zoom_start=11)

# borders of Dortmund and its districts as grey background layer
district_layer = folium.Choropleth(
    geo_data=districts_dortmund,
    fill_color='grey',
    fill_opacity=0.4,
)
district_layer.add_to(bikenumer_per_stations_map)

for index, row in bikenumber_one_moment.iterrows():
    station_name = row['p_name']
    bikenumber = int(row['NumberOfBikes'])
    station_info = "Name: {}\n\nNumber of bikes: {}\n".format(station_name, bikenumber)
    station_marker = folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=station_info,
    )
    station_marker.add_to(bikenumer_per_stations_map)

display(bikenumer_per_stations_map)

These maps show the number of available bikes per station on 2019-12-31 at 11 o'clock.

### c) Create a heatmap based on an interesting aspect of the data, e.g., end locations of trips shortly before the start of a major public event.

In [None]:
df_station = trips[["p_name_start","latitude_start", "longitude_start", "p_name_end", "latitude_end","longitude_end", "coordinates_start", "coordinates_end"]]

In [None]:
df_station.head()

In [None]:
def create_map(shape= "../../data/external/dortmund_plz.geojson", center= [51.511838, 7.456943], tiles = 'Stamen Toner', zoom_start= 12, min_zoom=11, height="80%", width="80%"): 
    """Create a folium base map with a GeoJSON outline drawn on top.

    Parameters
    ----------
    shape : GeoJSON source handed to ``folium.GeoJson`` (by default the path
        of the Dortmund postal-code file).
    center : ``[latitude, longitude]`` the map is centred on.
    tiles : name of the tile set used as background.
        NOTE(review): newer folium releases may no longer ship the Stamen
        tile sets - confirm that the default still resolves.
    zoom_start, min_zoom : initial and minimum zoom level.
    height, width : size of the rendered map.

    Returns
    -------
    The ``folium.Map`` including the GeoJSON layer.
    """
    map_options = dict(
        location=center,
        tiles=tiles,
        zoom_start=zoom_start,
        min_zoom=min_zoom,
        height=height,
        width=width,
    )
    base_map = folium.Map(**map_options)

    outline_layer = folium.GeoJson(shape, name='geojson')
    outline_layer.add_to(base_map)

    return base_map

In [None]:
create_map(width="100%")

#### Visualize station

In [None]:
df_station

In [None]:
def station_capacity():
    """Return a base map with two heat layers over all trips in ``df_station``.

    The first layer is built from the start coordinates of the trips, the
    second one from their end coordinates; both use a radius of 20.
    """
    tmp_map = create_map(width="100%")

    coordinate_columns = (
        ["latitude_start", "longitude_start"],
        ["latitude_end", "longitude_end"],
    )
    for columns in coordinate_columns:
        heat_layer = plugins.HeatMap(df_station[columns], radius=20)
        tmp_map.add_child(heat_layer)

    return tmp_map

In [None]:
station_capacity()

In [None]:
from sklearn.utils import shuffle

def most_used_station(amount = 1000):
    """Return a map with clustered markers for a random sample of trips.

    For each sampled trip one marker is placed at the start position and one
    at the end position; all markers share a single ``MarkerCluster``.
    Despite the name, the trips are picked at random, not by usage.

    Parameters
    ----------
    amount : int
        Number of trips to draw. Values below 1 draw nothing; values above
        the number of available trips draw all of them.

    Returns
    -------
    The ``folium.Map`` with the marker cluster attached.
    """
    tmp_map = create_map()
    mc = MarkerCluster()

    # shuffle() returns the rows in random order; head() then keeps exactly
    # `amount` of them. The former `i <= amount` counter let amount + 1 rows
    # through.
    sampled_trips = shuffle(df_station).head(max(amount, 0))

    for _, row in sampled_trips.iterrows():
        mc.add_child(folium.Marker(location = [row["latitude_start"],row["longitude_start"]]))
        mc.add_child(folium.Marker(location = [row["latitude_end"],row["longitude_end"]]))

    # The cluster is attached once, after it has been filled, instead of
    # being re-added to the map in every loop iteration.
    tmp_map.add_child(mc)
    return tmp_map

In [None]:
most_used_station(2000)

In [None]:
def show_trips(amount = 500):
    """Return a map that draws a random sample of trips as straight lines.

    Each sampled trip becomes one thin, semi-transparent line from its start
    position to its end position.

    Parameters
    ----------
    amount : int
        Number of trips to draw. Values below 1 draw nothing; values above
        the number of available trips draw all of them.

    Returns
    -------
    The ``folium.Map`` with the trip lines.
    """
    tmp_map = create_map()

    # shuffle() returns the rows in random order; head() then keeps exactly
    # `amount` of them. The former `i <= amount` counter let amount + 1 rows
    # through.
    sampled_trips = shuffle(df_station).head(max(amount, 0))

    for _, row in sampled_trips.iterrows():
        folium.ColorLine(
            [[row["latitude_start"],row["longitude_start"]],[row["latitude_end"],row["longitude_end"]]],
            colors=[0,1,2],
            colormap=["blue","green"],
            weight=1,
            opacity=0.3,
        ).add_to(tmp_map)

    return tmp_map

In [None]:
show_trips(2000)

In [None]:
def test(date = "2019-01-20", street = "Signal Iduna Park", coord = None):
    """Map all trips of one day together with a marker for a place of interest.

    The map contains a blue marker for the place, one heat layer for the
    start positions and one for the end positions of the day's trips, and a
    line from start to end for every trip.

    Parameters
    ----------
    date : str
        Day to show. It is compared as text against ``datetime_start``
        (``date + " 00:00:00"`` to ``date + " 23:59:59"``), so it has to use
        the date format of that column (presumably ``YYYY-MM-DD``).
    street : str
        Place name or address. It is geocoded with Nominatim when no
        ``coord`` is given; otherwise it only serves as popup text.
    coord : sequence of two floats, optional
        ``[latitude, longitude]`` of the marker. When given, no geocoding
        request is made. Previously a non-empty ``coord`` skipped the
        Nominatim import and the function failed with a NameError.

    Returns
    -------
    The ``folium.Map`` with marker, heat layers and trip lines.

    Raises
    ------
    ValueError
        If ``street`` has to be geocoded and no location is found for it.
    """
    tmp_map = create_map()

    if coord:
        marker_location = [coord[0], coord[1]]
        marker_popup = street
    else:
        # Imported here so that geopy is only required when geocoding is
        # actually used; a missing package surfaces as ImportError.
        from geopy.geocoders import Nominatim

        geolocator = Nominatim(user_agent="http")
        loc = geolocator.geocode(street)
        if loc is None:
            # geocode() found nothing; fail with a clear message instead of
            # an AttributeError on loc.latitude.
            raise ValueError("Could not geocode {!r}".format(street))
        marker_location = [loc.latitude, loc.longitude]
        marker_popup = str(loc)

    folium.Marker(location = marker_location,
                  popup=marker_popup,
                  icon=folium.Icon(color='blue'),
                 ).add_to(tmp_map)

    # all trips that started on the requested day
    df_tmp = trips[(trips['datetime_start'] >= date +" 00:00:00") & (trips['datetime_start'] <= date+" 23:59:59")]

    tmp_map.add_child(plugins.HeatMap(df_tmp[["latitude_start","longitude_start"]], radius=20))
    tmp_map.add_child(plugins.HeatMap(df_tmp[["latitude_end","longitude_end"]], radius=20))

    for _, row in df_tmp.iterrows():
        folium.ColorLine(
            [[row["latitude_start"],row["longitude_start"]],[row["latitude_end"],row["longitude_end"]]],
            colors=[0,1,2],
            colormap=["red","blue"],
            weight=1,
            opacity=0.5,
        ).add_to(tmp_map)

    return tmp_map

In [None]:
test()

### d) Visualize the distribution of trip lengths per month. Compare the distributions to normal distributions with mean and standard deviation as calculated before (1.d))

### Outlier identification

In [None]:
# Identify outliers based on the trip duration: box plot of all durations,
# with the x axis limited to the range 0-55.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

ax = sns.boxplot(x=trips["trip_duration"], ax=ax)
# One tick per unit of duration. The step belongs inside np.arange; passed as
# the second argument of set_xticks it is not a tick spacing (that position
# is `labels`, or `minor` in older matplotlib versions).
ax.set_xticks(np.arange(0, 55, 1))
ax.set_xlim([0, 55])
fig.savefig("../../doc/figures/Outlier_identification.png")

In [None]:
trips_new = trips[trips["trip_duration"] < 49]
trips.trip_duration.describe()

In [None]:
trips_new.trip_duration.describe()

Roughly 27000 trips are dropped. The mean drops significantly, so the outliers, which include day rentals, affect the data considerably.

### Visualization of the trips

### Trip duration

##### Trip duration per hour

In [None]:
# Axis labels for the weekday plots, Monday first.
weekday_names = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Axis labels for the month plots. July is left out on purpose: there is no
# data for July.
month_names = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "August",
    "September",
    "October",
    "November",
    "December",
]

In [None]:
# Bar chart of the mean trip duration per hour, computed on all trips.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="hour", y="trip_duration", estimator=np.mean, ax=ax)
ax.set_title("Mean trip duration per hour with outliers")
plt.show()
fig.savefig("../../doc/figures/hourly_trips.png")

In [None]:
# Bar chart of the mean trip duration per hour, computed on the trips that
# remain after the duration filter (trips_new).
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips_new, x="hour", y="trip_duration", estimator=np.mean, ax=ax)
ax.set_title("Mean trip duration per hour")
plt.show()
fig.savefig("../../doc/figures/hourly_trips_outlier.png")

In [None]:
# Bar chart of the median trip duration per hour, computed on the trips that
# remain after the duration filter (trips_new).
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips_new, x="hour", y="trip_duration", estimator=np.median, ax=ax)
ax.set_title("Median of trip duration per hour")
plt.show()
fig.savefig("../../doc/figures/hourly_trips_median.png")

The difference between the dataset with and without outliers can be seen here. Nevertheless, the dataset including the outliers is used for the further visualizations, because rentals for a whole day should be taken into account as well.

##### Trip duration per month

In [None]:
# Bar chart of the mean trip duration per month, computed on all trips.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(x="month", y="trip_duration", data=trips, estimator=np.mean,ax=ax)
# One tick per month label. month_names has 11 entries (no July), so the
# former 12 tick positions did not match the number of labels.
plt.xticks(range(len(month_names)), month_names)
# The plot uses the full data set (`trips`), so the title says "with
# outliers"; it previously claimed the opposite.
plt.title("Trip duration per month with outliers")
plt.show()
fig.savefig("../../doc/figures/monthy_trips.png")

##### Trip duration per weekday

In [None]:
# Bar chart of the mean trip duration per weekday, labelled Monday to Sunday.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="weekday", y="trip_duration", estimator=np.mean, ax=ax)
plt.xticks(list(range(7)), weekday_names)
ax.set_title("Trip duration per weekday")
plt.show()
fig.savefig("../../doc/figures/Trip duration per weekdays.png")

##### Trip duration on weekdays and weekends

In [None]:
# Bar chart of the mean trip duration, split by the "weekend" flag.
fig_dims = (8, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="weekend", y="trip_duration", estimator=np.mean, ax=ax)
ax.set_title("Trip duration on weekdays and weekends")
plt.show()
fig.savefig("../../doc/figures/Trip duration on weekdays and weekends.png")

#### Amount of trips per hour, compared between weekdays and weekends

### Trip Distance
#### Trip distance per month

In [None]:
# Bar chart of the mean trip distance per month.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(x="month", y="distance", data=trips, estimator=np.mean, ax=ax)
# One tick per month label. month_names has 11 entries (no July), so the
# former 12 tick positions did not match the number of labels.
plt.xticks(range(len(month_names)), month_names)
plt.title("Trip distance per month")
plt.show()
fig.savefig("../../doc/figures/Trip distance per month.png")

##### Trip distance per weekday

In [None]:
# Bar chart of the mean trip distance per weekday, labelled Monday to Sunday.
fig_dims = (10, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="weekday", y="distance", estimator=np.mean, ax=ax)
ax.set_title("Trip distance per day")
plt.xticks(list(range(7)), weekday_names)
plt.show()
fig.savefig("../../doc/figures/Trip distance per weekday.png")

##### Trip distance per hour

In [None]:
# Bar chart of the mean trip distance per hour.
fig_dims = (10, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="hour", y="distance", estimator=np.mean, ax=ax)
ax.set_title("Trip distance per hour")
plt.show()
fig.savefig("../../doc/figures/Trip distance per hour.png")

##### Trip distance on weekdays and weekends

In [None]:
# Bar chart of the mean trip distance, split by the "weekend" flag.
fig_dims = (10, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.barplot(data=trips, x="weekend", y="distance", estimator=np.mean, ax=ax)
ax.set_title("Trip distance on weekdays and weekends")
plt.show()
fig.savefig("../../doc/figures/Trip distance weekend_weekday.png")

### Amount of trips
#### Amount of trips per month

In [None]:
# Number of trips per month.
fig_dims = (12, 8)
fig, ax = plt.subplots(figsize=fig_dims)

sns.countplot(x="month", data=trips, ax=ax)
# One tick per month label. month_names has 11 entries (no July), so the
# former 12 tick positions did not match the number of labels.
plt.xticks(range(len(month_names)), month_names)
plt.title("Amount of trips per month")
plt.show()
fig.savefig("../../doc/figures/Amount of trips per month.png")

Data recording began on 20th of January. That's the reason for the low amount of trips in January.

##### Amount of trips per weekday

In [None]:
# Number of trips per weekday, labelled Monday to Sunday.
fig_dims = (12, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.countplot(data=trips, x="weekday", ax=ax)
ax.set_title("Amount of trips per weekday")
plt.xticks(list(range(7)), weekday_names)
plt.show()
fig.savefig("../../doc/figures/Amount of trips per weekday.png")

##### Amount of trips per hour

In [None]:
# Number of trips per hour.
fig_dims = (12, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.countplot(data=trips, x="hour", ax=ax)
ax.set_title("Amount of trips per hour")
plt.show()
fig.savefig("../../doc/figures/Amount of trips per hour.png")

#### Amount of trips on weekdays and weekends

In [None]:
# Number of trips, split by the "weekend" flag.
fig_dims = (8, 6)
fig, ax = plt.subplots(figsize=fig_dims)

sns.countplot(data=trips, x="weekend", ax=ax)
ax.set_title("Amount of trips on weekdays and weekends")
plt.show()
fig.savefig("../../doc/figures/Amount of trips weekday_weekend.png")

#### Amount of trips per hour, compared between weekdays and weekends

In [None]:
# Relative hourly demand per day type: the share (in percent) of all weekend
# trips, respectively all weekday trips, that fall into each hour.
# The former version averaged every column per hour and then picked the mean
# trip duration. That fails on text columns with current pandas, and it did
# not match the quantity the following plot is labelled with ("Demand in
# percent", "Relative hourly trips per daytype").
weekend_trips_per_hour = trips[trips["weekend"] == 1].groupby("hour").size()
weekday_trips_per_hour = trips[trips["weekend"] == 0].groupby("hour").size()

# reindex guarantees one value for each of the 24 hours (0 for hours without
# any trip), which is what the plot's x axis np.arange(24) expects.
# NOTE(review): assumes "hour" holds integers 0-23 - confirm.
data_weekend = (
    weekend_trips_per_hour / weekend_trips_per_hour.sum() * 100
).reindex(range(24), fill_value=0.0)
data_weekday = (
    weekday_trips_per_hour / weekday_trips_per_hour.sum() * 100
).reindex(range(24), fill_value=0.0)

In [None]:
# Visualize relative hourly demand per hour: one line for weekends and one
# for weekdays over the 24 hours of the day.
x = np.arange(24)

Fig = plt.figure(figsize=(16, 8))
# the axes fill the whole figure area
ax = Fig.add_axes([0,0,1,1])

ax.plot(x, data_weekend, label="Weekend", color = 'LightCoral')
ax.plot(x, data_weekday, label="Weekday", color = 'cornflowerblue')

ax.legend(loc=0)
ax.set_xlabel('Time in hours')
ax.set_ylabel('Demand in percent')
ax.set_title('Relative hourly trips per daytype')
# Save this figure (`Fig`). The lowercase `fig` still refers to the figure of
# an earlier cell, so the file used to contain the wrong plot.
# bbox_inches="tight" keeps the axis labels and the title in the image,
# since the axes themselves fill the whole figure.
Fig.savefig("../../doc/figures/Amount of trips hour weekend_weekday.png", bbox_inches="tight")