In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from datetime import timedelta
import requests
import json

%load_ext autoreload
%autoreload 2
%matplotlib inline

Importing the data

In [2]:
# Load the raw 2019 e-scooter pilot trips. low_memory=False makes pandas parse
# the file in a single pass rather than in chunks, avoiding mixed-type inference.
df = pd.read_csv(
    'data/E-Scooter_Trips_-_2019_Pilot.csv',
    low_memory=False,
)

Taking a first look

In [3]:
# Preview the first five trips to get a feel for the columns and value formats.
df.head(n=5)

Unnamed: 0,Trip ID,Start Time,End Time,Trip Distance,Trip Duration,Accuracy,Start Census Tract,End Census Tract,Start Community Area Number,End Community Area Number,Start Community Area Name,End Community Area Name,Start Centroid Latitude,Start Centroid Longitude,Start Centroid Location,End Centroid Latitude,End Centroid Longitude,End Centroid Location
0,758e9d21-609f-5479-8e2c-5e8f64258202,07/01/2019 05:00:00 PM,07/01/2019 05:00:00 PM,421,3,1,,,,,,,,,,,,
1,ff33490c-254a-5af2-9315-d6b2a45b07f7,06/29/2019 06:00:00 PM,06/29/2019 06:00:00 PM,6318,31,1,,,,,,,,,,,,
2,f8a25729-e853-40f3-9200-7eea9f9c45f2,09/16/2019 01:00:00 PM,09/16/2019 01:00:00 PM,77,732,10,,,,25.0,,AUSTIN,,,,41.894101,-87.763112,POINT (-87.7631118242259 41.8941012961134)
3,11d42b99-e839-346c-11d4-2b99e839346c,06/24/2019 07:00:00 PM,06/24/2019 07:00:00 PM,917,359,10,,,25.0,25.0,AUSTIN,AUSTIN,41.894101,-87.763112,POINT (-87.7631118242259 41.8941012961134),41.894101,-87.763112,POINT (-87.7631118242259 41.8941012961134)
4,0c226130-0fa6-518f-8fe6-219b9fa5f42f,07/12/2019 07:00:00 PM,07/12/2019 07:00:00 PM,0,218,0,,,21.0,21.0,AVONDALE,AVONDALE,41.938666,-87.711211,POINT (-87.7112106378686 41.938665887698),41.938666,-87.711211,POINT (-87.7112106378686 41.938665887698)


Column information and non-null counts

In [4]:
# Summarise each column's dtype and non-null count to spot fields with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 710839 entries, 0 to 710838
Data columns (total 18 columns):
Trip ID                        710839 non-null object
Start Time                     710839 non-null object
End Time                       710839 non-null object
Trip Distance                  710839 non-null int64
Trip Duration                  710839 non-null int64
Accuracy                       710839 non-null int64
Start Census Tract             382637 non-null float64
End Census Tract               382475 non-null float64
Start Community Area Number    613450 non-null float64
End Community Area Number      612923 non-null float64
Start Community Area Name      613450 non-null object
End Community Area Name        612923 non-null object
Start Centroid Latitude        613451 non-null float64
Start Centroid Longitude       613451 non-null float64
Start Centroid Location        613451 non-null object
End Centroid Latitude          612923 non-null float64
End Centroid Longitud

Converting 'Start Time' and 'End Time' to datetime objects

In [5]:
# The CSV stores timestamps as month-first, 12-hour strings such as
# "07/01/2019 05:00:00 PM". Parsing with an explicit format avoids per-value
# format inference across ~710k rows and raises on any value that does not
# match, instead of silently guessing how to interpret it.
TRIP_TIME_FORMAT = '%m/%d/%Y %I:%M:%S %p'
for time_col in ('Start Time', 'End Time'):
    df[time_col] = pd.to_datetime(df[time_col], format=TRIP_TIME_FORMAT)

We'll be combining our scooter dataset with weather data from [Meteostat](https://api.meteostat.net). First, we'll determine the range of dates that we need to request from the API.

In [6]:
# Earliest trip start and latest trip end bound the weather data we need.
first_start = df['Start Time'].min()
last_end = df['End Time'].max()
print(first_start, last_end, sep=', ')

2019-06-15 04:00:00, 2019-10-30 15:00:00


How this data was obtained through the Meteostat API is described in the `weather_data.ipynb` notebook.

---

### Joining Datasets

We'll now join the weather data that we obtained from meteostat with the scooter data.

In [35]:
# Load the weather observations, parsing `time_local` as datetimes and using
# it as the index so it can serve as the join key.
weather_df = pd.read_csv(
    "data/weather_data.csv",
    parse_dates=['time_local'],
    index_col='time_local',
)

In [36]:
# Preview the first five weather observations.
weather_df.head(n=5)

Unnamed: 0_level_0,condition,dewpoint,humidity,precipitation,pressure,temperature,winddirection,windspeed
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-06-15 00:00:00,4,13.2,83,0.0,1010.7,60.98,190,11.2
2019-06-15 01:00:00,4,12.8,78,0.0,1009.7,62.06,170,11.2
2019-06-15 02:00:00,3,12.3,70,0.0,1009.4,64.04,200,20.5
2019-06-15 03:00:00,7,12.9,73,0.0,1008.7,64.04,200,14.8
2019-06-15 04:00:00,7,12.9,73,0.0,1008.4,64.04,210,22.3


In [37]:
# Attach to each trip the weather observation whose timestamp equals the
# trip's 'Start Time'. A left join keeps every trip, including those with no
# matching weather row (their weather columns become NaN).
scooter_weather_df = df.join(weather_df, on='Start Time', how='left')

# The join is meant to be many-to-one: duplicate timestamps in the weather
# index would silently duplicate trips, so verify the row count is unchanged.
assert len(scooter_weather_df) == len(df), (
    "weather join changed the number of trips; "
    "check weather_df for duplicate timestamps"
)

In [38]:
# Preview the joined frame: trip columns followed by the weather columns.
scooter_weather_df.head(n=5)

Unnamed: 0,Trip ID,Start Time,End Time,Trip Distance,Trip Duration,Accuracy,Start Census Tract,End Census Tract,Start Community Area Number,End Community Area Number,...,End Centroid Longitude,End Centroid Location,condition,dewpoint,humidity,precipitation,pressure,temperature,winddirection,windspeed
0,758e9d21-609f-5479-8e2c-5e8f64258202,2019-07-01 17:00:00,2019-07-01 17:00:00,421,3,1,,,,,...,,,18,20.7,54,0.0,1012.9,87.98,230,16.6
1,ff33490c-254a-5af2-9315-d6b2a45b07f7,2019-06-29 18:00:00,2019-06-29 18:00:00,6318,31,1,,,,,...,,,18,20.0,57,0.0,1016.7,84.92,90,14.8
2,f8a25729-e853-40f3-9200-7eea9f9c45f2,2019-09-16 13:00:00,2019-09-16 13:00:00,77,732,10,,,,25.0,...,-87.763112,POINT (-87.7631118242259 41.8941012961134),3,17.8,76,0.0,1020.4,71.96,50,16.6
3,11d42b99-e839-346c-11d4-2b99e839346c,2019-06-24 19:00:00,2019-06-24 19:00:00,917,359,10,,,25.0,25.0,...,-87.763112,POINT (-87.7631118242259 41.8941012961134),7,14.4,59,0.0,1003.0,73.04,270,25.9
4,0c226130-0fa6-518f-8fe6-219b9fa5f42f,2019-07-12 19:00:00,2019-07-12 19:00:00,0,218,0,,,21.0,21.0,...,-87.711211,POINT (-87.7112106378686 41.938665887698),2,14.5,39,0.0,1012.4,86.0,240,18.4


In [39]:
# Check dtypes and non-null counts after the join (trip columns plus weather columns).
scooter_weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 710839 entries, 0 to 710838
Data columns (total 26 columns):
Trip ID                        710839 non-null object
Start Time                     710839 non-null datetime64[ns]
End Time                       710839 non-null datetime64[ns]
Trip Distance                  710839 non-null int64
Trip Duration                  710839 non-null int64
Accuracy                       710839 non-null int64
Start Census Tract             382637 non-null float64
End Census Tract               382475 non-null float64
Start Community Area Number    613450 non-null float64
End Community Area Number      612923 non-null float64
Start Community Area Name      613450 non-null object
End Community Area Name        612923 non-null object
Start Centroid Latitude        613451 non-null float64
Start Centroid Longitude       613451 non-null float64
Start Centroid Location        613451 non-null object
End Centroid Latitude          612923 non-null float64
End C

In [51]:
# Count trips per (day of month, hour of day) of the trip start.
# Note: `.dt.day` is the day of the month, so trips that fall on the same day
# number in different months are pooled into one group.
# Selecting 'Trip ID' before aggregating counts only that column instead of
# counting every column and discarding all but one.
start_time = scooter_weather_df['Start Time']
scooter_weather_df.groupby([start_time.dt.day, start_time.dt.hour])['Trip ID'].count()

Start Time  Start Time
1           0              146
            1              108
            2               40
            5               14
            6               83
            7              262
            8              518
            9              595
            10             613
            11             817
            12            1183
            13            1389
            14            1456
            15            1566
            16            1772
            17            2160
            18            2315
            19            2047
            20            1710
            21            1461
            22             616
            23             234
2           0              142
            1               85
            2               18
            5               23
            6               78
            7              269
            8              510
            9              597
                          ... 
30          16  