In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import numpy as np

from math import radians
from numpy import sin, cos, arcsin, sqrt
from haversine import haversine, Unit

from datetime import datetime
from datetime import timedelta

# Reading in the data for one scooter

In [7]:
# Load the records for a single scooter; the path is relative to the
# notebook's working directory.
singlescooter = pd.read_csv('./data/singlescooter.csv')

In [8]:
# Preview the first rows to check the columns read from the CSV.
singlescooter.head()

Unnamed: 0,pubdatetime,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname,date,time,elapsed_time
0,2019-05-01 00:01:41.247,36.136822,-86.799877,PoweredLIRL1,Powered,93.0,scooter,0.0,Bird,2019-05-01,00:01:41.247000,
1,2019-05-01 00:06:41.537,36.136822,-86.799877,PoweredLIRL1,Powered,93.0,scooter,0.0,Bird,2019-05-01,00:06:41.537000,0 days 00:05:00.290000000
2,2019-05-01 01:01:43.917,36.136751,-86.799921,PoweredLIRL1,Powered,92.0,scooter,0.0,Bird,2019-05-01,01:01:43.917000,0 days 00:55:02.380000000
3,2019-05-01 01:06:44.237,36.136751,-86.799921,PoweredLIRL1,Powered,92.0,scooter,0.0,Bird,2019-05-01,01:06:44.237000,0 days 00:05:00.320000000
4,2019-05-01 01:11:44.167,36.136751,-86.799921,PoweredLIRL1,Powered,92.0,scooter,0.0,Bird,2019-05-01,01:11:44.167000,0 days 00:04:59.930000000


In [9]:
# (rows, columns) of the raw single-scooter frame.
singlescooter.shape

(2859, 12)

### Converting all applicable columns to datetime format

In [7]:
# singlescooter['pubdatetime'] = pd.to_datetime(singlescooter['pubdatetime'])

In [8]:
# singlescooter['date'] = pd.to_datetime(singlescooter['date'])

In [9]:
# singlescooter['time'] = pd.to_datetime(singlescooter['time'])

In [10]:
# singlescooter['elapsed_time'] = pd.to_timedelta(singlescooter['elapsed_time'])

# Exporting the completed dataframe to a pickle file for easier read in/out

In [11]:
# singlescooter.to_pickle("data/may.pkl")  # to_pickle returns None, so do not assign its result back

In [12]:
# singlescooter = pd.read_pickle("data/may.pkl")

### Adding a "geometry" column combining the latitude/longitude columns

In [13]:
# singlescooter['geometry'] = singlescooter.apply(lambda x: Point((float(x.longitude), float(x.latitude))), axis =1)

In [14]:
# singlescooter.head(1)

### Creating a new GeoDataFrame from the dataframe, using the CRS "4326" (EPSG:4326), which is a standard format for lat/long information

In [15]:
# singlescooter_geo = gpd.GeoDataFrame(singlescooter, crs = {'init': 'epsg:4326'}, geometry = singlescooter.geometry)

In [16]:
# type(singlescooter_geo)

In [17]:
# singlescooter_geo.head()

### Setting the dataframe's CRS explicitly to EPSG:4326 so that it is in the correct format before reprojecting

In [18]:
# singlescooter_geo.crs = {'init': 'epsg:4326'}

### Reprojecting the geometry to EPSG "3857", a coordinate system measured in meters instead of degrees

In [19]:
# singlescooter_geo.geometry = singlescooter_geo.geometry.to_crs(epsg = 3857)
# singlescooter_geo.head()

# Adding a "dist" column holding the distance between each row and the row before it (row 1 to row 2, row 2 to row 3, etc.): the .shift() function offsets the geometry by one row, and the .distance() function calculates the distance between the two

In [20]:
# singlescooter_geo['dist'] = singlescooter_geo.geometry.distance(singlescooter_geo.geometry.shift())
# singlescooter_geo.crs

In [21]:
# singlescooter_geo.head()

### Showing the description of values within the "dist" column

In [22]:
# singlescooter_geo.dist.describe()

In [23]:
# singlescooter_geo.to_pickle("data/singlescooter_geo.pkl")  # to_pickle returns None, so do not assign its result back

In [24]:
# singlescooter_geo = pd.read_pickle("data/singlescooter_geo.pkl")
# singlescooter_geo.shape

In [25]:
# singlescooter_geo.info()

In [26]:
# # singlescooter_geo[~singlescooter_geo['dist'] < 10]
# singlescooter_filter = singlescooter_geo[~(singlescooter_geo['dist'] < 10)]
# # singlescooter_geo[[~singlescooter_geo['dist'] < 10], [~singlescooter_geo['dist'] > 10000]]
# singlescooter_filter.head()

In [27]:
# singlescooter_geo['mins_wait'] = singlescooter_geo['elapsed_time'].apply(lambda x: x.dt.total_seconds())

In [28]:
# distance_calc = singlescooter.apply(lambda row: haversine(row.coordinate, row.coordinate, axis = 1))

In [29]:
# # Here is how to go about getting time in total_seconds
# singlescooter_filter['time_between'] = pd.to_timedelta(singlescooter_filter['elapsed_time'].astype(str))

# # and then
# singlescooter_filter['mins_elapsed']= singlescooter_filter['time_between'].dt.total_seconds()/60
# singlescooter_filter.head(100)

In [30]:
# singlescooter_filter.to_pickle("data/singlescooter_filter.pkl")  # to_pickle returns None, so do not assign its result back

In [3]:
# Load the cached, pre-filtered frame. The cells that build and write this
# pickle are commented out earlier in the notebook, so the file must already
# exist on disk for this cell to run.
# NOTE(review): only unpickle files you produced yourself; loading a pickle
# can execute arbitrary code.
singlescooter_filter = pd.read_pickle("data/singlescooter_filter.pkl")

In [4]:
# Preview the first two rows of the cached frame to check its columns.
singlescooter_filter.head(2)

Unnamed: 0,pubdatetime,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname,date,time,elapsed_time,geometry,dist,time_between,mins_elapsed
0,2019-05-01 00:01:41.247,36.136822,-86.799877,PoweredLIRL1,Powered,93.0,scooter,0.0,Bird,2019-05-01,2019-08-31 00:01:41.247,,POINT (-9662518.108558778 4319464.236882115),,NaT,
2,2019-05-01 01:01:43.917,36.136751,-86.799921,PoweredLIRL1,Powered,92.0,scooter,0.0,Bird,2019-05-01,2019-08-31 01:01:43.917,0 days 00:55:02.380000000,POINT (-9662523.006616373 4319454.450388775),10.943785,00:55:02.380000,55.039667


In [6]:
# (rows, columns) of the cached, pre-filtered frame.
singlescooter_filter.shape

(740, 16)

In [10]:
# Drop the two columns that are not needed for the filtering below.
# errors='ignore' keeps this cell re-runnable: it reassigns
# singlescooter_filter, so without it a second run would raise KeyError
# because the columns have already been removed.
singlescooter_filter = singlescooter_filter.drop(
    columns=['pubdatetime', 'time_between'],
    errors='ignore',
)

In [11]:
# Confirm that 'pubdatetime' and 'time_between' are no longer present.
singlescooter_filter.head(2)

Unnamed: 0,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname,date,time,elapsed_time,geometry,dist,mins_elapsed
0,36.136822,-86.799877,PoweredLIRL1,Powered,93.0,scooter,0.0,Bird,2019-05-01,2019-08-31 00:01:41.247,,POINT (-9662518.108558778 4319464.236882115),,
2,36.136751,-86.799921,PoweredLIRL1,Powered,92.0,scooter,0.0,Bird,2019-05-01,2019-08-31 01:01:43.917,0 days 00:55:02.380000000,POINT (-9662523.006616373 4319454.450388775),10.943785,55.039667


In [14]:
# Thresholds for the rows to keep, named so they can be tuned in one place.
MIN_DIST = 10            # exclusive lower bound on "dist" (presumably metres in EPSG:3857 -- TODO confirm)
MIN_MINS_ELAPSED = 6     # inclusive lower bound on "mins_elapsed"
MAX_MINS_ELAPSED = 180   # inclusive upper bound on "mins_elapsed"

# Rows whose distance from the previous kept row exceeds the minimum.
# Comparisons against NaN are False, so rows with a missing "dist" are excluded.
moved_far_enough = singlescooter_filter["dist"] > MIN_DIST

# Rows whose elapsed time lies in [MIN_MINS_ELAPSED, MAX_MINS_ELAPSED];
# Series.between is inclusive on both ends by default.
plausible_duration = singlescooter_filter["mins_elapsed"].between(
    MIN_MINS_ELAPSED, MAX_MINS_ELAPSED
)

singlescooter_final = singlescooter_filter.loc[moved_far_enough & plausible_duration]

In [16]:
# Size of the raw frame, for comparison with the filtered result.
singlescooter.shape

(2859, 12)

In [15]:
# Size of the frame after applying the distance and elapsed-time thresholds.
singlescooter_final.shape

(78, 14)