In [None]:
#importing necessary geospatial modules.

from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

In [None]:
#reading the trips .csv into a DataFrame

tripsgeo = pd.read_csv(filepath_or_buffer='../data/tripscleanedwithtime.csv')

In [None]:
#rebuilding a fresh positional index and discarding the 'Unnamed: 0' column

tripsgeo = tripsgeo.reset_index(drop=True).drop(columns='Unnamed: 0')

In [None]:
#wrapping the trips table in a GeoDataFrame.
#each row's geometry is a point built from the scooter's starting longitude/latitude.

tripsgeo = gpd.GeoDataFrame(
    tripsgeo,
    geometry=gpd.points_from_xy(x=tripsgeo['startlongitude'], y=tripsgeo['startlatitude']),
)

In [None]:
#loading the zipcode geometries for the upcoming join and keeping only the needed columns.
#printing the CRS so the trips table can be matched to it.

zipcodes = gpd.read_file('../data/zipcodes.geojson').loc[:, ['zip', 'po_name', 'geometry']]
print(zipcodes.crs)
zipcodes.head()

In [None]:
#printing the CRS currently attached to tripsgeo, for comparison with the zipcodes CRS printed above.
print(tripsgeo.crs)

In [None]:
#tagging tripsgeo with EPSG:4326 so its Coordinate Reference System matches the one given for zipcodes.

tripsgeo = tripsgeo.set_crs('epsg:4326')

In [None]:
#trimming tripsgeo down to the columns used in the rest of the notebook.

tripsgeo = tripsgeo.loc[:, [
    'triprecordnum', 'companyname', 'sumdid',
    'tripduration', 'tripdistance',
    'startdate', 'startdatetime', 'enddatetime', 'day_of_year',
    'startlatitude', 'startlongitude',
    'hour', 'minute',
    'geometry',
]]

In [None]:
#joining tripsgeo with zipcodes to continue question 4 analysis.
#each trip keeps the zipcode row whose geometry its start point falls within.
#`predicate` is the current name of this keyword; the older `op` spelling was deprecated in
#geopandas 0.10 and removed in 1.0.
#checking new shape, lost two rows. Likely data points that fall outside of the zipcode regions.

tripsgeo = gpd.sjoin(tripsgeo, zipcodes, predicate='within')
tripsgeo.shape

# 4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?
The first portion of this question is answered in the 'scooters_q4' notebook. I will continue to explore scooter usage below, with an emphasis on scooter usage throughout zipcodes.

In [None]:
#parsing the trip start and end timestamps into datetime values.

tripsgeo = tripsgeo.assign(
    startdatetime=pd.to_datetime(tripsgeo['startdatetime']),
    enddatetime=pd.to_datetime(tripsgeo['enddatetime']),
)

In [None]:
#counting trips per minute within each zip/day/hour, to feed a choropleth.
#also counting trips per hour within each zip/day, as the start of the
#table used for the number-of-scooters recommendation analysis.
#value_counts already sorts each group's counts in descending order by default.

tripsbyzip = tripsgeo.groupby(['zip', 'day_of_year', 'hour'])['minute'].value_counts()
tripsbyzipdayhour = tripsgeo.groupby(['zip', 'day_of_year'])['hour'].value_counts()

In [None]:
#turning both count Series into single-column DataFrames

tripsbyzip = tripsbyzip.to_frame()
tripsbyzipdayhour = tripsbyzipdayhour.to_frame()

In [None]:
#naming the count column, then moving the grouping levels out of the index into columns

tripsbyzip = tripsbyzip.set_axis(['number_of_trips'], axis=1).reset_index()
tripsbyzipdayhour = tripsbyzipdayhour.set_axis(['numberoftrips'], axis=1).reset_index()

In [None]:
#ordering by trip count, highest first, to confirm the data is varied and to inspect the new column names.

tripsbyzip.sort_values(by='number_of_trips', ascending=False)

In [None]:
#collecting the distinct day_of_year and startdate values, each in sorted order,
#as the raw material for a dictionary that puts the proper date back on the tables

uniquedayofyear = pd.unique(tripsgeo['day_of_year'].sort_values())
uniquedates = pd.unique(tripsgeo['startdate'].sort_values())

In [None]:
#creating dictionary mapping each day_of_year to its startdate.
#the pairing is read row by row from tripsgeo, so every day is matched with a date recorded
#on one of its own trips. Zipping two separately sorted arrays only lines up when the date
#values happen to sort chronologically and both arrays have the same length.

datedict = tripsgeo.groupby('day_of_year')['startdate'].first().to_dict()

In [None]:
#looking up each row's day_of_year in datedict to add a readable 'date' column for presentation

tripsbyzip = tripsbyzip.assign(date=tripsbyzip['day_of_year'].map(datedict))
tripsbyzipdayhour = tripsbyzipdayhour.assign(date=tripsbyzipdayhour['day_of_year'].map(datedict))
tripsbyzip.head()

In [None]:
#turning tripduration into timedelta values so it can be aggregated

tripsgeo = tripsgeo.assign(tripduration=pd.to_timedelta(tripsgeo['tripduration']))

In [None]:
#summary statistics of trip duration for each zip, with zip as a regular column

durationdescribe = tripsgeo.groupby('zip')['tripduration'].describe().reset_index()

In [None]:
#summary statistics of trip distance for each zip, with zip as a regular column

distancedescribe = tripsgeo.groupby('zip')['tripdistance'].describe().reset_index()

In [None]:
#totalling duration and distance per zip and attaching the totals to the describe tables.
#maybe not useful, but a fun fact for distance travelled by zipcode.

durationsum = tripsgeo.groupby('zip')['tripduration'].sum().reset_index()
distancesum = tripsgeo.groupby('zip')['tripdistance'].sum().reset_index()
durationdescribe = durationdescribe.merge(durationsum, on='zip')
distancedescribe = distancedescribe.merge(distancesum, on='zip')

In [None]:
#exporting all the created DataFrames above to .csv for possible use in Tableau.

#durationdescribe.to_csv('../data/durationaggs.csv', index=False)
#distancedescribe.to_csv('../data/distanceaggs.csv', index=False)
#tripsgeo.to_csv('../data/tripsgeo.csv', index=False)
#tripsbyzip.to_csv('../data/tripsbytimeandzipcode.csv', index=False)

In [None]:
#attaching the zipcode columns (including geometry) to the per-minute trip counts.

tripsbyzipgeo = tripsbyzip.merge(zipcodes, on='zip')

In [None]:
#promoting the merged table to a GeoDataFrame, using its 'geometry' column as the active geometry

tripsbyzipgeo = gpd.GeoDataFrame(data=tripsbyzipgeo, geometry='geometry')

In [None]:
#checking that CRS is correct and a proper geometry column exists.
#.info() lists every column with its dtype; the CRS is printed separately because .info() does not show it.

tripsbyzipgeo.info()
print(tripsbyzipgeo.crs)

In [None]:
#finding, for each zip code, the largest number_of_trips value it ever recorded

tripsmax = (
    tripsbyzipgeo.groupby('zip')['number_of_trips']
    .max()
    .reset_index()
    .set_axis(['zip', 'number_of_trips'], axis=1)
)

In [None]:
#keeping only the tripsbyzipgeo rows whose number_of_trips equals their zip's maximum, for plotting

tripsbyzipgeomaxrides = pd.merge(
    left=tripsbyzipgeo,
    right=tripsmax,
    on=['zip', 'number_of_trips'],
)

# Creating a quick dataframe for scooter recommendations deliverable.

In [None]:
#counting distinct scooter ids (sumdid) for every zip, day and hour

scootercount = tripsgeo.groupby(['zip', 'day_of_year', 'hour'])['sumdid'].nunique().reset_index()

In [None]:
#adding the unique-scooter counts to tripsbyzipdayhour, matched on zip, day and hour

tripsbyzipdayhour = tripsbyzipdayhour.merge(scootercount, on=['zip', 'day_of_year', 'hour'])

In [None]:
#renaming the merged-in scooter count column.
#only 'sumdid' needs a new label, so it is renamed by name. Reassigning the whole column list
#by position would silently mislabel columns if their order or number ever changed upstream.

tripsbyzipdayhour = tripsbyzipdayhour.rename(columns={'sumdid': 'uniqueSUMDID'})

In [None]:
#parsing 'date' into datetime values so the weekday name can be pulled from it

tripsbyzipdayhour = tripsbyzipdayhour.assign(date=pd.to_datetime(tripsbyzipdayhour['date']))

In [None]:
#adding the weekday name of each date as 'dayname'

tripsbyzipdayhour = tripsbyzipdayhour.assign(dayname=tripsbyzipdayhour['date'].dt.day_name())

In [None]:
#saving as .csv
#index=False keeps the meaningless row index out of the file; otherwise it comes back as an
#'Unnamed: 0' column the next time the .csv is read.

tripsbyzipdayhour.to_csv('../data/scooterrecommendations.csv', index=False)

# End scooter recommendations table

# Continue Q4 Geospatial

In [None]:
#Choropleth of the MAXIMUM concurrent rides reported for each Zipcode

fig, ax = plt.subplots(figsize=(10, 10))
tripsbyzipgeomaxrides.plot(
    column='number_of_trips',
    cmap='Reds',
    edgecolor='black',
    legend=True,
    ax=ax,
)
#hiding the axis frame and ticks; they add nothing to a map
ax.set_axis_off()
ax.set_title('Maximum Concurrent Rides by Zipcode')
plt.show()

In [None]:
#pulling the per-zip mean out of each describe table and giving it a descriptive name,
#ready for a choropleth of average distance or duration by zipcode

averagedistance = distancedescribe[['zip', 'mean']].rename(columns={'mean': 'avg_distance'})
averageduration = durationdescribe[['zip', 'mean']].rename(columns={'mean': 'avg_duration'})

In [None]:
#creating a table to utilize for a choropleth showing average distance or duration by zipcode.
#the averages are joined straight onto zipcodes, giving one row per zipcodes row.
#joining onto tripsbyzipgeo instead repeats each geometry once per zip/day/hour/minute row, so
#the map draws the same shape many times and a sorted .head() shows a single zipcode repeated.
#calling merge from the GeoDataFrame keeps the result a GeoDataFrame.

avgdistancechoro = zipcodes.merge(averagedistance, on='zip')
avgdurationchoro = zipcodes.merge(averageduration, on='zip')

In [None]:
#making avg_duration a timedelta, then expressing it as a plain number of minutes for plotting.

avgdurationchoro = avgdurationchoro.assign(
    avg_duration=lambda frame: pd.to_timedelta(frame['avg_duration']),
    avg_minutes=lambda frame: frame['avg_duration'] / pd.Timedelta(minutes=1),
)

In [None]:
#showing the rows with the largest average distance.

avgdistancechoro.sort_values(by='avg_distance', ascending=False).head(5)

In [None]:
#choropleth of average trip distance for each zipcode.

fig, ax = plt.subplots(figsize=(10, 10))
avgdistancechoro.plot(
    column='avg_distance',
    cmap='Greens',
    edgecolor='black',
    legend=True,
    ax=ax,
)
#hiding the axis frame and ticks; they add nothing to a map
ax.set_axis_off()
ax.set_title('Average Distance in Feet by Zipcode')
plt.show()

In [None]:
#choropleth of average trip duration (in minutes) for each zipcode

fig, ax = plt.subplots(figsize=(10, 10))
avgdurationchoro.plot(
    column='avg_minutes',
    cmap='Reds',
    edgecolor='black',
    legend=True,
    ax=ax,
)
#hiding the axis frame and ticks; they add nothing to a map
ax.set_axis_off()
ax.set_title('Average Duration in Minutes by Zipcode')
plt.show()

# 4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?
Scooter use varies dramatically by zip code, with the majority of use centered around downtown Nashville. As you move away from the city center, scooter use decreases but the distance travelled per trip increases.

In [None]:
#removing the 'index_right' column (presumably left over from the earlier spatial join with zipcodes)
tripsgeo = tripsgeo.drop(columns=['index_right'])

In [None]:
#tripsgeo.to_csv('../data/tripsgeo.csv')