### Scooters & Sustainability

### Import Packages and Data

In [211]:
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import pandas as pd
from datetime import datetime, timedelta
from sodapy import Socrata
#raw = pd.read_csv("Dockless_Vehicle_Trips.csv")
# CSV from: https://data.austintexas.gov/widgets/7d8e-dm7r
# dir

In [212]:
# plotly
import plotly.plotly as py
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.io as pio
import plotly.tools
plotly.tools.set_credentials_file(username='AsherMeyers', api_key='x6WJxUVsTsMwhr5MNLcZ')

import dash
import dash_core_components as dcc
import dash_html_components as html

### Function to Filter Out Dubious Rides

In [213]:
def filter_trips(df):

    orig_row_count = df.shape[0]
    print("Trips to be removed (statistics are sequential):")
    
    
    # Remove trips under 1 minute
    low_duration_trips_num = df[df.duration < 1].shape[0] 
    print("Trips under 1 minute, n = ", low_duration_trips_num, ", ", 
          round(100*low_duration_trips_num/df.shape[0],1),"%", sep = '')
    df = df[df.duration >= 1]
    
    
    # Remove trips under 0.1 miles
    low_distance_trips_num = df[df.distance < 0.1].shape[0] 
    print("Trips under 0.1 miles, n = ", low_distance_trips_num, ", ", 
          round(100*low_distance_trips_num/df.shape[0],1),"%", sep = '')    
    df = df[df.distance >= 0.1] # 0.1 mile minimum
    
    
    # Remove trips with speeds of <1 mph
    low_speed_trips_num = df[df.speed <= 1].shape[0] 
    print("Trips with <1 mph speed, n = ", low_speed_trips_num, ", ", 
          round(100*low_speed_trips_num/df.shape[0],1),"%", sep = '')    
    df = df[df.speed > 0.5]
    
    
    print()
    #print("Removal of trips with duration, distance or speed 3*IQR above 75th percentile")
    print("Removal of trips with distance above 25 miles,  approximate max range of a fully charged scooter \n",
         "and speed over 25 mph, duration over 90 minutes")


    # Remove trips with over 3*Inter-quartile range (75%ile - 25%ile) for duration and distance
    duration_hi = 90 #np.percentile(df.duration,75) + 3*(np.percentile(df.duration,75) - np.percentile(df.duration,25))
    distance_hi = 25 #np.percentile(df.distance,75) + 3*(np.percentile(df.distance,75) - np.percentile(df.distance,25))
    speed_hi = 25 #np.percentile(df.speed,75) + 3*(np.percentile(df.speed,75) - np.percentile(df.speed,25))
    
    print("Trips with duration above 90 minutes, n= ",
      df[df.duration > duration_hi].shape[0], ", ", 
          round(100*df[df.duration > duration_hi].shape[0] / df.shape[0],1), "%",  sep = '')
    df = df[df.duration <= duration_hi]
    
    #print("Trips with distance above ",round(distance_hi,1)," miles, n= ",
    print("Trips with distance above 25 miles, n= ",
      df[df.distance > distance_hi].shape[0], ", ", 
          round(100*df[df.distance > distance_hi].shape[0] / df.shape[0],1), "%",  sep = '')
    df = df[df.distance <= distance_hi] 
    
    
    print("Trips with speed above ",round(speed_hi,1)," MPH, n= ",
      df[df.speed > speed_hi].shape[0], ", ", 
          round(100*df[df.speed > speed_hi].shape[0] / df.shape[0],1), "%",  sep = '')
    df = df[df.speed <= speed_hi]
    
    
    print("Total Trips Removed: n = ", orig_row_count - df.shape[0], ", ",
          round(100*(orig_row_count - df.shape[0])/orig_row_count,1), 
          "%", sep ='')
    
    return(df)

### Function: Get new trips

In [214]:
def get_trips_df(): # vehicle_types = "scooter", "bicycle"
    client = Socrata("data.austintexas.gov", "Ag12LzMiA3HV3xLIBH83VAjcD", "anm39@cornell.edu", "hkPLjcT2AyS63")

    results = client.get("7d8e-dm7r",content_type="csv", order="end_time ASC", #vehicle_type=vehicle_type, 
                         select="device_id,end_time,trip_distance,trip_duration,vehicle_type", limit=10**7) 

    # Convert to pandas DataFrame
    results_df=pd.DataFrame(results)
    results_df.columns=results_df.iloc[0,:] # rename columns
    results_df=results_df[1:] # start with first row of data

    results_df.device_id = results_df.device_id.str.slice(0,8) #trim unique IDs
    results_df.end_time = pd.to_datetime(results_df.end_time, infer_datetime_format=True)
    
    # Filter out values with strings
    results_df = results_df[results_df.trip_duration != '']
    results_df = results_df[results_df.trip_distance != '']
    
    results_df["duration"] = round(results_df.trip_duration.astype(int) / 60 ,1)
    results_df["distance"] = round(results_df.trip_distance.astype(int) / 1609.34,1)
    
    results_df["speed"] = round(60*results_df.distance/results_df.duration,1)
    results_df["revenue"] = np.ceil(results_df.duration)*.15 + 1
    results_df["date"] = results_df.end_time.dt.date
    
    results_df=results_df.drop(["trip_duration", "trip_distance"], axis=1)
    
    return(results_df)

In [626]:
trips_raw = get_trips_df()
trips_raw = trips_raw[trips_raw.date != trips_raw.date.max()] # Remove partial day data
scoot_trips_raw = trips_raw[trips_raw.vehicle_type == "scooter"].drop(["vehicle_type"],axis=1)
bike_trips_raw = trips_raw[trips_raw.vehicle_type == "bicycle"].drop(["vehicle_type"],axis=1)

trips = filter_trips(trips_raw)
scoot_trips = trips[trips.vehicle_type == "scooter"].drop(["vehicle_type"],axis=1)
bike_trips = trips[trips.vehicle_type == "bicycle"].drop(["vehicle_type"],axis=1)

Trips to be removed (statistics are sequential):
Trips under 1 minute, n = 154602, 3.3%
Trips under 0.1 miles, n = 488610, 10.9%
Trips with <1 mph speed, n = 76881, 1.9%

Removal of trips with distance above 25 miles,  approximate max range of a fully charged scooter 
 and speed over 25 mph, duration over 90 minutes
Trips with duration above 90 minutes, n= 19065, 0.5%
Trips with distance above 25 miles, n= 608, 0.0%
Trips with speed above 25 MPH, n= 2255, 0.1%
Total Trips Removed: n = 688921, 14.9%


In [620]:
print("Filtered Trip Count: ", scoot_trips.shape[0])

Filtered Trip Count:  3724715


In [8]:
scoot_trips.shape[0] - scoot_trips_raw.shape[0]

-634703

In [9]:
scoot_trips.tail()

Unnamed: 0,device_id,end_time,duration,distance,speed,revenue,date
4279929,ec7d107d,2019-04-18 11:15:00,5.0,0.6,7.2,1.75,2019-04-18
4279930,d43f0f08,2019-04-18 11:15:00,4.4,0.2,2.7,1.75,2019-04-18
4279931,8ae67063,2019-04-18 11:15:00,5.5,0.5,5.5,1.9,2019-04-18
4279932,de64bc26,2019-04-18 11:15:00,5.3,0.4,4.5,1.9,2019-04-18
4279933,c7446c07,2019-04-18 11:15:00,3.3,0.6,10.9,1.6,2019-04-18


In [57]:
scoot_trips_raw.to_csv("scoot_trips_raw.csv", index=False)
scoot_trips.to_csv("scoot_trips.csv", index=False)

### Create Vehicles Dataframe

In [358]:
def create_vehicles_df(df):
    df["date"] = df.end_time.dt.date
    vehicles = pd.DataFrame({"id": df.groupby('device_id')['distance'].sum().index, # vehicle ID
                            "rides": df.groupby('device_id')['duration'].count(),
                             "duration": df.groupby('device_id')['duration'].sum()/60, # total duration
                             "distance": df.groupby('device_id')['distance'].sum(), # total distance traveled
                             "start": df.groupby('device_id')['end_time'].min(), # first ride completed
                             "end": df.groupby('device_id')['end_time'].max(), # last ride completed
                             "active_days": df.groupby('device_id')['date'].nunique()
                            })
    vehicles = vehicles.reset_index()


    vehicles["lifespan"] = (vehicles["end"]-vehicles["start"])
    vehicles["revenue"] = np.round(vehicles["rides"] + 0.15*60*vehicles["duration"],2)

    vehicles = vehicles.drop(["device_id"],axis=1)

    vehicles["gross_profit"] = 0.33*vehicles["revenue"] # From Information article, optimistic future 33% Bird gross profit margin
    vehicles["lifecycle_profit"] = 0.33*vehicles["revenue"] - 400 # $551 scooter acquisition cost from The Information 10/23/18
    vehicles["per_ride_profit"] = vehicles["lifecycle_profit"]/vehicles["rides"]
    vehicles["utilization"] = vehicles["rides"] / vehicles["active_days"]
    
    vehicles["start"]=pd.to_datetime(vehicles["start"])
    vehicles["end"]=pd.to_datetime(vehicles["end"])
    vehicles["lifespan"] = pd.Series(vehicles.end-vehicles.start).dt.days + np.ceil(pd.Series(vehicles.end-vehicles.start).dt.seconds/86400) + 1
    #vehicles["lifespan"] = np.ceil(vehicles["lifespan"].apply(lambda x: x.days) + vehicles["lifespan"].apply(lambda x: x.seconds)/(24*60*60))
    
    #Vehicle filters
    #vehicles = vehicles[vehicles.rides > 1] # at least 2 rides
    #vehicles = vehicles[vehicles.distance > 1] # one mile minimum
    #vehicles = vehicles[vehicles.duration > 0.2] #12 minutes
    return(vehicles)

# operating cost and margin data from
# https://www.theinformation.com/articles/inside-birds-scooter-economics

def describe_vehicles(df):
    dead_vehicles = df[df.end < "2019-02-16"]
    #print("n =", dead_vehicles.shape[0], "vehicles")
    description = round(dead_vehicles.describe(),2).iloc[[1,5,3,7,4,6],]
    description.index = ['mean', 'median', 'min', 'max', '25th percentile', '75th percentile']
    return(dead_vehicles, description)

### Scooters based on Filtered Trip Data

In [359]:
scooters = round(create_vehicles_df(scoot_trips),1)

# Any scooter not seen within last 14 days of data is considered dead
dead_threshold_date = max(scooters["end"]) - timedelta(days=14)
scooters["alive"] = scooters["end"] >  dead_threshold_date
dead_scooters = scooters[scooters.end < dead_threshold_date]

In [355]:
scooters.to_csv("scooters.csv", index=False)
dead_scooters.to_csv("dead_scooters.csv", index=False)

### Scooters based on Raw Trip Data

In [9]:
scooters_raw = round(create_vehicles_df(scoot_trips_raw),1)

# Any scooter not seen within last 14 days of data is considered dead
dead_threshold_date = max(scooters_raw["end"]) - timedelta(days=14)
scooters_raw["alive"] = scooters_raw["end"] >  dead_threshold_date
dead_scooters_raw = scooters_raw[scooters_raw.end < dead_threshold_date]

In [10]:
round(scooters_raw.describe())

Unnamed: 0,rides,duration,distance,active_days,lifespan,revenue,gross_profit,lifecycle_profit,per_ride_profit,utilization
count,41605.0,41605.0,41605.0,41605.0,41605.0,41605.0,41605.0,41605.0,41605.0,41605.0
mean,105.0,19.0,2242.0,28.0,58.0,277.0,92.0,-459.0,-22.0,4.0
std,91.0,17.0,49864.0,23.0,49.0,238.0,78.0,78.0,66.0,2.0
min,1.0,0.0,-10316.0,1.0,1.0,1.0,0.0,-551.0,-551.0,1.0
25%,35.0,7.0,32.0,10.0,19.0,97.0,32.0,-519.0,-15.0,3.0
50%,80.0,15.0,71.0,22.0,49.0,217.0,72.0,-479.0,-6.0,4.0
75%,148.0,27.0,122.0,38.0,82.0,390.0,129.0,-422.0,-3.0,4.0
max,609.0,538.0,1334484.0,161.0,344.0,4913.0,1621.0,1070.0,16.0,32.0


### Scooter Trip Violin Plots

In [220]:
sample_size = 50000

'''
retained = int(0.95*sample_size)
x1 = scoot_trips["duration"].sample(sample_size, replace=True)
x2 = scoot_trips["distance"].sample(sample_size, replace=True)
x3 = scoot_trips["revenue"].sample(sample_size, replace=True)
x4 = scoot_trips["speed"].sample(sample_size, replace=True)'''



x1 = scoot_trips["duration"]
x2 = scoot_trips["distance"]
x3 = scoot_trips["speed"]

x = [x1, x2, x3, x4]
data = []

for i in range(3):
    xi = x[i]
    q1 = xi.quantile(0.25)
    median = round(xi.quantile(0.5),decimal)
    mean = round(xi.mean(),decimal)
    mode = xi.mode()[0]
    q3 = xi.quantile(0.75)
    p95 = round(xi.quantile(0.95),decimal)
    xi=xi.sample(sample_size, replace=False)
    xi = xi.nsmallest(int(sample_size * 0.95)) # Bottom 95% of values, to make graphs display better
    trace = go.Violin(
        y=xi,
        name= "Mode: " + str(mode) + "\t \t " + "Median: " + str(median) + "\t \t Mean: " + str(mean) + 
        "<br> 25th Percentile: " + str(q1) + "\t \t 75th: " + str(q3) + "\t \t 95th: " + str(p95),
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        meanline=dict(
        visible=True
        ),
        showlegend=False,
        box=dict(
        visible=True),
        points=False,
        #font=dict(size=14)
    )
    data.append(trace)
    
fig = plotly.tools.make_subplots(rows=1, cols=3, subplot_titles=('Trip Duration (min)', 'Trip Distance (mi)', 'Trip Speed (mph)'))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)
fig.append_trace(data[2], 1, 3)

margin = go.layout.Margin(
                        l=20,
                        r=0,
                        b=40,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=350, width=900, 
                     title='🛴 Trip Statistics',
                     font=dict(size=18,family='Times New Roman'),
                    xaxis=dict(tickfont=dict(size=15)),
                    xaxis2=dict(tickfont=dict(size=15)),
                    xaxis3=dict(tickfont=dict(size=15)),
                    yaxis1=dict(range=[0,40]),
                    yaxis2=dict(range=[0,3]),
                    yaxis3=dict(range=[0,12]),
                     margin=margin,
                     
        
    )

py.iplot(fig, filename='Scooter-Trip-Violins')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]



In [221]:
pio.write_image(fig, 'images/trip_statistics_violins.png')

In [34]:
#median revenue
scoot_trips["revenue"].median()

2.2

### Trip Counts by Month, Day of the Week, Time of Day

In [627]:
title = '🛴 Trips Over Time, n = ' + str(round(scoot_trips.shape[0]/1000000,1)) + ' Million' 

# X axis labels: Months, Days of Week, Times of Day
names = ["Months", "Day of Week", "Time of Day"]
x1 = ["Apr '18", "May '18", "Jun '18", "Jul '18", "Aug '18", "Sep '18", "Oct '18", "Nov '18", "Dec '18", "Jan '19", "Feb '19", "Mar '19", "Apr '19", "May '19"]
x2 = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
x3 = list(range(0,24))
#x3 = ["12 AM", "1 AM", "2 AM", "3 AM", "4 AM", "5 AM", "6 AM", "7 AM", "8 AM", "9 AM", "10", "11", "Noon", "1 PM", "2 ", "3", "4", "5", "6", "7", "8", "9", "10", "11"]

x = [x1, x2, x3]

# Daily Trip counts by month
scoot_trips_month = pd.Series(scoot_trips.end_time.dt.date.groupby([
    scoot_trips.end_time.dt.year, 
    scoot_trips.end_time.dt.month]).agg('count'))

scoot_trips_month = scoot_trips_month/[25,31,30,31,30,30,31,30,31,31,28,31, 30, scoot_trips.iloc[-1,].end_time.day] # Number of days in each month

scoot_trips_month=list(scoot_trips_month.values)

# Trip Counts by Day of Week
scoot_trips_day_of_week = scoot_trips.end_time.dt.weekday.groupby(scoot_trips.end_time.dt.weekday).agg('count')
scoot_trips_day_of_week = 100*(scoot_trips_day_of_week / sum(scoot_trips_day_of_week))

# Trip counts by hour of the day, for weekdays and weekends

# Weekdays

scoot_trips_hour = scoot_trips.end_time.dt.hour.groupby(scoot_trips.end_time.dt.hour).agg('count')

scoot_trips_hour = scoot_trips_hour/sum(scoot_trips_hour) # Normalize into Percents
scoot_trips_hour = 100*pd.Series(scoot_trips_hour)

y = [scoot_trips_month, scoot_trips_day_of_week, scoot_trips_hour]

data = [] # container for trace objects

for i in range(3):
    trace = go.Bar(
        x=x[i],
        y=y[i],
        name= names[i],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
    )
    data.append(trace)
    
fig = plotly.tools.make_subplots(rows=2, cols=2, subplot_titles=('Mean Trips Per Day, #', 'By Day of Week, %', 'By Hour of Day, %'), specs=[[{}, {}], [{'colspan': 2}, None]],)

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)
fig.append_trace(data[2], 2, 1)
margin = go.layout.Margin(
                        l=40,
                        r=0,
                        b=40,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=600, width=825, 
                     title=title,
                     font=dict(size=18,family='Times New Roman'),
                    xaxis=dict(tickfont=dict(size=15),
                              tickangle=90),
                    xaxis2=dict(tickfont=dict(size=15)),
                    xaxis3=dict(tickfont=dict(size=13)),
                     
                    yaxis1=dict(tickfont=dict(size=15)),
                    yaxis2=dict(tickfont=dict(size=15)),
                    yaxis3=dict(tickfont=dict(size=15)),
                     margin=margin
        
    )

py.iplot(fig, filename='Scooter-Trips-Over-Time-Bars')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3           -      ]



In [628]:
pio.write_image(fig, 'images/Scooter-Trips-Over-Time-Bars.png')

### Rolling Daily Average of Scooter Trips, Deployment (Vehicles Available) and Utilization, Window = 7 days

In [629]:
# Set window of one week for rolling averages
window = 7

trip_count = scoot_trips['device_id'].groupby([scoot_trips.end_time.dt.date]).count()
vehicles_out = scoot_trips['device_id'].groupby([scoot_trips.end_time.dt.date]).nunique()
utilization = trip_count / vehicles_out

# Get trips and number of scooters out by date, and the resulting utilization
#dates = dates.astype(str)
dates = utilization.index[6:]
trips_on_date = trip_count.rolling(window=window).mean()[6:]
scooters_out = vehicles_out.rolling(window=window).mean()[6:]
utilization_rolling = utilization.rolling(window=window).mean()[6:]

# Non-Rolling
'''dates = utilization.index
scooters_out = vehicles_out
trips_on_date = ride_count
utilization = utilization'''

trace1 = go.Scatter(
    x=dates,
    y=trips_on_date,
    yaxis='y3',
    name = "Scooter Trips"
)
trace2 = go.Scatter(
    x=dates,
    y=scooters_out,
    yaxis='y2',
    name = "Scooters Deployed"
)
trace3 = go.Scatter(
    x=dates,
    y=utilization_rolling,
    yaxis='y1',
    name = "Daily Trips Per Scooter"
)

margin = go.layout.Margin(
                        l=40,
                        r=0,
                        b=40,
                        t=100,
                        pad=0
                    )

data = [trace1, trace2, trace3]
layout = go.Layout(
    title = "Daily 🛴 Trips, Vehicle Deployments & Utilization: 7 Day Rolling Daily Average",
    font=dict(size=12,family='Times New Roman'),
    xaxis1=dict(
        tickfont=dict(
        size=16
        ),
    ),
    yaxis=dict(
        domain=[0, 0.33],
        tickfont=dict(
        size=14
        )
    ),
    yaxis2=dict(
        domain=[0.33, 0.66],
        tickfont=dict(
        size=14
        ),
        tickformat="s"
    ),
    yaxis3=dict(
        domain=[0.66, 1],
        tickfont=dict(
        size=14
        )
    ),
    height=750, width=825, 
    legend=dict(orientation="h",
                font=dict(
                    size=14
                )
               ),
    margin=margin
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='daily_trips_deployment_utilization')

In [630]:
pio.write_image(fig, 'images/daily_trips_deployment_utilization.png')

### Defunct Vehicle Statistics

In [745]:
x1 = dead_scooters["rides"]
x2 = dead_scooters["duration"]
x3 = dead_scooters["distance"]
x4 = dead_scooters["active_days"]
x5 = dead_scooters["lifespan"]
x6 = round(100*dead_scooters["active_days"] / dead_scooters["lifespan"])


x = [x1, x4]
data = []

for i in range(2):
    xi = x[i]
    q1 = xi.quantile(0.25)
    median = xi.quantile(0.5)
    mean = round(xi.mean())
    q3 = xi.quantile(0.75)
    p95 = round(xi.quantile(0.95))
    trace = go.Violin(
        y=xi.nsmallest(round(0.99*len(dead_scooters["rides"]))),
        name= "Median: " + str(median) + "\t \t Mean: " + str(mean) + 
        "<br> 25th Percentile: " + str(q1) + "\t \t 75th: " + str(q3) + "\t \t 95th: " + str(p95),
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        meanline=dict(
        visible=True
        ),
        showlegend=False,
        box=dict(
        visible=True),
        points=False,
        #font=dict(size=14)
    )
    data.append(trace)
    
fig = plotly.tools.make_subplots(rows=1, cols=2, subplot_titles=( 'Rides Completed', 'Days With Trips (d)'), 
                                 vertical_spacing = 0.12, horizontal_spacing = 0.05)

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)
margin = go.layout.Margin(
                        l=40,
                        r=20,
                        b=60,
                        t=50,
                        pad=0
                    )

fig['layout'].update(height=400, width=825, 
                     #title='Defunct 🛴 Vehicle Statistics',
                     font=dict(size=12, family = "Times New Roman"),
                     xaxis=dict(tickfont=dict(size=14)),
                     xaxis2=dict(tickfont=dict(size=14)),
                     yaxis=dict(range=[0,350]),
                     yaxis2=dict(range=[0,85]),
                     margin=margin
        
    )

py.iplot(fig, filename='Scooter-Vehicle-Violins')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



In [746]:
pio.write_image(fig, 'images/defunct_scooter_violins.png')

In [247]:
# Compute the unique vehicles active per calendar month
monthly_unique_vehicles_cal = scoot_trips.device_id.groupby([
    scoot_trips.end_time.dt.year, 
    scoot_trips.end_time.dt.month]).nunique()

monthly_unique_vehicles = list(monthly_unique_vehicles_cal.values)

In [256]:
title = '🛴 Vehicle Population Over Time' 

# X axis labels: Months, Days of Week, Times of Day
names = ["Net Scooter Population Change",
         "Unique Vehicles Active in Month", "Scooter Status by Start Month"]
x = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar", "Apr"]

# Scooter counts by start month
scooters_start_month = list(scooters['start'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('count').values)

# Scooter counts by end month
dead_scooters_end_month = list(dead_scooters['end'].groupby([
    dead_scooters.end.dt.year, 
    dead_scooters.end.dt.month]).agg('count').values)

net_scooters_change = pd.Series(scooters_start_month) - pd.Series(dead_scooters_end_month)

scooters["alive"]

y = [net_scooters_change, monthly_unique_vehicles]

data = [] # container for trace objects

for i in range(2):
    trace = go.Bar(
        x=x,
        y=y[i],
        name= names[i],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
    )
    data.append(trace)



fig = plotly.tools.make_subplots(rows=1, cols=2, subplot_titles=(names[0], names[1]))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)


margin = go.layout.Margin(
                        l=40,
                        r=20,
                        b=60,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=400, width=825, 
                     title=title,
                    font=dict(size=12, family='Times New Roman'),
                    xaxis=dict(tickfont=dict(size=13),
                               tickangle=90),
                    xaxis2=dict(tickfont=dict(size=13),
                               tickangle=90),
                    yaxis=dict(tickfont=dict(size=15)),
                    yaxis2=dict(tickfont=dict(size=15)),
                     margin=margin,
        
    )

py.iplot(fig, filename='Scooter-Vehicle-Counts-Over-Time')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



In [252]:
pio.write_image(fig, 'images/vehicle_population_change.png')

In [479]:
# Count of all scooters by start month
scooters_by_month = scooters["id"].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('count').values

# Count of dead scooters by start month
scooters_dead_by_month= scooters[scooters.alive==False]["id"].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('count').values

# Count of live scooters by start month 
# calculated by subtraction since groupby function doesn't create new rows for values with zero
try:
    scooters_live_by_month = scooters_by_month - scooters_dead_by_month
    percent_alive = 100*np.round(scooters_live_by_month / scooters_by_month,2)
except: 
    scooters_live_by_month = scooters_by_month[0:-1] - scooters_dead_by_month
    percent_alive = 100*np.round(scooters_live_by_month / scooters_by_month[0:-1],2)

    

In [476]:
# Code to get data for month fragment
'''scooters_live_by_month = pd.Series(scooters_by_month[0:-1] - scooters_dead_by_month)
scooters_live_by_month = scooters_live_by_month.append(pd.Series(scooters_by_month[-1]))
percent_alive = 100*np.round(scooters_live_by_month / scooters_by_month,2)
percent_alive = percent_alive.values
scooters_live_by_month = scooters_live_by_month.values'''

In [631]:
x = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar", "Apr", "May '19"]

trace1 = go.Bar(
    x=x,
    y=scooters_dead_by_month,
    name='Defunct',
    marker=dict(
        color="#caccd1"
    )
)
  
trace2 = go.Bar(
    x=x,
    y=scooters_live_by_month,
    name='Active',
    marker=dict(
        color="#0abf53"
    ),
)


try:
    y=scooters_live_by_month/scooters_by_month,
except:
    y=scooters_live_by_month/scooters_by_month[0:-1]


trace3 = go.Scatter(
    x=x,
    y=y,
    name='% Active',
    text = pd.Series(percent_alive).astype(int).astype(str) + '%',
    textposition="top center",
    marker=dict(
        color="#0abf53"
    ),
    yaxis='y2',
    mode='lines+text'
)

data = [trace1, trace2, trace3]

margin = go.layout.Margin(
                        l=60,
                        r=20,
                        b=60,
                        t=100,
                        pad=0
                    )


layout = go.Layout(
    barmode='stack',
    font=dict(size=12, family='Times New Roman'),
    title = "Scooters, Active & Defunct, by Start Month, as of " + str(max(scooters.end))[0:10],
    xaxis=dict(
     tickfont=dict(
     size=14
     ),  
    ),
    height=400, width=825,
    legend=dict(orientation="h",
                font=dict(
                    size=14
                )
               ),
    margin=margin,
    yaxis=dict(
        title='Scooter Count'
    ),
    yaxis2=dict(
        title='% of Scooters Still Active',
        overlaying='y',
        side='right',
        tickfont=dict(
     color="white"
     ),
        showgrid=False
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

In [632]:
pio.write_image(fig, 'images/scooter_active_and_defunct_bar_line_percent.png')

In [275]:
# Set window size for rolling median of rides and active days per vehicle
w = 500

# Rolling median of lifetime rides and days active
rolling_lifetime_rides = dead_scooters.sort_values(by=["end"])["rides"].rolling(window=w).median()
rolling_lifetime_days_active = dead_scooters.sort_values(by=["end"])["active_days"].rolling(window=w).median()

# Calculate the median start date on a rolling basis of the selected window of scooters
#x = pd.to_datetime(dead_scooters.sort_values(by=["end"]).end[500:])
x = list(range(1,len(rolling_lifetime_rides)))

trace1 = go.Scatter(
    x=x,
    y=rolling_lifetime_rides,
    yaxis='y',
    name = "Rides per Vehicle, Median"
)
trace2 = go.Scatter(
    x=x,
    y=rolling_lifetime_days_active,
    yaxis='y2',
    name = "Active Days per Vehicle, Median"
)


data = [trace1, trace2]
layout = go.Layout(
    font=dict(size=12, family='Times New Roman'),
    title = "Defunct Scooters: Rolling Median Lifetime Rides and Days Active, Window = " + str(w),
    xaxis1=dict(
        tickfont=dict(
        size=16
        ),
    ),
    yaxis=dict(
        domain=[0.5, 1.0],
        tickfont=dict(
        size=14
        )
    ),
    yaxis2=dict(
        domain=[0, 0.5],
        tickfont=dict(
        size=14
        ),
        tickformat="s"
    ),
    height=600, width=700, 
    legend=dict(orientation="h", 
                font=dict(
                    size=12,
                        ),
        )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-subplots-shared-x-axis')

The draw time for this plot will be slow for clients without much RAM.


In [273]:
pio.write_image(fig, 'images/rolling-median-defunct-scooters.png')

### Cumulative Rides By Scooter Start Month

In [None]:
apr_18 = scooters[(scooters.start.dt.month == 4) & (scooters.start.dt.year == 2018)].id
may_18 = scooters[(scooters.start.dt.month == 5) & (scooters.start.dt.year == 2018)].id
jun_18 = scooters[(scooters.start.dt.month == 6) & (scooters.start.dt.year == 2018)].id
jul_18 = scooters[(scooters.start.dt.month == 7) & (scooters.start.dt.year == 2018)].id

apr_18_trips = scoot_trips[scoot_trips.device_id.isin(apr_18)]
apr_18_trips_by_date = apr_18_trips.groupby(['date']).agg('count')
apr_18_trips_by_date_cum = apr_18_trips_by_date.device_id.cumsum()

In [594]:
# Create the cumulative rides by months series
cum_rides = [None] * 14
for i in range(4, 13):
    scooter_segment = scooters[(scooters.start.dt.month == i) & (scooters.start.dt.year == 2018)].id
    scooter_segment_trips = scoot_trips[scoot_trips.device_id.isin(scooter_segment)]
    scooter_segment_trips_by_date = scooter_segment_trips.groupby(['date']).agg('count')
    cum_rides[i-4] = scooter_segment_trips_by_date.device_id.cumsum()

for i in range(1, 6):
    scooter_segment = scooters[(scooters.start.dt.month == i) & (scooters.start.dt.year == 2019)].id
    scooter_segment_trips = scoot_trips[scoot_trips.device_id.isin(scooter_segment)]
    scooter_segment_trips_by_date = scooter_segment_trips.groupby(['date']).agg('count')
    cum_rides[i+8] = scooter_segment_trips_by_date.device_id.cumsum()
    
# Trim the months to clean up the graph
cum_rides[0] = cum_rides[0][0:127] # April - August
#cum_rides[1] = []
cum_rides[2] = cum_rides[2][0:116] # June - October
cum_rides[3] = cum_rides[3][0:123] # July - October
cum_rides[4] = cum_rides[4][0:121] # August - November
monthly_new_deployments = scooters.groupby([scooters.start.dt.year, scooters.start.dt.month]).agg('count').id.values

In [610]:
title = 'Cumulative Rides by Scooter Start Month'

traces = []
names = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar", "Apr", "May '19"]
widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 14):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=cum_rides[i].index,
        y=cum_rides[i].values,
        mode='lines',
        line=dict(width=widths[i]),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=700, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=50
    ),
    font=dict(family="Times New Roman"),
    showlegend=False,
    title = "Cumulative Rides by Monthly Scooter Cohort"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='news-source')

In [611]:
pio.write_image(fig, 'images/cumulative_rides.png')

In [732]:
# Create the quantile distribution for each month
quantile_rides = [None] * 5
for i in range(2, 5): # For year 2018
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2018)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rides[i-2] = quarterly_scooters.rides.quantile(np.linspace(0,  # Create a percentile distribution of rides per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

for i in range(1, 3): # For year 2019
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2019)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rides[i+2] = quarterly_scooters.rides.quantile(np.linspace(0,  # Create a percentile distribution of rides per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

In [748]:
traces = []
names = ["Q2 2018", "Q3 2018", "Q4 2018",
         "Q1 2019", "Q2 2019"]
#widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 5):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=100*quantile_rides[i].index,
        y=quantile_rides[i].values,
        mode='lines',
        line=dict(width=3),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        name=names[i]
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=700, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
        range=[0,370],
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=60
    ),
    font=dict(family="Times New Roman"),
    showlegend=True,
    legend=dict(orientation="h"),
    title = "Percentile Distribution of Lifetime Rides by Quarter Deployed, Defunct Scooters Only"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='percentile-distro-quarter-rides')

In [749]:
pio.write_image(fig, 'images/percentile-distro-quarter-rides.png')

In [738]:
# Create the quantile distribution for each month
quantile_days = [None] * 5
for i in range(2, 5): # For year 2018
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2018)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_days[i-2] = quarterly_scooters.active_days.quantile(np.linspace(0,  # Create a percentile distribution of days active per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

for i in range(1, 3): # For year 2019
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2019)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_days[i+2] = quarterly_scooters.active_days.quantile(np.linspace(0,  # Create a percentile distribution of days active per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

traces = []
names = ["Q2 2018", "Q3 2018", "Q4 2018",
         "Q1 2019", "Q2 2019"]
#widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 5):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=100*quantile_days[i].index,
        y=quantile_days[i].values,
        mode='lines',
        line=dict(width=3),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        name=names[i]
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=700, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
        range=[0,75],
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=60
    ),
    font=dict(family="Times New Roman"),
    showlegend=True,
    legend=dict(orientation="h"),
    title = "Percentile Distribution of Lifetime Days Active by Quarter Deployed, Defunct Scooters Only"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='percentile-distro-quarter')

In [739]:
pio.write_image(fig, 'images/percentile-distro-quarter-days-active.png')

In [None]:
# Create the quantile distribution for each month
quantile_rides = [None] * 14
for i in range(4, 13): # For year 2018
    monthly_scooters = scooters[(scooters.start.dt.month == i) & (scooters.start.dt.year==2018)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-monthly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rides[i-4] = monthly_scooters.rides.quantile(np.linspace(0,  # Create a percentile distribution of rides per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

for i in range(1, 6): # For year 2019
    monthly_scooters = scooters[(scooters.start.dt.month == i) & (scooters.start.dt.year==2019)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-monthly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rides[i+8] = monthly_scooters.rides.quantile(np.linspace(0,  # Create a percentile distribution of rides per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

In [703]:
traces = []
names = ["Apr '18", "May '18", "Jun '18",
         "Jul '18", "Aug '18", "Sep '18", "Oct '18", "Nov '18", "Dec '18", 
         "Jan '19", "Feb '19", "Mar '19", "Apr '19", "May '19"]
#widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 14):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=100*quantile_rides[i].index,
        y=quantile_rides[i].values,
        mode='lines',
        line=dict(width=1.5), #widths[i]),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        name=names[i]
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=700, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
        range=[0,370],
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=50
    ),
    font=dict(family="Times New Roman"),
    showlegend=True,
    title = "Percentile Distribution of Rides by Month Deployed"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='news-source')

In [269]:
title = 'Median Lifetime 🛴  Rides, Days Active, and Utilization by Vehicle Start Month, n = ' + "{:,}".format(scooters[scooters.start<"2019-04-01"].shape[0]) 

# X axis labels: Months, Days of Week, Times of Day
names = ["Rides", "Days with 1+ Trips",
         "Daily Utilization"]
x = [
    ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar"],
    ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar"],
    ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb", "Mar", "Apr '19"]
]



# Scooter Lifetime Rides
rides_by_birth_month = (pd.Series(scooters['rides'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Scooter Lifetime Active Days
active_days_by_birth_month = (pd.Series(scooters['active_days'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Scooter utilization by start month
utilization_by_birth_month = (pd.Series(scooters['utilization'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

y = [rides_by_birth_month, active_days_by_birth_month, 
     utilization_by_birth_month]

data = [] # container for trace objects

for i in range(3):
    trace = go.Bar(
        
        x=x[i],
        y=y[i],
        name= names[i],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
    )
    data.append(trace)
    
fig = plotly.tools.make_subplots(rows=1, cols=3, subplot_titles=(names[0], names[1], names[2]))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)
fig.append_trace(data[2], 1, 3)

margin = go.layout.Margin(
                        l=40,
                        r=20,
                        b=70,
                        t=100,
                        pad=0
                    )


fig['layout'].update(height=375, width=750, 
                     title=title,
                     font=dict(size=12, family = "Times New Roman"),
                    xaxis=dict(tickfont=dict(size=15),
                              tickangle=90),
                    xaxis2=dict(tickfont=dict(size=15),
                               tickangle=90),
                    xaxis3=dict(tickfont=dict(size=13),
                               tickangle=90),
                     
                    yaxis1=dict(tickfont=dict(size=15),
                               ),
                    yaxis2=dict(tickfont=dict(size=15),
                               ),
                    yaxis3=dict(tickfont=dict(size=15)),
                     margin=margin,
        
    )

py.iplot(fig, filename='Scooter-Vehicles-Stats-Over-Time')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]



In [271]:
pio.write_image(fig, 'images/lifetime_median_vehicle_longevity_stats.png')

In [32]:
scooters[(scooters.start.dt.month == 2) & (scooters.start.dt.day < 10)]["rides"].median()

67.0

In [57]:
scooters[(scooters.start.dt.month == 2) & (scooters.start.dt.day < 10)]["active_days"].median()

21.0

In [308]:
#title = '🛴 Vehicle Financial Statistics by Scooter Start Month, n = ' + "{:,}".format(scooters[scooters.start<"2019-02-01"].shape[0]) 
title = 'Lifetime Median Revenue and Utilization'


# X axis labels: Months, Days of Week, Times of Day
names = ["Revenue",  "Utilization"]
x = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb"]


# Lifetime median revenue by start month
revenue_by_birth_month = (pd.Series(scooters['revenue'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Lifetime Daily utilization by start month
utilization_by_birth_month = (pd.Series(scooters['utilization'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

y = [revenue_by_birth_month, 
     utilization_by_birth_month]

data = [] # container for trace objects

trace1 = go.Bar(
        x=x,
        y=y[0],
        name= names[0],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
        text=round(y[0]).astype(int),
        textposition = 'auto',
        textfont=dict(color="white"),
    )

trace2 = go.Bar(
        x=x,
        y=y[1],
        name= names[1],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
        text=round(y[1],1).astype(float),
        textposition = 'auto',
        textfont=dict(color="white"),
    )


data = [trace1, trace2]
    
fig = plotly.tools.make_subplots(rows=1, cols=2, subplot_titles=(names[0], names[1]))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)

margin = go.layout.Margin(
                        l=30,
                        r=10,
                        b=60,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=400, width=825, 
                     font=dict(size=12, family = "Times New Roman"),
                     title=title,

                    xaxis=dict(tickfont=dict(size=13),
                               tickangle=90),
                    xaxis2=dict(tickfont=dict(size=13),
                               tickangle=90),
                    yaxis=dict(tickfont=dict(size=15)),
                    yaxis2=dict(tickfont=dict(size=15), range=[0,8]),
        
    )

py.iplot(fig, filename='Scooter-Vehicles-Financial-Stats-Over-Time')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



In [277]:
pio.write_image(fig, 'images/vehicle_financials1.png')

In [360]:
#title = '🛴 Vehicle Financial Statistics by Scooter Start Month, n = ' + "{:,}".format(scooters[scooters.start<"2019-02-01"].shape[0]) 
title = 'Lifetime Median Lifecycle Total and Per-Ride Profit'


# X axis labels: Months, Days of Week, Times of Day
names = ["Lifecycle Profit",  "Profit Per Ride"]
x = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19", "Feb"]


# Lifetime median revenue by start month
lifecycle_profit_by_birth_month = (pd.Series(scooters['lifecycle_profit'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Lifetime Daily utilization by start month
per_ride_profit_by_birth_month = (pd.Series(scooters['per_ride_profit'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

per_ride_profit_by_birth_month[1] = 0

y = [lifecycle_profit_by_birth_month, 
     per_ride_profit_by_birth_month]

data = [] # container for trace objects

trace1 = go.Bar(
        x=x,
        y=y[0],
        name= names[0],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
        text=round(y[0]).astype(int),
        textposition = 'auto',
        textfont=dict(color="white"),
    )

trace2 = go.Bar(
        x=x,
        y=y[1],
        name= names[1],
        opacity=0.9,
        #marker=dict(
        #    color='#03cb03'
        #),  
        showlegend=False,
        #font=dict(size=14)
        text=round(y[1],1).astype(float),
        textposition = 'auto',
        textfont=dict(color="white"),
    )


data = [trace1, trace2]
    
fig = plotly.tools.make_subplots(rows=1, cols=2, subplot_titles=(names[0], names[1]))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)

margin = go.layout.Margin(
                        l=30,
                        r=10,
                        b=60,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=400, width=750, 
                     font=dict(size=12, family = "Times New Roman"),
                     title=title,

                    xaxis=dict(tickfont=dict(size=13),
                               tickangle=90),
                    xaxis2=dict(tickfont=dict(size=13),
                               tickangle=90),
                    yaxis=dict(tickfont=dict(size=15)),
                    yaxis2=dict(tickfont=dict(size=15), range=[-12,0])
        
    )

py.iplot(fig, filename='Scooter-Vehicles-Financial-Stats-Over-Time')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



In [361]:
pio.write_image(fig, 'images/vehicle_financials2.png')

In [808]:
# Create the quantile distribution for each month
quantile_days = [None] * 5
for i in range(2, 5): # For year 2018
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2018)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_days[i-2] = quarterly_scooters.lifecycle_profit.quantile(np.linspace(0,  
                                                                    # Create a percentile distribution of lifecycle profit per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

for i in range(1, 3): # For year 2019
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2019)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_days[i+2] = quarterly_scooters.lifecycle_profit.quantile(np.linspace(0,  
                                                                    # Create a percentile distribution of lifecycle profit per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

traces = []
names = ["Q2 2018", "Q3 2018", "Q4 2018",
         "Q1 2019", "Q2 2019"]
#widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 5):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=100*quantile_days[i].index,
        y=quantile_days[i].values,
        mode='lines',
        line=dict(width=3),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        name=names[i]
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=600, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
        #range=[0,75],
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=75
    ),
    font=dict(family="Times New Roman", size = 12),
    showlegend=True,
    legend=dict(orientation="h"),
    title = "Percentile Distribution of Lifetime Profit per Defunct 🛴, By Date Deployed"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='percentile-distro-quarter-lifecycle-profit')

In [809]:
pio.write_image(fig, 'images/vehicle_financials-percentiles.png')

In [810]:
scooters[scooters.start.dt.month==9].alive.mean()

0.0773763771457853

In [804]:
# Create the quantile distribution for each month
quantile_rideprofit = [None] * 5
for i in range(2, 5): # For year 2018
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2018)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rideprofit[i-2] = quarterly_scooters.per_ride_profit.quantile(np.linspace(0,  
                                                                    # Create a percentile distribution of lifecycle profit per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

for i in range(1, 3): # For year 2019
    quarterly_scooters = scooters[(scooters.start.dt.quarter == i) & (scooters.start.dt.year==2019)] # Subset scooters into one month's cohort
    dead_percent = math.floor(100*(1-quarterly_scooters.alive.mean()))/100 # Identify the portion that are defunct
    quantile_rideprofit[i+2] = quarterly_scooters.per_ride_profit.quantile(np.linspace(0,  
                                                                    # Create a percentile distribution of lifecycle profit per scooter
                                                                         dead_percent+0.01, int(100*dead_percent)+1, 0))

traces = []
names = ["Q2 2018", "Q3 2018", "Q4 2018",
         "Q1 2019", "Q2 2019"]
#widths = np.round(monthly_new_deployments/1200,2)

for i in range(0, 5):
    #if (i != 1): # Insignificant deployment in May
    traces.append(go.Scatter(
        x=100*quantile_rideprofit[i].index,
        y=quantile_rideprofit[i].values,
        mode='lines',
        line=dict(width=3),
        #line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        name=names[i]
    ))


layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickcolor='rgb(204, 204, 204)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
    ),
    height=400, width=510, 
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=False,
        showticklabels=True,
        #range=[0,75],
        tickfont=dict(
            family='Times New Roman',
            size=14,
            color='rgb(82, 82, 82)',
        ),
        range=[-40,1],
        #type='log',
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=50,
        r=25,
        t=70,
        b=75
    ),
    font=dict(family="Times New Roman"),
    showlegend=True,
    legend=dict(orientation="h"),
    title = "Profit per Ride per Defunct 🛴, by Date Deployed, Percentile Distribution"
)



fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='percentile-distro-quarter-lifecycle-profit')

In [805]:
pio.write_image(fig, 'images/vehicle_financials-per-ride-percentiles.png')

In [377]:
dead_scooters.lifecycle_profit.median()

-346.8

In [380]:
scooters.head()

Unnamed: 0,id,rides,duration,distance,start,end,active_days,lifespan,revenue,gross_profit,lifecycle_profit,per_ride_profit,utilization,alive
0,0003f834,2,0.4,1.7,2019-05-01 18:00:00,2019-05-01 19:00:00,1,2.0,5.8,1.9,-398.1,-199.0,2.0,True
1,00066528,282,56.5,264.0,2018-10-20 16:15:00,2019-04-25 23:15:00,95,189.0,790.9,261.0,-139.0,-0.5,3.0,True
2,0008292e,59,10.2,40.8,2019-03-08 16:45:00,2019-03-25 15:15:00,15,18.0,150.8,49.8,-350.2,-5.9,3.9,False
3,0011ad88,147,29.5,115.0,2019-01-04 00:45:00,2019-05-02 17:00:00,51,120.0,412.7,136.2,-263.8,-1.8,2.9,True
4,001237a8,36,7.4,47.3,2019-04-04 16:45:00,2019-04-28 08:00:00,13,25.0,102.2,33.7,-366.3,-10.2,2.8,True


In [386]:
scooters.to_csv("all_scooters.csv")

In [393]:
# Median loss per ride
print("Median loss per ride", dead_scooters.per_ride_profit.median())

# Weight average loss per ride, among defunct scooters
print("Weight average loss per ride, among defunct scooters", dead_scooters.lifecycle_profit.sum()/dead_scooters.rides.sum())

# Loss Per Ride for early September
early_sep_scooters = scooters[(scooters.start.dt.month == 9) & (scooters.start.dt.day < 15)]
print("Early September Scooters: ",early_sep_scooters.lifecycle_profit.sum()/early_sep_scooters.rides.sum())

# Early September scooters still active
print("Early September scooters still active",early_sep_scooters.alive.mean())

# Early September Scooters n= 
print( "Early September Scooters n= ", early_sep_scooters.shape[0])

# Total Losses to date
print("Total losses to date among defunct scooters", dead_scooters.lifecycle_profit.sum())

Median loss per ride -5.9
Weight average loss per ride, among defunct scooters -4.072623874800219
Early September Scooters:  -2.9705675261819473
Early September scooters still active 0.0007283321194464676
Early September Scooters n=  1373
Total losses to date among defunct scooters -9456310.9


In [397]:
print("Weight average loss per ride, among defunct scooters", 
      scooters[scooters.start.dt.month==12].lifecycle_profit.sum()/scooters[scooters.start.dt.month==12].rides.sum())

Weight average loss per ride, among defunct scooters -5.1658414786677165


In [400]:
january_scooters.shape

(3194,)

In [401]:
january_scooters = scooters[scooters.start.dt.month == 1].id
trips_of_january_scooters = scoot_trips[scoot_trips.device_id.isin(january_scooters)]
trips_of_january_scooters[(trips_of_january_scooters.end_time.dt.month == 4) & (trips_of_january_scooters.end_time.dt.day >23)]

Unnamed: 0,device_id,end_time,duration,distance,speed,revenue,date
4404285,b30b5118,2019-04-24 00:00:00,1.8,0.3,10.0,1.30,2019-04-24
4404300,f462e8ae,2019-04-24 00:00:00,4.5,0.5,6.7,1.75,2019-04-24
4404324,a20166d9,2019-04-24 00:00:00,4.1,0.4,5.9,1.75,2019-04-24
4404328,5d118c41,2019-04-24 00:00:00,4.2,0.8,11.4,1.75,2019-04-24
4404335,b1fb45b2,2019-04-24 00:00:00,35.3,2.7,4.6,6.40,2019-04-24
4404343,3e1980a3,2019-04-24 00:00:00,11.2,0.7,3.8,2.80,2019-04-24
4404349,672b3864,2019-04-24 00:00:00,3.5,0.2,3.4,1.60,2019-04-24
4404369,1e743999,2019-04-24 00:00:00,3.9,0.3,4.6,1.60,2019-04-24
4404376,0f6b7c62,2019-04-24 00:00:00,5.6,0.7,7.5,1.90,2019-04-24
4404402,3cd14e25,2019-04-24 00:15:00,28.4,3.6,7.6,5.35,2019-04-24


In [404]:
trips_of_january_scooters.end_time.groupby(trips_of_january_scooters['date']).agg('count').to_csv("january_scooters_trips.csv")

In [303]:
#title = '🛴 Vehicle Financial Statistics by Scooter Start Month, n = ' + "{:,}".format(scooters[scooters.start<"2019-02-01"].shape[0]) 
title = '🛴 Vehicle Financial Statistics by Scooter Start Month'


# X axis labels: Months, Days of Week, Times of Day
names = ["Total Lifetime Revenue (k)", "Total Lifetime Profit (k)",
         "Lifetime Median Revenue",  "Lifetime Daily Utilization",
        "Lifetime Median Profit", "Lifetime Median per Ride Profit"]
x = ["Apr '18", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec '18", "Jan '19"]



# Scooter Lifecycle Revenue
lifecycle_revenue_month = (pd.Series(dead_scooters['revenue'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('sum').values/1000))

# Scooter Gross Lifecycle Profit
lifecycle_profit_month = (pd.Series(scooters['lifecycle_profit'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('sum').values/1000))

# Scooter utilization by start month
utilization_by_birth_month = (pd.Series(scooters['utilization'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Scooter revenue by start month
revenue_by_birth_month = (pd.Series(scooters['revenue'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Scooter lifecycle profit by start month
profit_by_birth_month = (pd.Series(scooters['lifecycle_profit'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

# Scooter per ride profit by start month
ride_profit_by_birth_month = (pd.Series(scooters['per_ride_profit'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median').values))

y = [lifecycle_revenue_month, lifecycle_profit_month, revenue_by_birth_month, 
     utilization_by_birth_month, profit_by_birth_month, ride_profit_by_birth_month]

data = [] # container for trace objects

for i in range(6):
    if (i == 1): #Lifetime profit bar wasn't displaying well in earlier months
        trace = go.Bar(
            x=x,
            y=y[i],
            name= names[i],
            opacity=0.9,
            #marker=dict(
            #    color='#03cb03'
            #),  
            showlegend=False,
            #font=dict(size=14)
            text=round(y[i]).astype(int),
            textposition = ['outside', 'outside', 'outside', 'outside', 
                            'auto', 'auto', 'auto', 'auto', 'auto', 'auto'],
            textfont=dict(color=["black", "black", "black", "black", 
                                 "white", "white", "white", "white", "white", "white"],
                          size=16),
        )
    else:    
        trace = go.Bar(
            x=x,
            y=y[i],
            name= names[i],
            opacity=0.9,
            #marker=dict(
            #    color='#03cb03'
            #),  
            showlegend=False,
            #font=dict(size=14)
            text=round(y[i]).astype(int),
            textposition = 'auto',
            textfont=dict(color="white"),
        )
    data.append(trace)
    
fig = plotly.tools.make_subplots(rows=3, cols=2, subplot_titles=(names[0], names[1],
                                                                names[2], names[3], 
                                                                 names[4], names[5]))

fig.append_trace(data[0], 1, 1)
fig.append_trace(data[1], 1, 2)
fig.append_trace(data[2], 2, 1)
fig.append_trace(data[3], 2, 2)
fig.append_trace(data[4], 3, 1)
fig.append_trace(data[5], 3, 2)

margin = go.layout.Margin(
                        l=40,
                        r=20,
                        b=60,
                        t=100,
                        pad=0
                    )

fig['layout'].update(height=1000, width=825, 
                     title=title,
                     font=dict(size=18),
                    xaxis=dict(tickfont=dict(size=15),
                              tickangle=90),
                    xaxis2=dict(tickfont=dict(size=15),
                               tickangle=90),
                    xaxis3=dict(tickfont=dict(size=13),
                               tickangle=90),
                    xaxis4=dict(tickfont=dict(size=13),
                               tickangle=90),
                     xaxis5=dict(tickfont=dict(size=13),
                               tickangle=90),
                     xaxis6=dict(tickfont=dict(size=13),
                               tickangle=90),
                     
                    yaxis1=dict(tickfont=dict(size=15),
                               ),
                    yaxis2=dict(tickfont=dict(size=15),
                                range=[-4500, 0]
                               ),
                    yaxis3=dict(tickfont=dict(size=15)),
                    yaxis4=dict(tickfont=dict(size=15), range=[0,8]),
                    yaxis5=dict(tickfont=dict(size=15), range=[-600, 0]),
                    yaxis6=dict(tickfont=dict(size=15), range=[-11,0]),
        
    )

py.iplot(fig, filename='Scooter-Vehicles-Financial-Stats-Over-Time')


This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]
[ (3,1) x5,y5 ]  [ (3,2) x6,y6 ]



In [None]:
pio.write_image(fig, 'images/vehicle_financials3.png')

In [272]:
dead_scooters.distance.mean()

80.31440482526168

In [279]:
total_losses = round(dead_scooters.lifecycle_profit.sum()/10**6,1)
print(total_losses, " m in losses")
loss_per_ride = round(dead_scooters.lifecycle_profit.sum()/dead_scooters.rides.sum(),2)
print(loss_per_ride, " $ lost per ride")


-10.8  m in losses
-5.74  $ lost per ride


In [288]:
dead_scooters.end.iloc[0]

Timestamp('2018-10-11 17:15:00')

In [219]:
monthly_rides_median = scooters['rides'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('median')

In [220]:
monthly_rides_mean = scooters['rides'].groupby([
    scooters.start.dt.year, 
    scooters.start.dt.month]).agg('mean')

In [225]:
scooters[scooters.start.dt.month == 7].rides.mean()

225.88967611336034

In [193]:
scooters.head()

Unnamed: 0,id,rides,duration,distance,start,end,active_days,lifespan,revenue,gross_profit,lifecycle_profit,per_ride_profit,utilization,alive
0,00066528,221,42.5,205.7,2018-10-20 16:15:00,2019-03-23 21:15:00,73,156.0,603.8,199.3,-351.7,-1.6,3.0,True
1,0008292e,50,8.3,34.1,2019-03-08 16:45:00,2019-03-23 20:15:00,13,17.0,124.4,41.1,-509.9,-10.2,3.8,True
2,0011ad88,120,25.7,98.6,2019-01-04 00:45:00,2019-03-22 21:00:00,42,79.0,351.0,115.8,-435.2,-3.6,2.9,True
3,001302a8,102,17.2,98.1,2018-11-10 17:00:00,2019-03-10 20:45:00,35,122.0,257.0,84.8,-466.2,-4.6,2.9,True
4,0013eeed,11,3.5,8.0,2019-02-23 12:15:00,2019-03-16 17:15:00,6,23.0,42.9,14.2,-536.8,-48.8,1.8,True


### Break-Even: How Long Must the Vehicle Last to break even?

In [93]:
mean_trip_revenue = np.mean(scoot_trips.revenue)
print("$",round(mean_trip_revenue,2), ": mean trip revenue")

mean_trip_gross_profit = 0.33*mean_trip_revenue # 0.33 = gross profit margin

break_even_trips = 551/mean_trip_gross_profit # $551 is cost of scooter

break_even_active_days = round(break_even_trips / 3) # assumed utilization of 3 trips per day

#uptime_ratio = np.mean(dead_scooters.active_days / dead_scooters.lifespan)

#break_even_lifespan = break_even_days/uptime_ratio

print(" Vehicle Lifespan needed until break even - when gross operating profits cover vehicle acquisition cost")
print(break_even_active_days, " active days")
#print(round(break_even_lifespan/(365/12)), "months of total lifespan, assuming 59% uptime")

print("\n Vehicle lifespan until 20% ROI needed - when gross operating profits cover vehicle acquisition cost + 20%")
print(round(1.2*break_even_active_days), " active days")
#print(round(1.2*break_even_lifespan/(365/12)), "months of total lifespan, assuming 59% uptime")

print("Gross lifecycle profit for vehicle with 3 year operating lifespan & $800 acquisition cost")
mature_vehicle_active_days = 365*3*uptime_ratio
mature_vehicle_gross_revenue = mature_vehicle_active_days*3*0.33*mean_trip_revenue
mature_vehicle_gross_profit = mature_vehicle_gross_revenue - 800
annualized_return = round(100*((mature_vehicle_gross_revenue/800)**(1/3)-1))
print("$",int(round(mature_vehicle_gross_profit)),", ", round(100*mature_vehicle_gross_profit/800), "% ROI,", annualized_return, "% annualized return")
print()

$ 2.76 : mean trip revenue
 Vehicle Lifespan needed until break even - when gross operating profits cover vehicle acquisition cost
202  active days

 Vehicle lifespan until 20% ROI needed - when gross operating profits cover vehicle acquisition cost + 20%
242  active days
Gross lifecycle profit for vehicle with 3 year operating lifespan & $800 acquisition cost
$ 920 ,  115 % ROI, 29 % annualized return



### Profit Scenario: 10% ROI

Assumptions:
* 25% higher acquisition cost per scooter -> 688.75 USD cost per scooter instead of 551 USD
* Non-unit expenses cost 25% of unit expenses: net cost share = 1.25 * gross cost share = 1.25 * 0.66 = 0.8375

Desired ROI = 10% = 688.75 USD * 1.10 = 757.625 USD

In [74]:
mean_trip_profit = mean_trip_revenue * (1-0.8375)
operating_profit_required = 551 * 1.25 * 1.10 # Original scooter cost plus 25% ruggedization premium plus 10% ROI
trips_required_for_profit = operating_profit_required/mean_trip_profit
days_required_for_profit = trips_required_for_profit/3
days_required_for_profit

562.0438450029484

In [71]:
mean_trip_profit

0.44932734147340747

In [322]:
# active days needed, assuming 3 trips per day, 33% margins
print(round(551/(0.33*3*np.mean(scoot_trips.revenue))), "days, breakeven")
print(round(1.2*551/(0.33*3*np.mean(scoot_trips.revenue))), "days, +20%")

201 days, breakeven
241 days, +20%
