In [1]:
# Create a fast API
# https://app.jedha.co/course/serve-your-model-with-api-ft/fastapi-basics-ft

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import re
import seaborn as sns

In [2]:
# Read the raw delay-analysis workbook into a DataFrame.
excel_path = 'src/get_around_delay_analysis.xlsx'
dataset = pd.read_excel(excel_path)

In [3]:
# Preview the first five rows to check which columns were loaded.
dataset.head(n=5)

Unnamed: 0,rental_id,car_id,checkin_type,state,delay_at_checkout_in_minutes,previous_ended_rental_id,time_delta_with_previous_rental_in_minutes
0,505000,363965,mobile,canceled,,,
1,507750,269550,mobile,ended,-81.0,,
2,508131,359049,connect,ended,70.0,,
3,508865,299063,connect,canceled,,,
4,511440,313932,mobile,ended,,,


In [4]:
# Percentage of missing values in each column.
dataset.isna().sum().mul(100).div(len(dataset))

rental_id                                      0.000000
car_id                                         0.000000
checkin_type                                   0.000000
state                                          0.000000
delay_at_checkout_in_minutes                  23.294228
previous_ended_rental_id                      91.360863
time_delta_with_previous_rental_in_minutes    91.360863
dtype: float64

## Drop columns with too many null values

In [5]:
# The two "previous rental" columns are ~91% empty, so they are removed;
# rows without a recorded checkout delay are discarded as well.
mostly_empty_columns = [
    'previous_ended_rental_id',
    'time_delta_with_previous_rental_in_minutes',
]
dataset = (
    dataset
    .drop(columns=mostly_empty_columns)
    .dropna(subset=['delay_at_checkout_in_minutes'])
)

In [6]:
# Inspect the dtype of each remaining column.
dataset.dtypes

rental_id                         int64
car_id                            int64
checkin_type                     object
state                            object
delay_at_checkout_in_minutes    float64
dtype: object

In [7]:
# Number of distinct (non-null) values per column, shown against the row count.
n_rows = len(dataset)
for column in dataset.columns:
    n_distinct = dataset[column].nunique()
    print(f'{column} : {n_distinct}/{n_rows}')

rental_id : 16346/16346
car_id : 6689/16346
checkin_type : 2/16346
state : 2/16346
delay_at_checkout_in_minutes : 1745/16346


## Drop identifier columns

In [8]:
# Remove the rental and car identifier columns.
identifier_columns = ['rental_id', 'car_id']
dataset = dataset.drop(columns=identifier_columns)

In [9]:
# Show how the remaining rentals split by state, then remove the column.
state_counts = dataset['state'].value_counts()
print(state_counts)
# NOTE(review): only the column is dropped; any non-'ended' rows stay in the
# data - confirm this is intended.
dataset = dataset.drop(columns='state')

ended       16345
canceled        1
Name: state, dtype: int64


## Delay at checkout in minutes

In [10]:
# Keep only delays strictly between -240 and +240 minutes (-4 h / +4 h);
# anything outside that window is treated as an outlier.
delay_minutes = dataset['delay_at_checkout_in_minutes']
dataset = dataset[(delay_minutes > -240) & (delay_minutes < 240)]

# Histogram of the retained delays, with a human-readable title and no legend.
fig = px.histogram(dataset['delay_at_checkout_in_minutes'])
fig.update_layout(
    title='delay_at_checkout_in_minutes'.replace('_', ' '),
    showlegend=False,
)
fig.show()

In [11]:
# Late returns only: delay strictly positive and below the +4 h (240 min) cap.
late_delay = dataset['delay_at_checkout_in_minutes']
dataset_late = dataset[(late_delay > 0) & (late_delay < 240)]

# Delay histogram for late returns, coloured by check-in type.
fig = px.histogram(dataset_late, x='delay_at_checkout_in_minutes', color='checkin_type')
fig.show()

In [12]:
# Early returns only: delay strictly negative and above the -4 h (-240 min) floor.
early_delay = dataset['delay_at_checkout_in_minutes']
dataset_early = dataset[(early_delay < 0) & (early_delay > -240)]

# Delay histogram for early returns, coloured by check-in type.
fig = px.histogram(dataset_early, x='delay_at_checkout_in_minutes', color='checkin_type')
fig.show()

## Distribution of mobile and connect check-ins for early and late drivers

In [13]:
# Two side-by-side pies; pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])

# Summed delay per check-in type (absolute value for the early group,
# whose delays are negative).
values_early = dataset_early.groupby('checkin_type')['delay_at_checkout_in_minutes'].sum().abs()
print(values_early)
values_lates = dataset_late.groupby('checkin_type')['delay_at_checkout_in_minutes'].sum()
print(values_lates)

# One pie per group: early in column 1, late in column 2.
pies = [
    (values_early, "Early Conductor"),
    (values_lates, "Late Conductor"),
]
for position, (totals, trace_name) in enumerate(pies, start=1):
    fig.add_trace(
        go.Pie(labels=totals.index, values=totals, name=trace_name),
        row=1, col=position,
    )

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

# Text placed in the centre of each donut.
center_labels = [
    dict(text='Early', x=0.19, y=0.5, font_size=20, showarrow=False),
    dict(text='Late', x=0.80, y=0.5, font_size=20, showarrow=False),
]
fig.update_layout(
    title_text="Distribution Mobile and connect for early and late conductors.",
    annotations=center_labels,
)
fig.show()

checkin_type
connect    109682.0
mobile     251560.0
Name: delay_at_checkout_in_minutes, dtype: float64
checkin_type
connect     76497.0
mobile     401309.0
Name: delay_at_checkout_in_minutes, dtype: float64


## Cars available between check in and check out after a delay of ...

In [14]:
# Share of rentals whose checkout delay is below vs. at/above a cutoff of
# 0, 1, 2, 3 and 4 hours - one pie per cutoff, laid out on a single row.
n_cutoffs = 5
specs = [{'type': 'domain'}] * n_cutoffs

fig = make_subplots(rows=1, cols=n_cutoffs, specs=[specs])

delay_minutes = dataset['delay_at_checkout_in_minutes']
for hours_cut in range(n_cutoffs):
    cutoff_minutes = hours_cut * 60
    # Row counts on each side of the cutoff (the cutoff itself counts as "after").
    ready_count = int((delay_minutes < cutoff_minutes).sum())
    unavailable_count = int((delay_minutes >= cutoff_minutes).sum())

    pie = go.Pie(
        labels=['Ready', 'Unvailable'],
        values=[ready_count, unavailable_count],
        name=f"{hours_cut} hour(s) between ck_in & check out",
    )
    fig.add_trace(pie, 1, hours_cut + 1)

# Caption shown above each pie: (text, x position in paper coordinates).
captions = [
    ('Without', 0.03),
    ('After 1 hour', 0.22),
    ('After 2 hours', 0.50),
    ('After 3 hours', 0.79),
    ('After 4 hours', 0.99),
]
fig.update_layout(
    title_text="Cars available between check in and check out after a delay of ...",
    annotations=[
        dict(text=caption, x=x_pos, y=0.9, font_size=20, showarrow=False)
        for caption, x_pos in captions
    ],
)
# Fixed slice colours, see https://plotly.com/python/discrete-color/
colors = ['#636EFA', '#EF553B']
fig.update_traces(marker=dict(colors=colors))

fig.show()

In [15]:
# Share of rentals whose checkout delay is below vs. at/above a cutoff of
# 0, 1, 2, 3 and 4 hours, split by check-in type: a 2 x 5 grid of pies with
# one row per check-in type and one column per cutoff.
def _add_availability_row(figure, delay_minutes, row, n_cutoffs=5):
    """Add one 'Ready' / 'Unvailable' pie per hourly cutoff to a subplot row.

    Parameters
    ----------
    figure : plotly figure created with ``make_subplots``; modified in place.
    delay_minutes : pandas Series of checkout delays, in minutes.
    row : 1-based subplot row that receives the pies.
    n_cutoffs : number of cutoffs; cutoff ``k`` is ``k * 60`` minutes and is
        drawn in column ``k + 1``.
    """
    for hours_cut in range(n_cutoffs):
        cutoff_minutes = hours_cut * 60
        # Row counts on each side of the cutoff (the cutoff itself counts as "after").
        ready_count = int((delay_minutes < cutoff_minutes).sum())
        unavailable_count = int((delay_minutes >= cutoff_minutes).sum())
        figure.add_trace(
            go.Pie(labels=['Ready', 'Unvailable'],
                   values=[ready_count, unavailable_count],
                   name=f"{hours_cut} hour(s) between ck_in & check out"),
            row, hours_cut + 1,
        )


checkin_types = ['mobile', 'connect']  # row 1, row 2
n_cutoffs = 5
domain_spec = {'type': 'domain'}  # pie traces need 'domain'-type cells
specs = [[domain_spec] * n_cutoffs for _ in checkin_types]

fig = make_subplots(rows=len(checkin_types), cols=n_cutoffs, specs=specs)

for row, checkin_type in enumerate(checkin_types, start=1):
    # Filter once per check-in type instead of once per cutoff.
    type_delays = dataset.loc[dataset['checkin_type'] == checkin_type,
                              'delay_at_checkout_in_minutes']
    _add_availability_row(fig, type_delays, row, n_cutoffs)

fig.update_layout(
    title_text="Cars available between check in and check out after a delay of ...",
    # Column captions below the grid, plus rotated row labels on the left.
    annotations=[dict(text='Without', x=0.05, y=-0.1, font_size=12, showarrow=False),
                 dict(text='After 1 hour', x=0.24, y=-0.1, font_size=12, showarrow=False),
                 dict(text='After 2 hours', x=0.50, y=-0.1, font_size=12, showarrow=False),
                 dict(text='After 3 hours', x=0.76, y=-0.1, font_size=12, showarrow=False),
                 dict(text='After 4 hours', x=0.97, y=-0.1, font_size=12, showarrow=False),
                 dict(text='mobile', x=-0.05, y=0.95, font_size=20, showarrow=False, textangle=-90),
                 dict(text='connect', x=-0.05, y=0.05, font_size=20, showarrow=False, textangle=-90),
                ])
# Fixed slice colours, see https://plotly.com/python/discrete-color/
colors = ['#636EFA', '#EF553B']
fig.update_traces(marker=dict(colors=colors))

fig.show()

In [16]:
# Write the cleaned dataset to CSV, leaving out the index column.
clean_csv_path = r'src/get_around_delay_analysis_clean.csv'
dataset.to_csv(clean_csv_path, index=False)