# Validação de hipóteses - Cury Delivery Food

In [27]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import plotly.express as px
from matplotlib import pyplot as plt
import folium
from haversine import haversine

### Data Load

In [28]:
data = pd.read_csv('../Datasets/train_tratado.csv')
data.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,...,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min),Week_of_year
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,11:30:00,...,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,24,11
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,19:45:00,...,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,33,12
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,2022-03-19,08:30:00,...,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,26,11
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,18:00:00,...,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,21,14
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,13:30:00,...,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,30,12


### H1 - Entregas realizadas com o veículo do tipo "Motorcicle" são, na média, 50% mais rápidas que as entregas feitas com bicicletas

In [29]:
print(data['Type_of_vehicle'].unique())

time_bicycle = data.loc[data['Type_of_vehicle'] == 'bicycle', 'Time_taken(min)'].mean()

time_motorcycle = data.loc[data['Type_of_vehicle'] == 'motorcycle', 'Time_taken(min)'].mean()

time_scooter = data.loc[data['Type_of_vehicle'] == 'scooter', 'Time_taken(min)'].mean()

time_eletric_scooter = data.loc[data['Type_of_vehicle'] == 'electric_scooter', 'Time_taken(min)'].mean()

['motorcycle' 'scooter' 'electric_scooter' 'bicycle']


In [30]:
print(time_bicycle)

print(time_motorcycle)

print(time_scooter)

print(time_eletric_scooter)

26.426470588235293
27.60567429544165
24.48075412411626
24.470110120608286


### H2 - Entregas realizadas com bicicletas são em geral mais lentas que as feitas com outros tipos de veículos

In [31]:
data.groupby('Type_of_vehicle')['Time_taken(min)'].mean().sort_values(ascending=False)

Type_of_vehicle
motorcycle          27.605674
bicycle             26.426471
scooter             24.480754
electric_scooter    24.470110
Name: Time_taken(min), dtype: float64

### H3 - As entregas em cidades mtropolitanas são, em média, 30% maiores que nas outras cidades

In [48]:
df = pd.DataFrame(citys).astype(int)

In [73]:
print('O percentual de entregas na cidade Metropolitana corresponde a {}% do total de entregas'.format(round((df['ID'][0]/df['ID'].sum())*100)),2)
print('O percentual de entregas na cidade Urban corresponde a {}% do total de entregas'.format(round((df['ID'][1]/df['ID'].sum())*100)),2)
print('O percentual de entregas na cidade Semi-Urban corresponde a {}% do total de entregas'.format(round((df['ID'][2]/df['ID'].sum())*100)),2)

O percentual de entregas na cidade Metropolitana corresponde a 75% do total de entregas 2
O percentual de entregas na cidade Urban corresponde a 22% do total de entregas 2
O percentual de entregas na cidade Semi-Urban corresponde a 3% do total de entregas 2


### H4 - Os entregadores mais novos fazem mais entregas que os mais velhos

In [76]:
data['Delivery_person_Age'].min()

15

In [79]:
data['Delivery_person_Age'].max()

50

In [78]:
len(data.loc[data['Delivery_person_Age'] == 15, 'ID'])

38

In [80]:
len(data.loc[data['Delivery_person_Age'] == 50, 'ID'])

53

### H5 - O número de entregas com a condição de clima ensolarada é 50% maior que a de tempestade

In [92]:
data.loc[data['Weatherconditions'] == 'conditions Sunny', 'ID'].count()

7284

In [93]:
data.loc[data['Weatherconditions'] == 'conditions Stormy', 'ID'].count()

7586

### H6 -  Os 10 entregadores mais rápidos têm a menor faixa etária

In [97]:
df_aux = pd.DataFrame(data.groupby(['City', 'Delivery_person_ID', 'Delivery_person_Age'])['Time_taken(min)'].min()).reset_index()

metropoli = df_aux.loc[df_aux['City'] == 'Metropolitian', :].sort_values('Time_taken(min)', ascending=True).reset_index().head(10)

urba = df_aux.loc[df_aux['City'] == 'Urban', :].sort_values('Time_taken(min)', ascending=True).reset_index().head(10)

semi_ur = df_aux.loc[df_aux['City'] == 'Semi-Urban', :].sort_values('Time_taken(min)', ascending=True).reset_index().head(10)

city_concat = pd.concat([metropoli, urba, semi_ur], axis=0)

city_concat

Unnamed: 0,index,City,Delivery_person_ID,Delivery_person_Age,Time_taken(min)
0,870,Metropolitian,ALHRES19DEL03,26,10
1,12350,Metropolitian,MYSRES14DEL01,22,10
2,15739,Metropolitian,SURRES20DEL02,26,10
3,16265,Metropolitian,VADRES09DEL03,25,10
4,6488,Metropolitian,HYDRES15DEL02,29,10
5,14616,Metropolitian,RANCHIRES18DEL01,37,10
6,6221,Metropolitian,HYDRES09DEL02,25,10
7,6222,Metropolitian,HYDRES09DEL02,26,10
8,10750,Metropolitian,MUMRES02DEL01,27,10
9,14968,Metropolitian,SURRES04DEL01,22,10


In [98]:
city_concat['Delivery_person_Age'].mean()

25.1

### H7 - Os 10 entregadores mais lentos têm a maior faixa etária

In [99]:
df_aux = pd.DataFrame(data.groupby(['City', 'Delivery_person_ID', 'Delivery_person_Age'])['Time_taken(min)'].max()).reset_index()

metropoli = df_aux.loc[df_aux['City'] == 'Metropolitian', :].sort_values('Time_taken(min)', ascending=False).reset_index().head(10)

urba = df_aux.loc[df_aux['City'] == 'Urban', :].sort_values('Time_taken(min)', ascending=False).reset_index().head(10)

semi_ur = df_aux.loc[df_aux['City'] == 'Semi-Urban', :].sort_values('Time_taken(min)', ascending=False).reset_index().head(10)

city_concat = pd.concat([metropoli, urba, semi_ur], axis=0)
city_concat

Unnamed: 0,index,City,Delivery_person_ID,Delivery_person_Age,Time_taken(min)
0,10160,Metropolitian,KOLRES19DEL02,30,54
1,11763,Metropolitian,MYSRES01DEL02,33,54
2,10738,Metropolitian,MUMRES01DEL03,34,54
3,4788,Metropolitian,COIMBRES19DEL02,32,54
4,14496,Metropolitian,RANCHIRES15DEL03,39,54
5,6255,Metropolitian,HYDRES11DEL01,23,54
6,10833,Metropolitian,MUMRES03DEL03,21,54
7,14461,Metropolitian,RANCHIRES15DEL02,22,54
8,7604,Metropolitian,INDORES16DEL03,37,54
9,14449,Metropolitian,RANCHIRES15DEL01,30,54


In [100]:
city_concat['Delivery_person_Age'].mean()

31.233333333333334

### H8 - As entregas em dias de eventos são 20% menores que em dias sem eventos

In [122]:
#Quantidade de entregas em dias de Festival
dias_entregas_Festival = data.loc[data['Festival'] == 'Yes', 'Order_Date'].nunique()

In [123]:
dias_entregas_sfestival = data.loc[data['Festival'] == 'No', 'Order_Date'].nunique()

In [129]:
entregas_festival = data.loc[data['Festival'] == 'Yes', 'ID'].count()

In [130]:
entregas_sem_festival = data.loc[data['Festival'] == 'No', 'ID'].count()

In [131]:
#Média de entregas com Festival
media_entregas_Festival = entregas_festival/dias_entregas_Festival

In [132]:
#Média de entregas sem Festival
media_entregas_sem_Festival = entregas_sem_festival/dias_entregas_sfestival

In [135]:
round(((media_entregas_sem_Festival - media_entregas_Festival) / media_entregas_sem_Festival) * 100, 2)

97.99