In [1]:
# Processing for Uber Data 2023

In [1]:
import pandas as pd

In [2]:
# Import file and print basic information 

csv_file_path = './driver_activity_wav_update_2024_08_09.csv'
df = pd.read_csv(csv_file_path)

# Test dataframe
df.head()

  df = pd.read_csv(csv_file_path)


Unnamed: 0,weekstr,driver_hashed_uuid,total_session_mileage,total_passenger_mileage,online_minutes,rental_company,is_rental,is_wav,trip_minutes,driver_pay_excl_bonus_excl_tips,driver_tips,driver_bonus
0,2023-07-31 00:00:00.000,B3664F7C7748050F0FEFA70493CC5963,357.08,277.98,1446.433333,\N,False,False,1001.8166666666666,938.3,58.96,0
1,2023-07-31 00:00:00.000,69CFE4D5A4898C4CE0C43B3915729B4A,255.13,213.81,1511.85,\N,False,False,295.1333333333334,764.98,12.74,0
2,2023-07-31 00:00:00.000,69CFE4D5A4898C4CE0C43B3915729B4A,255.13,213.81,1511.85,ftl,True,False,541.2166666666667,764.98,12.74,0
3,2023-07-31 00:00:00.000,9EC980068872046D33C15B0374949A5D,98.72,66.62,393.5,\N,False,False,206.26666666666668,276.33,6.0,0
4,2023-07-31 00:00:00.000,002B70BF8CE65E460C60AE8AD46FF47E,790.42,670.94,4283.983333,\N,False,False,2243.666666666667,2205.59,329.34,7


In [3]:
# Convert to numeric values and remove non-numeric values
df['total_session_mileage'] = pd.to_numeric(df['total_session_mileage'], errors='coerce')
df['driver_pay_excl_bonus_excl_tips'] = pd.to_numeric(df['driver_pay_excl_bonus_excl_tips'], errors='coerce')
df['driver_tips'] = pd.to_numeric(df['driver_tips'], errors='coerce')
df['driver_bonus'] = pd.to_numeric(df['driver_bonus'], errors='coerce')
df['online_minutes'] = pd.to_numeric(df['online_minutes'], errors='coerce')

# Remove null values from the data
df.dropna(subset=['total_session_mileage'], inplace=True)
df.dropna(subset=['is_rental'], inplace=True)
df.dropna(subset=['driver_tips'], inplace=True)
df.dropna(subset=['driver_bonus'], inplace=True)
df.dropna(subset=['online_minutes'], inplace=True)
df.dropna(subset=['is_wav'], inplace=True)
df.reset_index(drop=True, inplace=True)

# Ensure the 'is_rental' column contains only 'true' or 'false' as strings
df['is_rental'] = df['is_rental'].astype(str).str.strip().str.lower()
df['is_wav'] = df['is_wav'].astype(str).str.strip().str.lower()

In [36]:
# Prepare an empty list to store values 
results_v1 = []
results_v2 = []

In [37]:
### Version 1
### Print values for EXPENSES

## Calculate the number of ALL drivers
total_drivers = df['driver_hashed_uuid'].nunique()
print("Total drivers:", total_drivers)
results_v1.append(["Total drivers", total_drivers])

## Calculate the number of RENTERS
unique_renters = df.loc[df['is_rental'] == 'true', 'driver_hashed_uuid'].nunique()
print("Total renters:", unique_renters)
results_v1.append(["Total renters", unique_renters])

## Calculate the number of OWNERS
unique_owners = df.loc[df['is_rental'] == 'false', 'driver_hashed_uuid'].nunique()
print("Total owners:", unique_owners)
results_v1.append(["Total owners", unique_owners])

# Calculate duplicates
owners_set = set(df.loc[df['is_rental'] == 'false', 'driver_hashed_uuid'])
renters_set = set(df.loc[df['is_rental'] == 'true', 'driver_hashed_uuid'])
intersection_set = owners_set & renters_set
drivers_both_categories = len(intersection_set)
print("Drivers in both categories:", drivers_both_categories)
results_v1.append(["Drivers in both categories", drivers_both_categories])

## Calculate the total annual miles for ALL
total_miles_all = df['total_session_mileage'].sum()
print("Total miles for all drivers:", total_miles_all)
results_v1.append(["Total miles for all drivers", total_miles_all])

## Calculate the total annual miles for RENTERS
total_miles_renters = df.loc[df['is_rental'] == 'true', 'total_session_mileage'].sum()
print("Total miles for renters:", total_miles_renters)
results_v1.append(["Total miles for renters", total_miles_renters])

## Calculate the total annual miles for OWNERS
total_miles_owners = df.loc[df['is_rental'] == 'false', 'total_session_mileage'].sum()
print("Total miles for owners:", total_miles_owners)
results_v1.append(["Total miles for owners", total_miles_owners])

## Calculate the mean weekly miles for ALL
average_weekly_miles_all = df['total_session_mileage'].mean()
print("Mean weekly miles for all drivers:", average_weekly_miles_all)
results_v1.append(["Mean weekly miles for all drivers", average_weekly_miles_all])

## Calculate the mean weekly miles for RENTERS
average_weekly_miles_renters = df.loc[df['is_rental'] == 'true', 'total_session_mileage'].mean()
print("Mean weekly miles for renters:", average_weekly_miles_renters)
results_v1.append(["Mean weekly miles for renters", average_weekly_miles_renters])

## Calculate the mean weekly miles for OWNERS
average_weekly_miles_owners = df.loc[df['is_rental'] == 'false', 'total_session_mileage'].mean()
print("Mean weekly miles for owners:", average_weekly_miles_owners)
results_v1.append(["Mean weekly miles for owners", average_weekly_miles_owners])

Total drivers: 90709
Total renters: 17013
Total owners: 87927
Drivers in both categories: 14233
Total miles for all drivers: 1120169975.769999
Total miles for renters: 163542886.03000003
Total miles for owners: 956543784.3700007
Mean weekly miles for all drivers: 324.0954049290229
Mean weekly miles for renters: 365.42384984234246
Mean weekly miles for owners: 318.06823685206655


In [38]:
### Version 1
### Print values for EARNINGS for all DRIVERS

# Calculate the mean weekly salary for all DRIVERS
mean_weekly_salary = df['driver_pay_excl_bonus_excl_tips'].mean()
print("Mean weekly salary for all drivers: $", mean_weekly_salary)
results_v1.append(["Mean weekly salary for all drivers", mean_weekly_salary])

# Calculate the mean weekly tips for all DRIVERS
mean_weekly_tips = df['driver_tips'].mean()
print("Mean weekly tips for all drivers: $", mean_weekly_tips)
results_v1.append(["Mean weekly tips for all drivers", mean_weekly_tips])

# Calculate the mean weekly bonus for all DRIVERS
mean_weekly_bonus = df['driver_bonus'].mean()
print("Mean weekly bonus for all drivers: $", mean_weekly_bonus)
results_v1.append(["Mean weekly bonus for all drivers", mean_weekly_bonus])

# Calculate the mean weekly overall earnings for all DRIVERS
total_earnings = mean_weekly_salary + mean_weekly_tips + mean_weekly_bonus
print("Mean weekly total earnings for all drivers: $", total_earnings)
results_v1.append(["Mean weekly total earnings for all drivers", total_earnings])

# Calculate the mean weekly minutes worked for all DRIVERS
mean_weekly_minutes_worked = df['online_minutes'].mean()
print("Mean weekly minutes worked for all drivers:", mean_weekly_minutes_worked)
results_v1.append(["Mean weekly minutes worked for all drivers", mean_weekly_minutes_worked])

# Calculate the mean weekly hours worked for all DRIVERS
mean_weekly_hours_worked = mean_weekly_minutes_worked / 60
print("Mean weekly hours worked for all drivers:", mean_weekly_hours_worked)
results_v1.append(["Mean weekly hours worked for all drivers", mean_weekly_hours_worked])

# Calculate the mean hourly earnings for all DRIVERS
mean_hourly_earnings = total_earnings / mean_weekly_hours_worked
print("Mean hourly earnings: $", mean_hourly_earnings)
results_v1.append(["Mean hourly earnings", mean_hourly_earnings])

# Calculate the mean annual earnings for all DRIVERS
mean_annual_earnings = total_earnings * 52
print("Mean annual earnings: $", mean_annual_earnings)
results_v1.append(["Mean annual earnings", mean_annual_earnings])

Mean weekly salary for all drivers: $ 985.4183529061451
Mean weekly tips for all drivers: $ 95.24208961497715
Mean weekly bonus for all drivers: $ 13.240848168650901
Mean weekly total earnings for all drivers: $ 1093.901290689773
Mean weekly minutes worked for all drivers: 1829.7616888586037
Mean weekly hours worked for all drivers: 30.496028147643397
Mean hourly earnings: $ 35.8702872844215
Mean annual earnings: $ 56882.8671158682


In [39]:
### Version 1
### Print values for EARNINGS for WAV

# Calculate the number of WAV drivers
drivers_wav = df.loc[df['is_wav'] == 'true', 'driver_hashed_uuid'].nunique()
print("Number of WAV drivers:", drivers_wav)
results_v1.append(["Number of WAV drivers", drivers_wav])

# Calculate the mean weekly salary for WAV drivers
mean_weekly_salary_wav = df.loc[df['is_wav'] == 'true', 'driver_pay_excl_bonus_excl_tips'].mean()
print("Mean weekly salary for WAV drivers: $", mean_weekly_salary_wav)
results_v1.append(["Mean weekly salary for WAV drivers", mean_weekly_salary_wav])

# Calculate the mean weekly tips for WAV drivers
mean_weekly_tips_wav = df.loc[df['is_wav'] == 'true', 'driver_tips'].mean()
print("Mean weekly tips for WAV drivers: $", mean_weekly_tips_wav)
results_v1.append(["Mean weekly tips for WAV drivers", mean_weekly_tips_wav])

# Calculate the mean weekly bonus for WAV drivers
mean_weekly_bonus_wav = df.loc[df['is_wav'] == 'true', 'driver_bonus'].mean()
print("Mean weekly bonus for WAV drivers: $", mean_weekly_bonus_wav)
results_v1.append(["Mean weekly bonus for WAV drivers", mean_weekly_bonus_wav])

# Calculate the mean weekly overall earnings for WAV drivers
total_earnings_wav = mean_weekly_salary_wav + mean_weekly_tips_wav + mean_weekly_bonus_wav
print("Mean weekly total earnings for WAV drivers: $", total_earnings_wav)
results_v1.append(["Mean weekly total earnings for WAV drivers", total_earnings_wav])

# Calculate the mean weekly minutes worked for WAV drivers
mean_weekly_minutes_worked_wav = df.loc[df['is_wav'] == 'true', 'online_minutes'].mean()
print("Mean weekly minutes worked for WAV drivers:", mean_weekly_minutes_worked_wav)
results_v1.append(["Mean weekly minutes worked for WAV drivers", mean_weekly_minutes_worked_wav])

# Calculate the mean weekly hours worked for WAV drivers
mean_weekly_hours_worked_wav = mean_weekly_minutes_worked_wav / 60
print("Mean weekly hours worked for WAV drivers:", mean_weekly_hours_worked_wav)
results_v1.append(["Mean weekly hours worked for WAV drivers", mean_weekly_hours_worked_wav])

# Calculate the mean hourly earnings for WAV drivers
mean_hourly_earnings_wav = total_earnings_wav / mean_weekly_hours_worked_wav
print("Mean hourly earnings for WAV drivers: $", mean_hourly_earnings_wav)
results_v1.append(["Mean hourly earnings for WAV drivers", mean_hourly_earnings_wav])

# Calculate the mean annual earnings for WAV drivers
mean_annual_earnings_wav = total_earnings_wav * 52
print("Mean annual earnings for WAV drivers: $", mean_annual_earnings_wav)
results_v1.append(["Mean annual earnings for WAV drivers", mean_annual_earnings_wav])

Number of WAV drivers: 8770
Mean weekly salary for WAV drivers: $ 1090.1038225474424
Mean weekly tips for WAV drivers: $ 85.10363472416759
Mean weekly bonus for WAV drivers: $ 85.42430586543432
Mean weekly total earnings for WAV drivers: $ 1260.6317631370443
Mean weekly minutes worked for WAV drivers: 1874.2870882130485
Mean weekly hours worked for WAV drivers: 31.238118136884143
Mean hourly earnings for WAV drivers: $ 40.35556039621235
Mean annual earnings for WAV drivers: $ 65552.85168312631


In [40]:
### Version 1
### Print values for EARNINGS for non-WAV drivers

# Calculate the number of non-WAV drivers
drivers_non_wav = df.loc[df['is_wav'] == 'false', 'driver_hashed_uuid'].nunique()
print("Number of non-WAV drivers:", drivers_non_wav)
results_v1.append(["Number of non-WAV drivers", drivers_non_wav])

# Calculate the mean weekly salary for non-WAV drivers
mean_weekly_salary_non_wav = df.loc[df['is_wav'] == 'false', 'driver_pay_excl_bonus_excl_tips'].mean()
print("Mean weekly salary for non-WAV drivers: $", mean_weekly_salary_non_wav)
results_v1.append(["Mean weekly salary for non-WAV drivers", mean_weekly_salary_non_wav])

# Calculate the mean weekly tips for non-WAV drivers
mean_weekly_tips_non_wav = df.loc[df['is_wav'] == 'false', 'driver_tips'].mean()
print("Mean weekly tips for non-WAV drivers: $", mean_weekly_tips_non_wav)
results_v1.append(["Mean weekly tips for non-WAV drivers", mean_weekly_tips_non_wav])

# Calculate the mean weekly bonus for non-WAV drivers
mean_weekly_bonus_non_wav = df.loc[df['is_wav'] == 'false', 'driver_bonus'].mean()
print("Mean weekly bonus for non-WAV drivers: $", mean_weekly_bonus_non_wav)
results_v1.append(["Mean weekly bonus for non-WAV drivers", mean_weekly_bonus_non_wav])

# Calculate the mean weekly overall earnings for non-WAV drivers
total_earnings_non_wav = mean_weekly_salary_non_wav + mean_weekly_tips_non_wav + mean_weekly_bonus_non_wav
print("Mean weekly total earnings for non-WAV drivers: $", total_earnings_non_wav)
results_v1.append(["Mean weekly total earnings for non-WAV drivers", total_earnings_non_wav])

# Calculate the mean weekly minutes worked for non-WAV drivers
mean_weekly_minutes_worked_non_wav = df.loc[df['is_wav'] == 'false', 'online_minutes'].mean()
print("Mean weekly minutes worked for non-WAV drivers:", mean_weekly_minutes_worked_non_wav)
results_v1.append(["Mean weekly minutes worked for non-WAV drivers", mean_weekly_minutes_worked_non_wav])

# Calculate the mean weekly hours worked for non-WAV drivers
mean_weekly_hours_worked_non_wav = mean_weekly_minutes_worked_non_wav / 60
print("Mean weekly hours worked for non-WAV drivers:", mean_weekly_hours_worked_non_wav)
results_v1.append(["Mean weekly hours worked for non-WAV drivers", mean_weekly_hours_worked_non_wav])

# Calculate the mean hourly earnings for non-WAV drivers
mean_hourly_earnings_non_wav = total_earnings_non_wav / mean_weekly_hours_worked_non_wav
print("Mean hourly earnings for non-WAV drivers: $", mean_hourly_earnings_non_wav)
results_v1.append(["Mean hourly earnings for non-WAV drivers", mean_hourly_earnings_non_wav])

# Calculate the mean annual earnings for non-WAV drivers
mean_annual_earnings_non_wav = total_earnings_non_wav * 52
print("Mean annual earnings for non-WAV drivers: $", mean_annual_earnings_non_wav)
results_v1.append(["Mean annual earnings for non-WAV drivers", mean_annual_earnings_non_wav])

Number of non-WAV drivers: 86493
Mean weekly salary for non-WAV drivers: $ 977.9094517831035
Mean weekly tips for non-WAV drivers: $ 96.00123549595128
Mean weekly bonus for non-WAV drivers: $ 8.065524936372608
Mean weekly total earnings for non-WAV drivers: $ 1081.9762122154275
Mean weekly minutes worked for non-WAV drivers: 1827.1966067753724
Mean weekly hours worked for non-WAV drivers: 30.45327677958954
Mean hourly earnings for non-WAV drivers: $ 35.52905718640405
Mean annual earnings for non-WAV drivers: $ 56262.76303520223


In [47]:
### Version 1
# Create a DataFrame from the results list
df_results_v1 = pd.DataFrame(results_v1, columns=['Metric', 'Value'])
df_results_v1['Value'] = pd.to_numeric(df_results_v1['Value'], errors='coerce')

# Export the results 
df_results_v1.to_csv('./uber_results_v1.csv')

In [42]:
### Version 2
### Print values for EXPENSES

## Calculate the number of ALL drivers
total_drivers = df['driver_hashed_uuid'].nunique()
print("Total drivers:", total_drivers)
results_v2.append(["Total drivers", total_drivers])

## Calculate the number of RENTERS
unique_renters = df.loc[df['is_rental'] == 'true', 'driver_hashed_uuid'].nunique()
print("Total renters:", unique_renters)
results_v2.append(["Total renters", unique_renters])

## Calculate the number of OWNERS
unique_owners = df.loc[df['is_rental'] == 'false', 'driver_hashed_uuid'].nunique()
print("Total owners:", unique_owners)
results_v2.append(["Total owners", unique_owners])

# Calculate duplicates
owners_set = set(df.loc[df['is_rental'] == 'false', 'driver_hashed_uuid'])
renters_set = set(df.loc[df['is_rental'] == 'true', 'driver_hashed_uuid'])
intersection_set = owners_set & renters_set
drivers_both_categories = len(intersection_set)
print("Drivers in both categories:", drivers_both_categories)
results_v2.append(["Drivers in both categories", drivers_both_categories])

## Calculate the total annual miles for ALL
total_miles_all = df['total_session_mileage'].sum()
print("Total miles for all drivers:", total_miles_all)
results_v2.append(["Total miles for all drivers", total_miles_all])

## Calculate the total annual miles for RENTERS
total_miles_renters = df.loc[df['is_rental'] == 'true', 'total_session_mileage'].sum()
print("Total miles for renters:", total_miles_renters)
results_v2.append(["Total miles for renters", total_miles_renters])

## Calculate the total annual miles for OWNERS
total_miles_owners = df.loc[df['is_rental'] == 'false', 'total_session_mileage'].sum()
print("Total miles for owners:", total_miles_owners)
results_v2.append(["Total miles for owners", total_miles_owners])

## Calculate the mean weekly miles for ALL
average_weekly_miles_all = df['total_session_mileage'].sum()/total_drivers/52
print("Mean weekly miles for all drivers:", average_weekly_miles_all)
results_v2.append(["Mean weekly miles for all drivers", average_weekly_miles_all])

## Calculate the mean weekly miles for RENTERS
average_weekly_miles_renters = df.loc[df['is_rental'] == 'true', 'total_session_mileage'].sum()/unique_renters/52
print("Mean weekly miles for renters:", average_weekly_miles_renters)
results_v2.append(["Mean weekly miles for renters", average_weekly_miles_renters])

## Calculate the mean weekly miles for OWNERS
average_weekly_miles_owners = df.loc[df['is_rental'] == 'false', 'total_session_mileage'].sum()/unique_owners/52
print("Mean weekly miles for owners:", average_weekly_miles_owners)
results_v2.append(["Mean weekly miles for owners", average_weekly_miles_owners])

Total drivers: 90709
Total renters: 17013
Total owners: 87927
Drivers in both categories: 14233
Total miles for all drivers: 1120169975.769999
Total miles for renters: 163542886.03000003
Total miles for owners: 956543784.3700007
Mean weekly miles for all drivers: 237.4817306250671
Mean weekly miles for renters: 184.8618997576514
Mean weekly miles for owners: 209.20846584491872


In [43]:
### Version 2
### Print values for EARNINGS

# Calculate the mean weekly salary
mean_weekly_salary = df['driver_pay_excl_bonus_excl_tips'].sum()/total_drivers/52
print("Mean weekly salary for all drivers: $", mean_weekly_salary)
results_v2.append(["Mean weekly salary for all drivers", mean_weekly_salary])

# Calculate the mean weekly tips
mean_weekly_tips = df['driver_tips'].sum()/total_drivers/52
print("Mean weekly tips for all drivers: $", mean_weekly_tips)
results_v2.append(["Mean weekly tips for all drivers", mean_weekly_tips])

# Calculate the mean weekly bonus
mean_weekly_bonus = df['driver_bonus'].sum()/total_drivers/52
print("Mean weekly bonus for all drivers: $", mean_weekly_bonus)
results_v2.append(["Mean weekly bonus for all drivers", mean_weekly_bonus])

# Calculate the mean weekly overall earnings
total_earnings = mean_weekly_salary + mean_weekly_tips + mean_weekly_bonus
print("Mean weekly total earnings for all drivers: $", total_earnings)
results_v2.append(["Mean weekly total earnings for all drivers", total_earnings])

# Calculate the mean weekly minutes worked
mean_weekly_minutes_worked = df['online_minutes'].sum()/total_drivers/52
print("Mean weekly minutes worked for all drivers:", mean_weekly_minutes_worked)
results_v2.append(["Mean weekly minutes worked for all drivers", mean_weekly_minutes_worked])

# Calculate the mean weekly hours worked
mean_weekly_hours_worked = mean_weekly_minutes_worked / 60
print("Mean weekly hours worked for all drivers:", mean_weekly_hours_worked)
results_v2.append(["Mean weekly hours worked for all drivers", mean_weekly_hours_worked])

# Calculate the mean hourly earnings
mean_hourly_earnings = total_earnings / mean_weekly_hours_worked
print("Mean hourly earnings: $", mean_hourly_earnings)
results_v2.append(["Mean hourly earnings", mean_hourly_earnings])

# Calculate the mean annual earnings
mean_annual_earnings = total_earnings * 52
print("Mean annual earnings: $", mean_annual_earnings)
results_v2.append(["Mean annual earnings", mean_annual_earnings])

Mean weekly salary for all drivers: $ 721.7753202336802
Mean weekly tips for all drivers: $ 69.78888292188796
Mean weekly bonus for all drivers: $ 9.702265105312021
Mean weekly total earnings for all drivers: $ 801.2664682608802
Mean weekly minutes worked for all drivers: 1340.762522062662
Mean weekly hours worked for all drivers: 22.3460420343777
Mean hourly earnings: $ 35.857198649684456
Mean annual earnings: $ 41665.85634956577


In [44]:
### Version 2
### Print values for EARNINGS for WAV

# Calculate the number of WAV drivers
drivers_wav = df.loc[df['is_wav'] == 'true', 'driver_hashed_uuid'].nunique()
print("Number of WAV drivers:", drivers_wav)
results_v2.append(["Number of WAV drivers", drivers_wav])

# Calculate the mean weekly salary for WAV drivers
mean_weekly_salary_wav = df.loc[df['is_wav'] == 'true', 'driver_pay_excl_bonus_excl_tips'].sum()/drivers_wav/52
print("Mean weekly salary for WAV drivers: $", mean_weekly_salary_wav)
results_v2.append(["Mean weekly salary for WAV drivers", mean_weekly_salary_wav])

# Calculate the mean weekly tips for WAV drivers
mean_weekly_tips_wav = df.loc[df['is_wav'] == 'true', 'driver_tips'].sum()/drivers_wav/52
print("Mean weekly tips for WAV drivers: $", mean_weekly_tips_wav)
results_v2.append(["Mean weekly tips for WAV drivers", mean_weekly_tips_wav])

# Calculate the mean weekly bonus for WAV drivers
mean_weekly_bonus_wav = df.loc[df['is_wav'] == 'true', 'driver_bonus'].sum()/drivers_wav/52
print("Mean weekly bonus for WAV drivers: $", mean_weekly_bonus_wav)
results_v2.append(["Mean weekly bonus for WAV drivers", mean_weekly_bonus_wav])

# Calculate the mean weekly overall earnings for WAV drivers
total_earnings_wav = mean_weekly_salary_wav + mean_weekly_tips_wav + mean_weekly_bonus_wav
print("Mean weekly total earnings for WAV drivers: $", total_earnings_wav)
results_v2.append(["Mean weekly total earnings for WAV drivers", total_earnings_wav])

# Calculate the mean weekly minutes worked for WAV drivers
mean_weekly_minutes_worked_wav = df.loc[df['is_wav'] == 'true', 'online_minutes'].sum()/drivers_wav/52
print("Mean weekly minutes worked for WAV drivers:", mean_weekly_minutes_worked_wav)
results_v2.append(["Mean weekly minutes worked for WAV drivers", mean_weekly_minutes_worked_wav])

# Calculate the mean weekly hours worked for WAV drivers
mean_weekly_hours_worked_wav = mean_weekly_minutes_worked_wav / 60
print("Mean weekly hours worked for WAV drivers:", mean_weekly_hours_worked_wav)
results_v2.append(["Mean weekly hours worked for WAV drivers", mean_weekly_hours_worked_wav])

# Calculate the mean hourly earnings for WAV drivers
mean_hourly_earnings_wav = total_earnings_wav / mean_weekly_hours_worked_wav
print("Mean hourly earnings for WAV drivers: $", mean_hourly_earnings_wav)
results_v2.append(["Mean hourly earnings for WAV drivers", mean_hourly_earnings_wav])

# Calculate the mean annual earnings for WAV drivers
mean_annual_earnings_wav = total_earnings_wav * 52
print("Mean annual earnings for WAV drivers: $", mean_annual_earnings_wav)
results_v2.append(["Mean annual earnings for WAV drivers", mean_annual_earnings_wav])

Number of WAV drivers: 8770
Mean weekly salary for WAV drivers: $ 552.7202146302956
Mean weekly tips for WAV drivers: $ 43.150476383650556
Mean weekly bonus for WAV drivers: $ 43.31306770601746
Mean weekly total earnings for WAV drivers: $ 639.1837587199636
Mean weekly minutes worked for WAV drivers: 950.3281616378679
Mean weekly hours worked for WAV drivers: 15.838802693964464
Mean hourly earnings for WAV drivers: $ 40.35556039621171
Mean annual earnings for WAV drivers: $ 33237.55545343811


In [45]:
### Version 2
### Print values for EARNINGS for non-WAV drivers

# Calculate the number of non-WAV drivers
drivers_non_wav = df.loc[df['is_wav'] == 'false', 'driver_hashed_uuid'].nunique()
print("Number of non-WAV drivers:", drivers_non_wav)
results_v2.append(["Number of non-WAV drivers", drivers_non_wav])

# Calculate the mean weekly salary for non-WAV drivers
mean_weekly_salary_non_wav = df.loc[df['is_wav'] == 'false', 'driver_pay_excl_bonus_excl_tips'].sum()/drivers_non_wav/52
print("Mean weekly salary for non-WAV drivers: $", mean_weekly_salary_non_wav)
results_v2.append(["Mean weekly salary for non-WAV drivers", mean_weekly_salary_non_wav])

# Calculate the mean weekly tips for non-WAV drivers
mean_weekly_tips_non_wav = df.loc[df['is_wav'] == 'false', 'driver_tips'].sum()/drivers_non_wav/52
print("Mean weekly tips for non-WAV drivers: $", mean_weekly_tips_non_wav)
results_v2.append(["Mean weekly tips for non-WAV drivers", mean_weekly_tips_non_wav])

# Calculate the mean weekly bonus for non-WAV drivers
mean_weekly_bonus_non_wav = df.loc[df['is_wav'] == 'false', 'driver_bonus'].sum()/drivers_non_wav/52
print("Mean weekly bonus for non-WAV drivers: $", mean_weekly_bonus_non_wav)
results_v2.append(["Mean weekly bonus for non-WAV drivers", mean_weekly_bonus_non_wav])

# Calculate the mean weekly overall earnings for non-WAV drivers
total_earnings_non_wav = mean_weekly_salary_non_wav + mean_weekly_tips_non_wav + mean_weekly_bonus_non_wav
print("Mean weekly total earnings for non-WAV drivers: $", total_earnings_non_wav)
results_v2.append(["Mean weekly total earnings for non-WAV drivers", total_earnings_non_wav])

# Calculate the mean weekly minutes worked for non-WAV drivers
mean_weekly_minutes_worked_non_wav = df.loc[df['is_wav'] == 'false', 'online_minutes'].sum()/drivers_non_wav/52
print("Mean weekly minutes worked for non-WAV drivers:", mean_weekly_minutes_worked_non_wav)
results_v2.append(["Mean weekly minutes worked for non-WAV drivers", mean_weekly_minutes_worked_non_wav])

# Calculate the mean weekly hours worked for non-WAV drivers
mean_weekly_hours_worked_non_wav = mean_weekly_minutes_worked_non_wav / 60
print("Mean weekly hours worked for non-WAV drivers:", mean_weekly_hours_worked_non_wav)
results_v2.append(["Mean weekly hours worked for non-WAV drivers", mean_weekly_hours_worked_non_wav])

# Calculate the mean hourly earnings for non-WAV drivers
mean_hourly_earnings_non_wav = total_earnings_non_wav / mean_weekly_hours_worked_non_wav
print("Mean hourly earnings for non-WAV drivers: $", mean_hourly_earnings_non_wav)
results_v2.append(["Mean hourly earnings for non-WAV drivers", mean_hourly_earnings_non_wav])

# Calculate the mean annual earnings for non-WAV drivers
mean_annual_earnings_non_wav = total_earnings_non_wav * 52
print("Mean annual earnings for non-WAV drivers: $", mean_annual_earnings_non_wav)
results_v2.append(["Mean annual earnings for non-WAV drivers", mean_annual_earnings_non_wav])

Number of non-WAV drivers: 86493
Mean weekly salary for non-WAV drivers: $ 700.9140767549889
Mean weekly tips for non-WAV drivers: $ 68.80863787776512
Mean weekly bonus for non-WAV drivers: $ 5.78094419070611
Mean weekly total earnings for non-WAV drivers: $ 775.5036588234601
Mean weekly minutes worked for non-WAV drivers: 1309.6384541049265
Mean weekly hours worked for non-WAV drivers: 21.827307568415442
Mean hourly earnings for non-WAV drivers: $ 35.52905718640395
Mean annual earnings for non-WAV drivers: $ 40326.19025881992


In [48]:
### Version 2
# Create a DataFrame from the results list
df_results_v2 = pd.DataFrame(results_v2, columns=['Metric', 'Value'])
df_results_v2['Value'] = pd.to_numeric(df_results_v2['Value'], errors='coerce')

# Export the results 
df_results_v2.to_csv('./uber_results_v2.csv')

In [54]:
# Group by 'driver_hashed_uuid' and calculate the sum of the relevant columns
grouped_df = df.groupby('driver_hashed_uuid').sum()

# Calculate the total earnings for each driver
grouped_df['total_earnings'] = grouped_df['driver_pay_excl_bonus_excl_tips'] + grouped_df['driver_tips'] + grouped_df['driver_bonus']

# Max test
max_earnings = grouped_df['total_earnings'].max()
print("Max total earnings:", max_earnings)

# Define specific quintile ranges
bins = [0, 25000, 50000, 75000, 100000, max(max_earnings + 1, 100000)]

# Bin the total earnings
grouped_df['earnings_bin'] = pd.cut(grouped_df['total_earnings'], bins, right=False, include_lowest=True)

# Convert the Interval objects to strings for CSV export
grouped_df['earnings_bin_str'] = grouped_df['earnings_bin'].astype(str)

# Count the number of drivers in each bin
bin_distribution = grouped_df['earnings_bin_str'].value_counts().sort_index()

# Create a DataFrame for the bin distribution table
bin_distribution_table = bin_distribution.reset_index()
bin_distribution_table.columns = ['Earnings Range', 'Number of Drivers']

# Calculate the percentage of drivers in each bin
total_drivers = bin_distribution_table['Number of Drivers'].sum()
bin_distribution_table['Percentage of Drivers'] = (bin_distribution_table['Number of Drivers'] / total_drivers) * 100

# Display the bin distribution table
print(bin_distribution_table)

# Export the distribution table to a CSV file
bin_distribution_table.to_csv('./earnings_binned.csv', index=False)

Max total earnings: 190769.3770441479
           Earnings Range  Number of Drivers  Percentage of Drivers
0          [0.0, 25000.0)              29344                  32.35
1  [100000.0, 190769.377)               2366                   2.61
2      [25000.0, 50000.0)              27422                  30.23
3      [50000.0, 75000.0)              22114                  24.38
4     [75000.0, 100000.0)               9462                  10.43
5                     nan                  1                   0.00
