In [None]:
import os
import pandas as pd
from datetime import datetime, timedelta 
import numpy as np
import matplotlib.pyplot as plt
import requests

# Uber Eats Data Collection & Analysis

This Jupyter Notebook serves as both a personal journal for tracking my Uber Eats side gig and a passion project to sharpen my skills in Python, NumPy, and pandas. It’s designed to help me improve in data entry, organization, and basic data analysis.

To start, I’ve defined two core data tables: Sessions and Trips.

### Sessions Table Structure

Column | Description

* session_id | Auto-incremented identifier of the session

* date | Date of the Uber Eats session

* start_time       | Time the session began

* trips	         | Number of deliveries completed

* total_earned     | Total income for the session (USD)

* end_time	     | Time the session ended

* shift_length_hrs | Duration of the session in hours (decimal number, e.g., 5 or 5.5)

* device_used	     | Device used for the session (e.g., iPhone 13 Pro Max)


This table will help me monitor daily earnings, hours worked, and calculate hourly efficiency over time.

### Trips Table Structure

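Column | Description

* trip_id | Identifier of the trip in the form `S{session_id}-T{trip number}` (e.g., S1-T3)

* session_id | Session the trip belongs to

* restaurant_name | Source of the delivery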

* trip_duration | Duration of the delivery (free text, e.g., "41 mins 6 seconds")

* mileage	         | Distance traveled (miles)

* delivery_zone	 | Area or neighborhood of delivery

* tip	             | Tip received (USD)

* total_earnings	 | Total income from the trip (base + tip)



This table is focused on identifying which restaurants, delivery zones, and trip lengths tend to be most profitable.


In [None]:
# Sessions table: data-entry setup

# Step 1: start from an empty DataFrame that only declares the column layout
uber_sessions_df = pd.DataFrame(
    columns=[
        'session_id',
        'date',
        'trips',
        'total_earned',
        'start_time',
        'shift_length_hrs',
        'end_time',
        'device_used',
    ]
)


In [None]:
# Step 2: function that appends one session to the sessions table

def add_uber_session(uber_sessions_df, session_id, date, start_time, shift_length_hrs, trips,
                     total_earned, device_used):
    """Append one Uber Eats session to ``uber_sessions_df`` and return the frame.

    The row is written in place at label ``len(uber_sessions_df)``, so the
    frame passed in is modified as well as returned.

    Parameters
    ----------
    uber_sessions_df : pandas.DataFrame
        Sessions table to extend.
    session_id
        Accepted for backward compatibility but not used: the stored id is
        always ``len(uber_sessions_df) + 1``.
    date : str
        Session date in ``YYYY-MM-DD`` form.
    start_time : str
        12-hour clock start time such as ``'8:20 PM'``.
    shift_length_hrs : int, float or numeric str
        Length of the shift in hours.
    trips : int or numeric str
        Number of deliveries completed.
    total_earned : float or numeric str
        Income for the session.
    device_used : str
        Device used during the session.

    Returns
    -------
    pandas.DataFrame
        The same frame with the new row appended.

    Raises
    ------
    ValueError
        If the date/time strings do not match the expected formats or a
        numeric field cannot be converted.
    """
    # Convert once, up front: the same number feeds both the end-time
    # arithmetic and the stored column, so string input such as '5' works
    # here just like it does for trips and total_earned.
    shift_hours = float(shift_length_hrs)

    # Combine date and start time into one datetime, then add the shift length
    start_dt = datetime.strptime(f"{date} {start_time}", "%Y-%m-%d %I:%M %p")
    end_dt = start_dt + timedelta(hours=shift_hours)

    new_row = {
        'session_id': len(uber_sessions_df) + 1,  # Auto-increment
        'date': date,
        'start_time': start_time,
        'trips': int(trips),
        'total_earned': float(total_earned),
        'end_time': end_dt,
        'shift_length_hrs': shift_hours,
        'device_used': device_used
    }
    uber_sessions_df.loc[len(uber_sessions_df)] = new_row
    return uber_sessions_df

In [None]:
# Record a new session. Keyword arguments follow the function's parameter order.
# NOTE: add_uber_session auto-increments the stored id, so session_id is not used.
uber_sessions_df = add_uber_session(
    uber_sessions_df,
    session_id='4',
    date='2025-06-10',
    start_time='8:20 PM',
    shift_length_hrs=5,
    trips='9',
    total_earned='74.87',
    device_used='iPhone 13 Pro Max',
)

In [None]:
# Show the current contents of the sessions table
display(uber_sessions_df)

In [None]:
def save_backup(uber_sessions_df, file_path):
    """Write a DataFrame to ``file_path`` as CSV without the index column.

    Despite the parameter name, any DataFrame can be passed; this notebook
    also uses the function for the trips table.
    """
    uber_sessions_df.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Example: write the sessions table to a CSV backup (path is relative to the working directory)

save_backup(uber_sessions_df=uber_sessions_df, file_path="uber_sessions_data.csv")

In [None]:
# Undo the most recent entry: drop the last row of the sessions table.
# NOTE: this cell is not idempotent - every run removes one more row.
# .copy() turns the slice into an independent frame, so later in-place
# appends (add_uber_session writes through .loc) do not operate on a slice
# of the old frame; the trips table is handled the same way.
uber_sessions_df = uber_sessions_df[:-1].copy()
uber_sessions_df


In [None]:
# Empty trips table; only the column layout is declared here
uber_trips_df = pd.DataFrame(
    columns=['trip_id', 'session_id', 'restaurant_name', 'trip_duration',
             'mileage', 'delivery_zone', 'tip', 'total_earnings']
)

In [None]:
# Build Trips Table

# Step 1: initial (empty) DataFrame declaring the trips schema.
# 'trip_id' has to be declared here: add_uber_trip writes a 'trip_id' value
# for every row, and a frame without that column would not keep it
# (depending on the pandas version the value is dropped or the assignment fails).
uber_trips_df = pd.DataFrame(columns=[
    'trip_id',
    'session_id',
    'restaurant_name',
    'trip_duration',
    'mileage',
    'delivery_zone',
    'tip',
    'total_earnings',
])

In [None]:
# Step 2: function that appends one trip to the trips table

def add_uber_trip(uber_trips_df, session_id, restaurant_name, trip_duration, mileage, delivery_zone, tip, total_earnings):
    """Append one delivery to ``uber_trips_df`` and return the frame.

    The row is written in place at label ``len(uber_trips_df)``, so the frame
    passed in is modified as well as returned.

    Parameters
    ----------
    uber_trips_df : pandas.DataFrame
        Trips table to extend.
    session_id
        Id of the session the trip belongs to; stored as given and embedded
        in the generated trip id.
    restaurant_name : str
        Source of the delivery.
    trip_duration : str
        Duration of the delivery, stored as given.
    mileage, tip, total_earnings : float or numeric str
        Numeric fields; converted to ``float`` so the columns stay numeric
        (the same way ``add_uber_session`` converts its numeric inputs).
    delivery_zone : str
        Area of the delivery.

    Returns
    -------
    pandas.DataFrame
        The same frame with the new row appended.

    Raises
    ------
    ValueError
        If ``mileage``, ``tip`` or ``total_earnings`` is not numeric.
    """
    # The trip number counts rows of the whole table, not trips in the session
    trip_id = f"S{session_id}-T{len(uber_trips_df) + 1}"

    new_row_trips = {
        'trip_id': trip_id,
        'session_id': session_id,
        'restaurant_name': restaurant_name,
        'trip_duration': trip_duration,
        # Store numbers, not strings, so sums/means work on these columns
        'mileage': float(mileage),
        'delivery_zone': delivery_zone,
        'tip': float(tip),
        'total_earnings': float(total_earnings)
    }
    uber_trips_df.loc[len(uber_trips_df)] = new_row_trips
    return uber_trips_df


In [273]:
# Record one delivery for session 4, then show the updated trips table
uber_trips_df = add_uber_trip(
    uber_trips_df,
    session_id='4',
    restaurant_name='Joker Liquor',
    trip_duration='1 hour 5 mins',
    mileage='12.10',
    delivery_zone='Orlando',
    tip='2.00',
    total_earnings='8.68',
)
uber_trips_df

Unnamed: 0,trip_id,session_id,restaurant_name,trip_duration,mileage,delivery_zone,tip,total_earnings
0,S1-T1,1,Purple Ocean Superfood Bar,41 mins 6 seconds,8.85,Orlando,4.34,9.08
1,S1-T2,1,BEI JING RESTAURANT,45 mins 27 seconds,11.91,Orlando,1.65,7.7
2,S1-T3,1,Wendys,11 mins 11 seconds,3.01,Orlando,4.0,6.0
3,S1-T4,1,Las Cazuelas,47 mins 30 seconds,10.67,Orlando,6.35,11.51
4,S1-T5,1,CVS,23 mins 29 seconds,6.32,Orlando,3.89,7.01
5,S1-T6,1,Dunkin,30 mins 4 seconds,6.79,Orlando,3.0,6.52
6,S2-T7,2,El Cilantrillo,26 mins 24 seconds,3.99,Orlando,2.0,4.17
7,S2-T8,2,Family Dollar,24 mins 45 seconds,4.1,Orlando,1.61,6.43
8,S1-T9,2,Papa Johns,14 mins 44 seconds,2.48,Orlando,3.0,5.0
9,S1-T10,2,Walgreens,44 mins 23 seconds,6.91,University,1.81,9.36


In [None]:
# Correct the delivery zone of the trip stored at index label 28.
# Guard first: assigning through .loc to a label that does not exist would
# silently append a new, mostly empty row instead of failing.
if 28 in uber_trips_df.index:
    uber_trips_df.loc[28, 'delivery_zone'] = "Oviedo"
else:
    print("Row 28 not found in uber_trips_df; delivery_zone left unchanged.")
uber_trips_df


In [None]:
# Undo a bad entry: drop the most recent trip.
# .copy() detaches the result from the frame it was sliced from.
uber_trips_df = uber_trips_df[:-1].copy()

In [268]:
# Save Backup: write the trips table to uber_trips_data.csv
# (save_backup accepts any DataFrame, not only the sessions table)
save_backup(uber_trips_df, "uber_trips_data.csv")

In [269]:
# Fix incorrect trip ids - step 1: take a snapshot
# An independent in-memory copy of the trips table is kept so the original
# rows can be restored if the clean-up below goes wrong.

trips_copy = uber_trips_df.copy(deep=True)
trips_copy

Unnamed: 0,trip_id,session_id,restaurant_name,trip_duration,mileage,delivery_zone,tip,total_earnings
0,S1-T1,1,Purple Ocean Superfood Bar,41 mins 6 seconds,8.85,Orlando,4.34,9.08
1,S1-T2,1,BEI JING RESTAURANT,45 mins 27 seconds,11.91,Orlando,1.65,7.7
2,S1-T3,1,Wendys,11 mins 11 seconds,3.01,Orlando,4.0,6.0
3,S1-T4,1,Las Cazuelas,47 mins 30 seconds,10.67,Orlando,6.35,11.51
4,S1-T5,1,CVS,23 mins 29 seconds,6.32,Orlando,3.89,7.01
5,S1-T6,1,Dunkin,30 mins 4 seconds,6.79,Orlando,3.0,6.52
6,S2-T7,2,El Cilantrillo,26 mins 24 seconds,3.99,Orlando,2.0,4.17
7,S2-T8,2,Family Dollar,24 mins 45 seconds,4.1,Orlando,1.61,6.43
8,S1-T9,2,Papa Johns,14 mins 44 seconds,2.48,Orlando,3.0,5.0
9,S1-T10,2,Walgreens,44 mins 23 seconds,6.91,University,1.81,9.36


In [None]:
# Restore the working trips table from the in-memory snapshot
uber_trips_df = trips_copy.copy(deep=True)

In [None]:
# Fix mishandled trip ids
# Rebuild every id as S<session_id>-T<n>, where n is the 1-based row position.
# The position is used instead of the index label, so the numbering stays
# correct after rows were dropped or the index is not 0..n-1, and an empty
# table simply yields no ids.

uber_trips_df['trip_id'] = [
    f"S{session}-T{position}"
    for position, session in enumerate(uber_trips_df['session_id'], start=1)
]

In [None]:
# Load the trips CSV from disk into a separate variable (uber_trips_df is not modified)
trips = pd.read_csv("uber_trips_data.csv")

In [None]:
# Display the frame held in `trips`
trips

In [None]:
# Manually assign the ids of the 13 recorded trips (the list length must equal the row count).
# Trips 9 and 10 belong to session 2, so they carry the S2 prefix like the other session-2 trips.
uber_trips_df['trip_id'] = [
    'S1-T1', 'S1-T2', 'S1-T3', 'S1-T4', 'S1-T5', 'S1-T6',
    'S2-T7', 'S2-T8', 'S2-T9', 'S2-T10', 'S2-T11', 'S2-T12', 'S2-T13',
]

In [None]:
# Trips from row position 6 onward belong to session 2.
# The column is looked up by name: a hard-coded position (1) would overwrite a
# different column whenever 'session_id' is not the second column of the frame.
uber_trips_df.iloc[6:, uber_trips_df.columns.get_loc('session_id')] = 2
uber_trips_df