In [1]:
# Funnel conversion and drop-off analysis
import pandas as pd
import numpy as np
from tqdm import tqdm

# Load the raw event export from a local parquet file.
PARQUET_PATH = r"/Users/daviddangol/Documents/monthly report/domestic airlines/data/query-results_query-with-param_hotel_airlines_intl_bq_param_airlines_only_10-15-2025_11-01-2025000000000000.parquet"
par = pd.read_parquet(PARQUET_PATH)

# 1. Preprocess data
# Parse timestamps (day-first when a string is ambiguous) and derive the
# calendar date that the per-day de-duplication below keys on.
par['event_timestamp'] = pd.to_datetime(par['event_timestamp'], dayfirst=True)
par['event_date'] = par['event_timestamp'].dt.date

# Journey mapping: event_name -> (journey, step number, step label).
# Note that 'hotelV2Book' and 'hotelV2ReturnBooking' share Hotel step 5.
event_to_journey_step = {
    'domesticFlightSearch': ('Domestic Airlines', 1, 'Search'),
    'domesticFlightDetail': ('Domestic Airlines', 2, 'Detail'),
    'domesticFlightPassengerDetail': ('Domestic Airlines', 3, 'Passenger Detail'),
    'domesticFlightConfirmation': ('Domestic Airlines', 4, 'Confirmation'),
    'airlines_payment': ('Domestic Airlines', 5, 'Payment'),
    'hotelV2Search': ('Hotel', 1, 'Search'),
    'hotelV2Listing': ('Hotel', 2, 'Listing'),
    'hotelV2Availability': ('Hotel', 3, 'Availability'),
    'hotelV2RoomSelect': ('Hotel', 4, 'Room Select'),
    'hotelV2Book': ('Hotel', 5, 'Book'),
    'hotelV2ReturnBooking': ('Hotel', 5, 'Book'),
    'hotelV2Confirm': ('Hotel', 6, 'Confirm'),
    'internationalFlightSearch': ('International Airlines', 1, 'Search'),
    'internationalflightDetail': ('International Airlines', 2, 'Detail'),
    'internationalPassengerDetail': ('International Airlines', 3, 'Passenger Detail'),
    'internationalFlightBook': ('International Airlines', 4, 'Book'),
}


def _journey_field(position):
    """Build a mapper from an event name to one field of its journey tuple.

    Event names missing from `event_to_journey_step` map to None.
    """
    def lookup(event_name):
        return event_to_journey_step.get(event_name, (None, None, None))[position]
    return lookup


# Attach journey, step number and step label to every event row.
par['journey'] = par['event_name'].map(_journey_field(0))
par['step'] = par['event_name'].map(_journey_field(1))
par['step_name'] = par['event_name'].map(_journey_field(2))

# 2. Drop events outside the mapping, then keep only the earliest event per
#    user / journey / day / step (rows are time-ordered within each user
#    before de-duplicating, so keep='first' retains the earliest one).
par = (
    par[par['journey'].notna()]
    .sort_values(['user_id', 'event_timestamp'])
    .drop_duplicates(subset=['user_id', 'journey', 'event_date', 'step'], keep='first')
)

# 3. Per-journey lookup of step number -> step label, limited to the
#    journeys that actually occur in the filtered data.
step_name_mapping = {
    journey: {
        step: label
        for mapped_journey, step, label in event_to_journey_step.values()
        if mapped_journey == journey
    }
    for journey in par['journey'].unique()
}

# 4. Calculate Funnel Metrics
def calculate_funnel_metrics(data=None, step_names=None, show_progress=True):
    """Compute step-to-step conversion and drop-off rates for each journey.

    For every journey, the distinct step numbers present in the data are
    sorted and each consecutive pair is compared. A user is counted at a
    step only if they also appear at every earlier step found for that
    journey. The comparison is per user across all rows of the journey;
    `event_date` is not consulted here.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Events with at least `journey`, `step` and `user_id` columns.
        Defaults to the notebook-level `par` frame.
    step_names : dict, optional
        ``{journey: {step_number: step_label}}``. Defaults to the
        notebook-level `step_name_mapping`.
    show_progress : bool, default True
        Wrap the journey loop in a tqdm progress bar.

    Returns
    -------
    pandas.DataFrame
        One row per consecutive step pair with the columns
        `funnel_category`, `from_step`, `to_step`, `users_at_step_i`,
        `users_at_step_i+1`, `conversion_rate` and `drop_off_rate`.
        When there are no step pairs the frame is empty but still carries
        these columns.

    Raises
    ------
    KeyError
        If `step_names` has no label for a journey/step found in `data`.
    """
    if data is None:
        data = par
    if step_names is None:
        step_names = step_name_mapping

    columns = [
        'funnel_category',
        'from_step',
        'to_step',
        'users_at_step_i',
        'users_at_step_i+1',
        'conversion_rate',
        'drop_off_rate',
    ]
    results = []

    journeys = data['journey'].unique()
    if show_progress:
        journeys = tqdm(journeys, desc="Processing Journeys", unit="journey")

    for journey in journeys:
        journey_data = data[data['journey'] == journey]
        steps = sorted(journey_data['step'].unique())
        user_ids = journey_data['user_id']

        # Before the first step every user in the journey is eligible.
        eligible_users = set(user_ids.unique())

        for current_step, next_step in zip(steps, steps[1:]):
            # Users at the current step who also reached all earlier steps.
            # Computed once and reused for both counts and the next iteration.
            reached_current = user_ids[
                (journey_data['step'] == current_step)
                & user_ids.isin(eligible_users)
            ]
            users_at_current = reached_current.nunique()

            # Users at the next step, restricted to those counted above.
            users_at_next = user_ids[
                (journey_data['step'] == next_step)
                & user_ids.isin(reached_current)
            ].nunique()

            # Only users counted at this step stay eligible further down.
            eligible_users = set(reached_current)

            # With nobody at the current step the conversion is reported as 0
            # (and therefore a drop-off of 1) instead of dividing by zero.
            conversion_rate = users_at_next / users_at_current if users_at_current > 0 else 0
            drop_off_rate = 1 - conversion_rate

            results.append({
                'funnel_category': journey,
                'from_step': step_names[journey][current_step],
                'to_step': step_names[journey][next_step],
                'users_at_step_i': users_at_current,
                'users_at_step_i+1': users_at_next,
                'conversion_rate': conversion_rate,
                'drop_off_rate': drop_off_rate
            })

    return pd.DataFrame(results, columns=columns)

# 5. Generate Funnel Metrics
# 5. Run the funnel computation over the prepared events
funnel_metrics = calculate_funnel_metrics()

# 6. Show the complete table as plain text, without the row index
funnel_report = funnel_metrics.to_string(index=False)
print(funnel_report)

Processing Journeys: 100%|██████████| 1/1 [00:01<00:00,  1.27s/journey]

  funnel_category        from_step          to_step  users_at_step_i  users_at_step_i+1  conversion_rate  drop_off_rate
Domestic Airlines           Search           Detail            63290              16367         0.258603       0.741397
Domestic Airlines           Detail Passenger Detail            16367                 28         0.001711       0.998289
Domestic Airlines Passenger Detail     Confirmation               28                  5         0.178571       0.821429





In [2]:
 # Copy the funnel table to the system clipboard, omitting the row index.
 funnel_metrics.to_clipboard(index=False)