<a href="https://colab.research.google.com/github/cbonnin88/RailFlow/blob/main/reverse_ETL_amplitude.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import json
import time
import pandas as pd
from google.cloud import bigquery
from google.colab import auth

In [6]:
# Run google.colab's user-authentication flow for this session.
# NOTE(review): presumably this supplies the credentials that the BigQuery
# client created in a later cell relies on — confirm.
auth.authenticate_user()

In [2]:
# 1. Setup
# GCP project passed to the BigQuery client. The query cell spells out the same
# project id inside its fully-qualified table name, so the two must be kept in sync by hand.
project_id = 'railflow-484310'
# BigQuery client for the project above, with its location set to 'europe-west9'.
client = bigquery.Client(project=project_id, location='europe-west9')

In [10]:
# 2. The Query
# Selects the per-search columns that send_to_amplitude_simulation reads from each row.
# The WHERE clause keeps only rows with is_converted = TRUE, so the is_converted
# check inside send_to_amplitude_simulation is true for every row fetched here.
# LIMIT 5 keeps the sample small; there is no ORDER BY, so the statement itself
# does not fix which five rows come back.
query = """
SELECT
  user_id,
  search_at,
  origin,
  destination,
  price,
  ticket_class,
  is_converted
FROM `railflow-484310.dbt_railflow_dev.int_search_bookings`
WHERE is_converted = TRUE
LIMIT 5
"""

In [8]:
# Run the SQL in `query` through the BigQuery client and load the result into a DataFrame.
df_events = client.query(query).to_dataframe()
# Confirmation message; only reached once to_dataframe() has returned.
print('Data fetch from BigQuery.')

Data fetch from BigQuery.


In [15]:
# 3. Transform & Simulate API Call
# Amplitude expects a specific JSON structure
def send_to_amplitude_simulation(row):
  """Build the Amplitude event payloads for one search row and log simulated POSTs.

  No network request is made: each "send" is only a printed log line.

  Args:
    row: Mapping-like record (for example a pandas Series yielded by
      ``DataFrame.iterrows()``) providing the keys ``user_id``, ``search_at``,
      ``origin``, ``destination``, ``price``, ``ticket_class`` and
      ``is_converted``. ``user_id`` must support slicing and ``search_at``
      must be accepted by ``pd.Timestamp``.

  Returns:
    list[dict]: The payloads that would be sent, in order: the
    ``search_route`` event, followed by a ``complete_booking`` event when
    ``row['is_converted']`` is truthy.
  """
  def to_epoch_ms(ts):
    # Amplitude's HTTP V2 API documents `time` as integer milliseconds since
    # epoch. Timestamp.value is integer nanoseconds since epoch, so integer
    # division keeps the conversion exact (no float rounding).
    return ts.value // 1_000_000

  user_id = row['user_id']
  # Only the first 8 characters of the id appear in the log lines.
  short_id = user_id[:8]
  searched_at = pd.Timestamp(row['search_at'])

  events = []

  # Event 1: The Search
  events.append({
      'user_id': user_id,
      'event_type': 'search_route',
      'time': to_epoch_ms(searched_at),
      'event_properties': {
          'origin': row['origin'],
          'destination': row['destination'],
          'source': 'bigquery_reverse_etl'
      }
  })

  # The replacement field holds a plain name, so this f-string also parses on
  # Python versions older than 3.12 (which reject reusing the outer quote inside).
  print(f'[POST] api.amplitude.com/2/httpapi --> Sending search_route for {short_id}')

  # Event 2: The Booking (only for converted searches)
  if row['is_converted']:
    events.append({
        'user_id': user_id,
        'event_type': 'complete_booking',
        # Fake timestamp: 5 mins after search
        'time': to_epoch_ms(searched_at + pd.Timedelta(minutes=5)),
        'event_properties': {
            'amount': row['price'],
            'ticket_class': row['ticket_class'],
            'route': f"{row['origin']} to {row['destination']}"
        }
    })
    print(f'[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for {short_id}')

  print('-' * 50)
  return events

In [16]:
# Replay every fetched row through the simulator, one row at a time.
print('\n Starting Sync Job....\n')
# iterrows() yields (index label, row) pairs; the index label is not used in the loop body.
for index, row in df_events.iterrows():
  send_to_amplitude_simulation(row)
  # Pause half a second between rows.
  # NOTE(review): presumably meant to mimic pacing/rate limiting of a real API — confirm.
  time.sleep(0.5)

print('\n Sync Job Complete')


 Starting Sync Job....

[POST] api.amplitude.com/2/httpapi --> Sending search_route for 78a61a2d
[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for 78a61a2d
--------------------------------------------------
[POST] api.amplitude.com/2/httpapi --> Sending search_route for fe6c2362
[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for fe6c2362
--------------------------------------------------
[POST] api.amplitude.com/2/httpapi --> Sending search_route for c6573212
[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for c6573212
--------------------------------------------------
[POST] api.amplitude.com/2/httpapi --> Sending search_route for 2d79fc2a
[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for 2d79fc2a
--------------------------------------------------
[POST] api.amplitude.com/2/httpapi --> Sending search_route for 2d79fc2a
[POST] api.amplitude.com/2/httpapi --> Sending complete_booking for 2d79fc2a
---------------------