In [1]:
import os
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import psycopg2
warnings.filterwarnings('ignore')

In [2]:
# Connection settings passed as keyword arguments to psycopg2.connect().
# Values are read from the standard libpq environment variables so real
# credentials never have to be committed with the notebook; the fallbacks
# are the local development defaults this notebook was written against.
DB_CONFIG = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': int(os.environ.get('PGPORT', '5432')),
    'dbname': os.environ.get('PGDATABASE', 'dev'),
    'user': os.environ.get('PGUSER', 'adm'),
    # Dev-only fallback; set PGPASSWORD for any non-local database.
    'password': os.environ.get('PGPASSWORD', 'adm')
}

def get_connection():
    """
    Open a PostgreSQL connection using DB_CONFIG.

    The connection is switched to autocommit mode before being handed back.
    Any failure is printed and reported to the caller as None instead of
    being raised.
    """
    connection = None
    try:
        connection = psycopg2.connect(**DB_CONFIG)
        # Autocommit so each statement takes effect without an explicit commit.
        connection.autocommit = True
    except Exception as exc:
        print(f"❌ Connection error: {exc}")
        return None
    return connection
    
def execute_query(query, fetch=False, params=None):
    """
    Execute a query and optionally fetch results.

    A fresh connection is obtained from get_connection() for every call and
    is always closed before returning.

    Args:
        query: SQL text to execute.
        fetch: When True, fetch every row and return them as a DataFrame
            whose column names come from the cursor description.
        params: Optional sequence or mapping of values bound to the query's
            placeholders by the driver. Leave as None (the default) for
            statements without placeholders so that literal '%' characters
            in the SQL are sent through unchanged.

    Returns:
        A pandas DataFrame when fetch is True, True when fetch is False, or
        None when the connection or the query failed (the error is printed).
    """
    conn = get_connection()
    if not conn:
        return None

    try:
        # The context manager closes the cursor even if execute/fetch raises.
        with conn.cursor() as cursor:
            cursor.execute(query, params)

            if fetch:
                results = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
                return pd.DataFrame(results, columns=columns)

        print("✅ Query executed successfully")
        return True
    except Exception as e:
        # Best-effort helper: report the failure and signal it with None.
        print(f"❌ Query error: {e}")
        return None
    finally:
        conn.close()

# Smoke test: open one connection, report the outcome, and release it.
conn = get_connection()
if not conn:
    print("❌ Database connection failed")
else:
    print("✅ Database connection successful")
    conn.close()

✅ Database connection successful


In [None]:
# Narrative cell: prints a section banner followed by a plain-text summary of
# the UTC <-> user-timezone approach used by the SQL function in the next cell.
print("\n🌍 QUESTION 3 - TIMEZONE FILTER IMPLEMENTATION")
print("=" * 60)

print("""
EXPLANATION OF UTC ↔ TIMEZONE CONVERSION LOGIC (in English):

1) TIMEZONE CONVERSION:
    The data is stored in UTC (without timezone). To filter by the user's "current day",
    we need to convert UTC to the specific timezone.

2) CALCULATING THE CURRENT DAY:
    Determine the start and end of the "current day" in the user's timezone
    (00:00:00 to 23:59:59 in local timezone).

3) CONVERTING TO UTC:
    Convert the day boundaries (start and end) back to UTC to
    compare with the stored data.

4) DAYLIGHT SAVING TIME HANDLING:
    PostgreSQL automatically handles daylight saving transitions
    when using 'AT TIME ZONE'.

5) SARGABLE FILTER:
    Use conditions that allow efficient use of indexes on the 'interval' column.
""")

In [4]:
# PL/pgSQL function returning the agent_state_interval rows that fall inside
# one calendar day of the caller's time zone. The "interval" column is treated
# as naive UTC, so the local day's [start, end) boundaries are converted to
# naive UTC once and compared directly against the column (sargable filter).
timezone_function_sql = """
CREATE OR REPLACE FUNCTION filter_current_day_intervals(
    user_timezone TEXT DEFAULT 'UTC',
    target_date DATE DEFAULT CURRENT_DATE
)
RETURNS TABLE(
    agent_id BIGINT,
    domain_id BIGINT,
    "interval" TIMESTAMP WITHOUT TIME ZONE,
    state VARCHAR(40),
    agent_state_time BIGINT,
    local_datetime TIMESTAMP WITHOUT TIME ZONE
) AS $$
DECLARE
    day_start_utc TIMESTAMP WITHOUT TIME ZONE;
    day_end_utc TIMESTAMP WITHOUT TIME ZONE;
    record_count INTEGER;
BEGIN
    -- Calculate UTC boundaries for the target date in user's timezone.
    --
    -- The DATE must be cast to TIMESTAMP before AT TIME ZONE is applied.
    -- Applied to a bare DATE, AT TIME ZONE resolves to the timestamptz variant
    -- (the date is first read as midnight in the *session* time zone), which
    -- shifts the boundary in the wrong direction and makes the result depend
    -- on the session's TimeZone setting.
    --
    --   timestamp   AT TIME ZONE user_timezone -> timestamptz (local midnight as an instant)
    --   timestamptz AT TIME ZONE 'UTC'         -> timestamp   (that instant as naive UTC)
    --
    -- NOTE: the CURRENT_DATE default is evaluated in the session time zone,
    -- not in user_timezone; pass target_date explicitly when they can differ.
    day_start_utc := ((target_date::TIMESTAMP) AT TIME ZONE user_timezone) AT TIME ZONE 'UTC';
    day_end_utc := (((target_date + 1)::TIMESTAMP) AT TIME ZONE user_timezone) AT TIME ZONE 'UTC';

    -- Return filtered results (half-open range, so DST days of 23 or 25 hours
    -- are covered without gaps or overlaps)
    RETURN QUERY
    SELECT
        asi.agent_id,
        asi.domain_id,
        asi."interval",
        asi.state,
        asi.agent_state_time,
        (asi."interval" AT TIME ZONE 'UTC' AT TIME ZONE user_timezone)::TIMESTAMP WITHOUT TIME ZONE as local_datetime
    FROM agent_state_interval asi
    WHERE asi."interval" >= day_start_utc
      AND asi."interval" < day_end_utc;

    -- Log result count
    GET DIAGNOSTICS record_count = ROW_COUNT;
    RAISE NOTICE 'Returned % records', record_count;
END;
$$ LANGUAGE plpgsql;
"""

# execute_query returns True on success and None on failure.
result = execute_query(timezone_function_sql)
if result:
    print("✅ Timezone function created successfully")

✅ Query executed successfully
✅ Timezone function created successfully


In [5]:
# Sample invocation of filter_current_day_intervals for a fixed date and time
# zone; the query text is only defined here and is executed in a later cell.
production_test_query = """
-- Example call for Europe/Lisbon timezone on 2024-09-03
SELECT * FROM filter_current_day_intervals('Europe/Lisbon', '2024-09-03'::DATE)
ORDER BY agent_id, interval
LIMIT 15;
"""

In [6]:
# Run the rollup routine and show whatever result set it returns.
# NOTE(review): process_agent_events_to_intervals() is not defined in the
# visible cells — presumably created earlier in the database; confirm.
# execute_query returns None on failure, so the displayed value may be None.
execute_rollup_query = "SELECT * FROM process_agent_events_to_intervals();"
rollup_result_df = execute_query(execute_rollup_query, fetch=True)
print("📊 Resultado da execução do rollup:")
display(rollup_result_df)

📊 Resultado da execução do rollup:


Unnamed: 0,processed_records,execution_time_ms
0,140,12


In [7]:
# Reconciliation query: per (agent_id, state), sums event durations in seconds
# from agent_event (closed events only) and agent_state_time from
# agent_state_interval, then reports the absolute difference. The FULL OUTER
# JOIN keeps pairs that exist on only one side, with the missing side as 0.
# The query text is only defined here; it is not executed in this cell.
validation_query = """
-- Compare total time from original events vs rollup intervals
WITH original_totals AS (
    SELECT 
        agent_id,
        state,
        SUM(EXTRACT(EPOCH FROM (state_end_datetime - state_start_datetime))) as original_seconds
    FROM agent_event 
    WHERE state_end_datetime IS NOT NULL
    GROUP BY agent_id, state
),
rollup_totals AS (
    SELECT 
        agent_id,
        state,
        SUM(agent_state_time) as rollup_seconds
    FROM agent_state_interval
    GROUP BY agent_id, state
)
SELECT 
    COALESCE(o.agent_id, r.agent_id) as agent_id,
    COALESCE(o.state, r.state) as state,
    COALESCE(o.original_seconds, 0) as original_seconds,
    COALESCE(r.rollup_seconds, 0) as rollup_seconds,
    ABS(COALESCE(o.original_seconds, 0) - COALESCE(r.rollup_seconds, 0)) as difference_seconds
FROM original_totals o
FULL OUTER JOIN rollup_totals r ON o.agent_id = r.agent_id AND o.state = r.state
ORDER BY agent_id, state;
"""

In [8]:
# Execute the sample timezone-filter call defined in an earlier cell and
# display the rows; execute_query returns None (and prints the error) on failure.
production_df = execute_query(production_test_query, fetch=True)
print("🔍 Resultado da função de timezone:")
display(production_df)

🔍 Resultado da função de timezone:


Unnamed: 0,agent_id,domain_id,interval,state,agent_state_time,local_datetime
0,1001,1,2024-09-03 08:00:00,READY,450,2024-09-03 09:00:00
1,1001,1,2024-09-03 08:00:00,NOT_READY,270,2024-09-03 09:00:00
2,1001,1,2024-09-03 08:00:00,LOGGED_IN,30,2024-09-03 09:00:00
3,1001,1,2024-09-03 08:00:00,ON_CALL,150,2024-09-03 09:00:00
4,1001,1,2024-09-03 08:15:00,ON_CALL,510,2024-09-03 09:15:00
5,1001,1,2024-09-03 08:15:00,READY,315,2024-09-03 09:15:00
6,1001,1,2024-09-03 08:15:00,ACW,75,2024-09-03 09:15:00
7,1001,1,2024-09-03 08:30:00,ON_CALL,645,2024-09-03 09:30:00
8,1001,1,2024-09-03 08:30:00,ACW,165,2024-09-03 09:30:00
9,1001,1,2024-09-03 08:30:00,ON_HOLD,90,2024-09-03 09:30:00
