In [1]:
%reload_ext sql
%config SqlMagic.autocommit=False
%config SqlMagic.autolimit=0
%config SqlMagic.autopandas=True
%config SqlMagic.displaylimit=200

In [2]:
%sql trino://localhost:9090/cuebiq/

'Connected: @cuebiq/'

In [3]:
pip install python-geohash

Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import time
import geohash
import logging
import pandas as pd
from datetime import datetime, timedelta
from sqlalchemy import create_engine

In [5]:
# SQL engine
from trino.dbapi import connect 
from sqlalchemy import create_engine
import pandas as pd
import time

class TrinoEngine():
    def __init__(self):
        conn = connect(
            host="localhost",
            port=9090,
            catalog="cuebiq"
        )
        self.cur = conn.cursor()
        self.engine = create_engine("trino://localhost:9090/cuebiq/")
    
    def execute_statement(self, query:str) -> list:
        """
        Create and drop statements.
        """
        self.cur.execute(query)
        return self.cur.fetchall()
    
    def read_sql(self, query:str) -> pd.DataFrame: 
        """
        Select and insert into operations.
        """
        return pd.read_sql(query, self.engine)

sql_engine = TrinoEngine()

In [6]:
schema_name = {'cda': 'cuebiq.paas_cda_pe_v3'}
pe_tj_table = f"{schema_name['cda']}.trajectory_uplevelled"

# Export to local

In [11]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [12]:
# Function to process data for a specific date
def process_data_for_date(event_date, country_code, export_path):
    errors = []  # List to keep track of errors
    
    # Define the export file paths
    csv3_file = f"od_{country_code.lower()}2_agg3_3h.csv"
    csv5_file = f"od_{country_code.lower()}2_agg5_3h.csv"
    csv3_file_path = f"{export_path}{csv3_file}"
    csv5_file_path = f"{export_path}{csv5_file}"

    # Loop through 24 hours in increments of 3 hours
    for start_hour in range(0, 24, 3):
        end_hour = start_hour + 3  # end hour for the 3-hour interval
        logging.info(f"Querying data for date: {event_date}, hours: {start_hour}-{end_hour}")

        try:
            # Fetch the data for the specified event date and country code
            pe_tj_df3 = sql_engine.read_sql(
                f"""
                SELECT 
                    cuebiq_id,
                    duration_minutes,
                    length_meters,
                    number_of_points,
                    TRY(date_parse(substr(start_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) AS event_datetime_local,
                    EXTRACT(HOUR FROM TRY(date_parse(substr(start_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) AS event_hour,
                    geohash_encode(start_lat, start_lng, 5) AS start_geohash5,
                    geohash_encode(start_lat, start_lng, 3) AS start_geohash3,
                    geohash_encode(end_lat, end_lng, 5) AS end_geohash5,
                    geohash_encode(end_lat, end_lng, 3) AS end_geohash3,
                    DATE_FORMAT(date_parse(substr(start_zoned_datetime, 1, 10), '%Y-%m-%d'), '%Y%m%d') AS local_date
                FROM cuebiq.paas_cda_pe_v3.trajectory_uplevelled
                WHERE 
                    event_date = {event_date}
                    AND end_country = '{country_code}' 
                    AND start_country = '{country_code}' 
                """
            )

            # Filter the DataFrame for the current 3-hour interval
            logging.info(f"Processing data for date: {event_date}, hours: {start_hour}-{end_hour}")
            filtered_df = pe_tj_df3[(pe_tj_df3['event_hour'] >= start_hour) & (pe_tj_df3['event_hour'] < end_hour)]

            # Add user numbers to the aggregated data for geohash3
            aggregated_df3 = filtered_df.groupby(['start_geohash3', 'end_geohash3', 'local_date']).agg({
                'cuebiq_id': 'count',
                'duration_minutes': ['mean', 'median', 'std'],
                'length_meters': ['mean', 'median', 'std'],
                'number_of_points': ['mean', 'median', 'std']
            }).reset_index()

            # Flatten the MultiIndex columns
            aggregated_df3.columns = ['start_geohash3', 'end_geohash3', 'local_date', 'trip_count', 
                                      'm_duration_min', 'mdn_duration_min', 'sd_duration_min', 
                                      'm_length_m', 'mdn_length_m', 'sd_length_m',
                                      'm_points_no', 'mdn_points_no', 'sd_points_no']

            # Add the local_time column
            aggregated_df3['local_time'] = aggregated_df3['local_date'].astype(str) + \
                                           f" {start_hour:02d}:00:00 - {end_hour:02d}:00:00"

            # Filter out rows where trip_count is less than or equal to 9
            aggregated_df3 = aggregated_df3[aggregated_df3['trip_count'] > 9]

            # Append the results to the CSV file for geohash3
            logging.info(f"Exporting aggregated data (geohash3) for date: {event_date}, hours: {start_hour}-{end_hour}")
            if not os.path.isfile(csv3_file_path):
                aggregated_df3.to_csv(csv3_file_path, index=False)
            else:
                aggregated_df3.to_csv(csv3_file_path, mode='a', header=False, index=False)

            # Add user numbers to the aggregated data for geohash5
            aggregated_df5 = filtered_df.groupby(['start_geohash5', 'end_geohash5', 'local_date']).agg({
                'cuebiq_id': 'count',
                'duration_minutes': ['mean', 'median', 'std'],
                'length_meters': ['mean', 'median', 'std'],
                'number_of_points': ['mean', 'median', 'std']
            }).reset_index()

            # Flatten the MultiIndex columns
            aggregated_df5.columns = ['start_geohash5', 'end_geohash5', 'local_date', 'trip_count', 
                                      'm_duration_min', 'mdn_duration_min', 'sd_duration_min', 
                                      'm_length_m', 'mdn_length_m', 'sd_length_m',
                                      'm_points_no', 'mdn_points_no', 'sd_points_no']

            # Add the local_time column
            aggregated_df5['local_time'] = aggregated_df5['local_date'].astype(str) + \
                                           f" {start_hour:02d}:00:00 - {end_hour:02d}:00:00"

            # Filter out rows where trip_count is less than or equal to 9
            aggregated_df5 = aggregated_df5[aggregated_df5['trip_count'] > 9]

            # Append the results to the CSV file for geohash5
            logging.info(f"Exporting aggregated data (geohash5) for date: {event_date}, hours: {start_hour}-{end_hour}")
            if not os.path.isfile(csv5_file_path):
                aggregated_df5.to_csv(csv5_file_path, index=False)
            else:
                aggregated_df5.to_csv(csv5_file_path, mode='a', header=False, index=False)
        
        except Exception as e:
            error_message = f"Error processing date: {event_date}, hours: {start_hour}-{end_hour} - {str(e)}"
            logging.error(error_message)
            errors.append(error_message)
    
    return errors

# Function to process data for a range of dates
def process_data_for_date_range(start_date, end_date, country_code, export_path):
    current_date = start_date
    all_errors = []
    while current_date <= end_date:
        event_date = current_date.strftime("%Y%m%d")
        logging.info(f"Processing data for date: {event_date}")
        errors = process_data_for_date(event_date, country_code, export_path)
        all_errors.extend(errors)
        current_date += timedelta(days=1)
    
    # Log any errors that occurred
    if all_errors:
        logging.info("Errors occurred during processing:")
        for error in all_errors:
            logging.info(error)

In [13]:
# Dates to process (example: a range of dates)
start_date = datetime.strptime("20190929", "%Y%m%d")
end_date = datetime.strptime("20191231", "%Y%m%d")

# Process each date in the range
current_date = start_date
while current_date <= end_date:
    event_date = current_date.strftime("%Y%m%d")
    logging.info(f"Processing data for date: {event_date}")
    process_data_for_date(event_date, 'MX', '/home/jovyan/Data/TJ/3h/')
    current_date += timedelta(days=1)

2024-07-03 06:27:51,197 - INFO - Processing data for date: 20190929
2024-07-03 06:27:51,197 - INFO - Querying data for date: 20190929, hours: 0-3
2024-07-03 06:28:15,005 - INFO - Processing data for date: 20190929, hours: 0-3
2024-07-03 06:28:15,030 - INFO - Exporting aggregated data (geohash3) for date: 20190929, hours: 0-3
2024-07-03 06:28:15,064 - INFO - Exporting aggregated data (geohash5) for date: 20190929, hours: 0-3
2024-07-03 06:28:15,083 - INFO - Querying data for date: 20190929, hours: 3-6
2024-07-03 06:28:53,601 - INFO - Processing data for date: 20190929, hours: 3-6
2024-07-03 06:28:53,627 - INFO - Exporting aggregated data (geohash3) for date: 20190929, hours: 3-6
2024-07-03 06:28:53,657 - INFO - Exporting aggregated data (geohash5) for date: 20190929, hours: 3-6
2024-07-03 06:28:53,673 - INFO - Querying data for date: 20190929, hours: 6-9
2024-07-03 06:29:19,764 - INFO - Processing data for date: 20190929, hours: 6-9
2024-07-03 06:29:19,799 - INFO - Exporting aggregated 

# Fillin Gaps

In [20]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Function to process data for a specific date and time intervals
def process_data_for_date(event_date, country_code, export_path, start_hour, end_hour):
    # Define the export file paths
    csv3_file = f"od_{country_code.lower()}4_agg3_3h.csv"
    csv5_file = f"od_{country_code.lower()}4_agg5_3h.csv"
    csv3_file_path = f"{export_path}{csv3_file}"
    csv5_file_path = f"{export_path}{csv5_file}"

    # Loop through the specified hours in increments of 3 hours
    for hour in range(start_hour, end_hour, 3):
        next_hour = hour + 3  # end hour for the 3-hour interval
        logging.info(f"Querying data for date: {event_date}, hours: {hour}-{next_hour}")

        # Fetch the data for the specified event date and country code
        pe_tj_df3 = sql_engine.read_sql(
            f"""
            SELECT 
                cuebiq_id,
                duration_minutes,
                length_meters,
                number_of_points,
                TRY(date_parse(substr(start_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) AS event_datetime_local,
                EXTRACT(HOUR FROM TRY(date_parse(substr(start_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) AS event_hour,
                geohash_encode(start_lat, start_lng, 5) AS start_geohash5,
                geohash_encode(start_lat, start_lng, 3) AS start_geohash3,
                geohash_encode(end_lat, end_lng, 5) AS end_geohash5,
                geohash_encode(end_lat, end_lng, 3) AS end_geohash3,
                DATE_FORMAT(date_parse(substr(start_zoned_datetime, 1, 10), '%Y-%m-%d'), '%Y%m%d') AS local_date
            FROM cuebiq.paas_cda_pe_v3.trajectory_uplevelled
            WHERE 
                event_date = {event_date}
                AND end_country = '{country_code}' 
                AND start_country = '{country_code}' 
            """
        )

        # Filter the DataFrame for the current 3-hour interval
        logging.info(f"Processing data for date: {event_date}, hours: {hour}-{next_hour}")
        filtered_df = pe_tj_df3[(pe_tj_df3['event_hour'] >= hour) & (pe_tj_df3['event_hour'] < next_hour)]

        # Add user numbers to the aggregated data for geohash3
        aggregated_df3 = filtered_df.groupby(['start_geohash3', 'end_geohash3', 'local_date']).agg({
            'cuebiq_id': 'count',
            'duration_minutes': ['mean', 'median', 'std'],
            'length_meters': ['mean', 'median', 'std'],
            'number_of_points': ['mean', 'median', 'std']
        }).reset_index()

        # Flatten the MultiIndex columns
        aggregated_df3.columns = ['start_geohash3', 'end_geohash3', 'local_date', 'trip_count', 
                                  'm_duration_min', 'mdn_duration_min', 'sd_duration_min', 
                                  'm_length_m', 'mdn_length_m', 'sd_length_m',
                                  'm_points_no', 'mdn_points_no', 'sd_points_no']

        # Add the local_time column
        aggregated_df3['local_time'] = aggregated_df3['local_date'].astype(str) + \
                                       f" {hour:02d}:00:00 - {next_hour:02d}:00:00"

        # Filter out rows where trip_count is less than or equal to 9
        aggregated_df3 = aggregated_df3[aggregated_df3['trip_count'] > 9]

        # Append the results to the CSV file for geohash3
        logging.info(f"Exporting aggregated data (geohash3) for date: {event_date}, hours: {hour}-{next_hour}")
        if not os.path.isfile(csv3_file_path):
            aggregated_df3.to_csv(csv3_file_path, index=False)
        else:
            aggregated_df3.to_csv(csv3_file_path, mode='a', header=False, index=False)

        # Add user numbers to the aggregated data for geohash5
        aggregated_df5 = filtered_df.groupby(['start_geohash5', 'end_geohash5', 'local_date']).agg({
            'cuebiq_id': 'count',
            'duration_minutes': ['mean', 'median', 'std'],
            'length_meters': ['mean', 'median', 'std'],
            'number_of_points': ['mean', 'median', 'std']
        }).reset_index()

        # Flatten the MultiIndex columns
        aggregated_df5.columns = ['start_geohash5', 'end_geohash5', 'local_date', 'trip_count', 
                                  'm_duration_min', 'mdn_duration_min', 'sd_duration_min', 
                                  'm_length_m', 'mdn_length_m', 'sd_length_m',
                                  'm_points_no', 'mdn_points_no', 'sd_points_no']

        # Add the local_time column
        aggregated_df5['local_time'] = aggregated_df5['local_date'].astype(str) + \
                                       f" {hour:02d}:00:00 - {next_hour:02d}:00:00"

        # Filter out rows where trip_count is less than or equal to 9
        aggregated_df5 = aggregated_df5[aggregated_df5['trip_count'] > 9]

        # Append the results to the CSV file for geohash5
        logging.info(f"Exporting aggregated data (geohash5) for date: {event_date}, hours: {hour}-{next_hour}")
        if not os.path.isfile(csv5_file_path):
            aggregated_df5.to_csv(csv5_file_path, index=False)
        else:
            aggregated_df5.to_csv(csv5_file_path, mode='a', header=False, index=False)


In [21]:
# Specify the date and initial time interval
event_date = "20190421"
start_hour = 0
end_hour = 24  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 18:09:46,186 - INFO - Querying data for date: 20190421, hours: 0-3
2024-07-03 18:10:34,379 - INFO - Processing data for date: 20190421, hours: 0-3
2024-07-03 18:10:34,411 - INFO - Exporting aggregated data (geohash3) for date: 20190421, hours: 0-3
2024-07-03 18:10:34,453 - INFO - Exporting aggregated data (geohash5) for date: 20190421, hours: 0-3
2024-07-03 18:10:34,482 - INFO - Querying data for date: 20190421, hours: 3-6
2024-07-03 18:11:14,028 - INFO - Processing data for date: 20190421, hours: 3-6
2024-07-03 18:11:14,063 - INFO - Exporting aggregated data (geohash3) for date: 20190421, hours: 3-6
2024-07-03 18:11:14,095 - INFO - Exporting aggregated data (geohash5) for date: 20190421, hours: 3-6
2024-07-03 18:11:14,109 - INFO - Querying data for date: 20190421, hours: 6-9
2024-07-03 18:11:44,131 - INFO - Processing data for date: 20190421, hours: 6-9
2024-07-03 18:11:44,176 - INFO - Exporting aggregated data (geohash3) for date: 20190421, hours: 6-9
2024-07-03 18:11:44,2

In [10]:
# Specify the date and initial time interval
event_date = "20190122"
start_hour = 9
end_hour = 12  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:42:42,957 - INFO - Querying data for date: 20190122, hours: 9-12
2024-07-03 17:43:23,593 - INFO - Processing data for date: 20190122, hours: 9-12
2024-07-03 17:43:23,667 - INFO - Exporting aggregated data (geohash3) for date: 20190122, hours: 9-12
2024-07-03 17:43:23,751 - INFO - Exporting aggregated data (geohash5) for date: 20190122, hours: 9-12


In [11]:
# Specify the date and initial time interval
event_date = "20190128"
start_hour = 21
end_hour = 24  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:43:23,902 - INFO - Querying data for date: 20190128, hours: 21-24
2024-07-03 17:44:05,942 - INFO - Processing data for date: 20190128, hours: 21-24
2024-07-03 17:44:05,972 - INFO - Exporting aggregated data (geohash3) for date: 20190128, hours: 21-24
2024-07-03 17:44:06,007 - INFO - Exporting aggregated data (geohash5) for date: 20190128, hours: 21-24


In [12]:
# Specify the date and initial time interval
event_date = "20190202"
start_hour = 9
end_hour = 15  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:44:06,121 - INFO - Querying data for date: 20190202, hours: 9-12
2024-07-03 17:44:33,477 - INFO - Processing data for date: 20190202, hours: 9-12
2024-07-03 17:44:33,554 - INFO - Exporting aggregated data (geohash3) for date: 20190202, hours: 9-12
2024-07-03 17:44:33,662 - INFO - Exporting aggregated data (geohash5) for date: 20190202, hours: 9-12
2024-07-03 17:44:33,701 - INFO - Querying data for date: 20190202, hours: 12-15
2024-07-03 17:44:53,329 - INFO - Processing data for date: 20190202, hours: 12-15
2024-07-03 17:44:53,514 - INFO - Exporting aggregated data (geohash3) for date: 20190202, hours: 12-15
2024-07-03 17:44:53,709 - INFO - Exporting aggregated data (geohash5) for date: 20190202, hours: 12-15


In [13]:
# Specify the date and initial time interval
event_date = "20190405"
start_hour = 6
end_hour = 9  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:44:53,983 - INFO - Querying data for date: 20190405, hours: 6-9
2024-07-03 17:45:17,888 - INFO - Processing data for date: 20190405, hours: 6-9
2024-07-03 17:45:17,954 - INFO - Exporting aggregated data (geohash3) for date: 20190405, hours: 6-9
2024-07-03 17:45:18,049 - INFO - Exporting aggregated data (geohash5) for date: 20190405, hours: 6-9


In [14]:
# Specify the date and initial time interval
event_date = "20190807"
start_hour = 12
end_hour = 18  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:45:18,220 - INFO - Querying data for date: 20190807, hours: 12-15
2024-07-03 17:46:01,417 - INFO - Processing data for date: 20190807, hours: 12-15
2024-07-03 17:46:01,516 - INFO - Exporting aggregated data (geohash3) for date: 20190807, hours: 12-15
2024-07-03 17:46:01,626 - INFO - Exporting aggregated data (geohash5) for date: 20190807, hours: 12-15
2024-07-03 17:46:01,661 - INFO - Querying data for date: 20190807, hours: 15-18
2024-07-03 17:46:39,240 - INFO - Processing data for date: 20190807, hours: 15-18
2024-07-03 17:46:39,347 - INFO - Exporting aggregated data (geohash3) for date: 20190807, hours: 15-18
2024-07-03 17:46:39,444 - INFO - Exporting aggregated data (geohash5) for date: 20190807, hours: 15-18


In [15]:
# Specify the date and initial time interval
event_date = "20191007"
start_hour = 0
end_hour = 3  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:46:39,609 - INFO - Querying data for date: 20191007, hours: 0-3
2024-07-03 17:47:27,117 - INFO - Processing data for date: 20191007, hours: 0-3
2024-07-03 17:47:27,138 - INFO - Exporting aggregated data (geohash3) for date: 20191007, hours: 0-3
2024-07-03 17:47:27,166 - INFO - Exporting aggregated data (geohash5) for date: 20191007, hours: 0-3


In [16]:
# Specify the date and initial time interval
event_date = "20191021"
start_hour = 21
end_hour = 24  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:47:27,273 - INFO - Querying data for date: 20191021, hours: 21-24
2024-07-03 17:47:54,068 - INFO - Processing data for date: 20191021, hours: 21-24
2024-07-03 17:47:54,079 - INFO - Exporting aggregated data (geohash3) for date: 20191021, hours: 21-24
2024-07-03 17:47:54,090 - INFO - Exporting aggregated data (geohash5) for date: 20191021, hours: 21-24


In [17]:
# Specify the date and initial time interval
event_date = "20191022"
start_hour = 0
end_hour = 12  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:47:54,135 - INFO - Querying data for date: 20191022, hours: 0-3
2024-07-03 17:48:06,186 - INFO - Processing data for date: 20191022, hours: 0-3
2024-07-03 17:48:06,197 - INFO - Exporting aggregated data (geohash3) for date: 20191022, hours: 0-3
2024-07-03 17:48:06,206 - INFO - Exporting aggregated data (geohash5) for date: 20191022, hours: 0-3
2024-07-03 17:48:06,208 - INFO - Querying data for date: 20191022, hours: 3-6
2024-07-03 17:48:17,006 - INFO - Processing data for date: 20191022, hours: 3-6
2024-07-03 17:48:17,017 - INFO - Exporting aggregated data (geohash3) for date: 20191022, hours: 3-6
2024-07-03 17:48:17,064 - INFO - Exporting aggregated data (geohash5) for date: 20191022, hours: 3-6
2024-07-03 17:48:17,066 - INFO - Querying data for date: 20191022, hours: 6-9
2024-07-03 17:48:27,480 - INFO - Processing data for date: 20191022, hours: 6-9
2024-07-03 17:48:27,491 - INFO - Exporting aggregated data (geohash3) for date: 20191022, hours: 6-9
2024-07-03 17:48:27,5

In [18]:
# Specify the date and initial time interval
event_date = "20191215"
start_hour = 0
end_hour = 3  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:48:44,676 - INFO - Querying data for date: 20191215, hours: 0-3
2024-07-03 17:49:21,774 - INFO - Processing data for date: 20191215, hours: 0-3
2024-07-03 17:49:21,796 - INFO - Exporting aggregated data (geohash3) for date: 20191215, hours: 0-3
2024-07-03 17:49:21,831 - INFO - Exporting aggregated data (geohash5) for date: 20191215, hours: 0-3


In [19]:
# Specify the date and initial time interval
event_date = "20191231"
start_hour = 0
end_hour = 9  # Process until the end of the day
country_code = 'MX'
export_path = '/home/jovyan/Data/TJ/3h/'

# Process data for the specified date and time intervals
process_data_for_date(event_date, country_code, export_path, start_hour, end_hour)


2024-07-03 17:49:21,902 - INFO - Querying data for date: 20191231, hours: 0-3
2024-07-03 17:49:54,972 - INFO - Processing data for date: 20191231, hours: 0-3
2024-07-03 17:49:54,983 - INFO - Exporting aggregated data (geohash3) for date: 20191231, hours: 0-3
2024-07-03 17:49:54,992 - INFO - Exporting aggregated data (geohash5) for date: 20191231, hours: 0-3
2024-07-03 17:49:54,995 - INFO - Querying data for date: 20191231, hours: 3-6
2024-07-03 17:50:12,128 - INFO - Processing data for date: 20191231, hours: 3-6
2024-07-03 17:50:12,139 - INFO - Exporting aggregated data (geohash3) for date: 20191231, hours: 3-6
2024-07-03 17:50:12,150 - INFO - Exporting aggregated data (geohash5) for date: 20191231, hours: 3-6
2024-07-03 17:50:12,152 - INFO - Querying data for date: 20191231, hours: 6-9
2024-07-03 17:50:28,666 - INFO - Processing data for date: 20191231, hours: 6-9
2024-07-03 17:50:28,677 - INFO - Exporting aggregated data (geohash3) for date: 20191231, hours: 6-9
2024-07-03 17:50:28,6