In [2]:
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from beam_postgres import splitters
from beam_postgres.io import ReadFromPostgres
from datetime import datetime

class FilterWeekdays(beam.DoFn):
    """Beam DoFn that forwards only elements dated Monday through Friday."""

    def process(self, element):
        """Yield ``element`` unchanged when its ``'datetime'`` is a weekday.

        The value stored under the ``'datetime'`` key must provide a
        ``weekday()`` method. Elements whose date falls on a Saturday or
        Sunday produce no output.
        """
        # weekday() counts Monday as 0 and Sunday as 6, so anything from 5
        # upwards is the weekend and is dropped.
        day_index = element['datetime'].weekday()
        if day_index >= 5:
            return
        yield element

# Source transform that runs the query against the local "transport" database.
read_from_postgres = ReadFromPostgres(
    query="SELECT * FROM public.your_table_name;",
    host="localhost",
    database="transport",
    user="postgres",
    password="postgres",
    # The splitter controls how the query is split for performance;
    # NoSplitter is the strategy selected here.
    splitter=splitters.NoSplitter(),
)

with beam.Pipeline(options=PipelineOptions()) as p:
    # PCollection of the rows read from PostgreSQL.
    data = p | "ReadFromPostgres" >> read_from_postgres

    # Same rows with the weekend entries removed.
    filtered_data = data | "FilterWeekdays" >> beam.ParDo(FilterWeekdays())

    # Print each remaining row to stdout; further downstream steps can be
    # chained onto 'filtered_data' here.
    filtered_data | "WriteToStdout" >> beam.Map(print)


INFO:beam_postgres.client:Successfully execute query: EXPLAIN SELECT * FROM (SELECT * FROM public.your_table_name) as subq
INFO:beam_postgres.client:Successfully execute query: SELECT * FROM public.your_table_name


{'datetime': datetime.datetime(2019, 1, 31, 21, 15), 'street_time': 1266.0, 'count': 21, 'velocity': 82.0952380952381}
{'datetime': datetime.datetime(2019, 1, 4, 6, 0), 'street_time': 145.0, 'count': 2, 'velocity': 14.5}
{'datetime': datetime.datetime(2019, 2, 12, 12, 30), 'street_time': 2188.0, 'count': 10, 'velocity': 33.5}
{'datetime': datetime.datetime(2019, 1, 8, 3, 15), 'street_time': 470.0, 'count': 2, 'velocity': 9.5}
{'datetime': datetime.datetime(2019, 1, 8, 6, 15), 'street_time': 2818.0, 'count': 65, 'velocity': 69.61538461538461}
{'datetime': datetime.datetime(2019, 1, 8, 7, 45), 'street_time': 10.0, 'count': 1, 'velocity': 23.0}
{'datetime': datetime.datetime(2019, 2, 14, 16, 15), 'street_time': 213.0, 'count': 38, 'velocity': 30.07111528822055}
{'datetime': datetime.datetime(2019, 1, 18, 7, 30), 'street_time': 663.0, 'count': 74, 'velocity': 74.29054054054055}
{'datetime': datetime.datetime(2019, 1, 10, 2, 45), 'street_time': 1285.0, 'count': 19, 'velocity': 83.0}
{'datet