In [1]:
import os
from datetime import datetime

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from beam_postgres import splitters
from beam_postgres.io import ReadFromPostgres

class FilterWeekdays(beam.DoFn):
    """Pass through only the elements whose ``'datetime'`` value is a weekday."""

    # weekday() numbers Monday as 0, which makes Saturday (5) the first
    # weekend day.
    _FIRST_WEEKEND_DAY = 5

    def process(self, element):
        """Yield ``element`` unchanged when it is dated Monday-Friday.

        Args:
            element: Mapping with a ``'datetime'`` entry whose value provides
                a ``weekday()`` method.

        Yields:
            The same ``element`` for weekdays; nothing for weekend days.
        """
        day_index = element['datetime'].weekday()
        if day_index >= self._FIRST_WEEKEND_DAY:
            # Saturday or Sunday: emit nothing for this element.
            return
        yield element

class FormatDataFn(beam.DoFn):
    """Convert a row into a dictionary of output-ready values."""

    # strftime pattern used to render the row's datetime as text.
    _TIMESTAMP_PATTERN = '%Y-%m-%d %H:%M:%S'

    def process(self, element):
        """Yield a new dictionary holding the formatted fields of ``element``.

        Args:
            element: Mapping with ``'datetime'``, ``'street_time'``,
                ``'count'`` and ``'velocity'`` entries.

        Yields:
            A dict with the same four keys: the datetime rendered as a
            ``YYYY-MM-DD HH:MM:SS`` string, ``street_time`` and ``velocity``
            rounded to two decimal places, and ``count`` passed through as is.
        """
        timestamp_text = element['datetime'].strftime(self._TIMESTAMP_PATTERN)

        # Key order is kept as datetime, street_time, count, velocity so that
        # consumers relying on dictionary order see the same layout.
        yield {
            'datetime': timestamp_text,
            'street_time': round(element['street_time'], 2),
            'count': element['count'],
            'velocity': round(element['velocity'], 2),
        }

# Pipeline: read rows from PostgreSQL, keep the weekday rows, format them and
# write them out as text.
#
# Connection settings are taken from the standard PG* environment variables
# when they are set, so real credentials do not have to be committed with the
# notebook. The fallbacks are the original local-development values.
with beam.Pipeline(options=PipelineOptions()) as p:
    read_from_postgres = ReadFromPostgres(
        query="SELECT * FROM public.your_table_name;",
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "transport"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "postgres"),
        # The splitter controls how the query is split for performance;
        # NoSplitter is the choice made here.
        splitter=splitters.NoSplitter(),
    )

    # Read data from PostgreSQL and store it in a PCollection named 'data'.
    data = p | "ReadFromPostgres" >> read_from_postgres

    # Keep only the rows dated Monday-Friday; weekend rows are dropped.
    weekday_data = data | "FilterWeekdays" >> beam.ParDo(FilterWeekdays())

    # Format the remaining rows with the FormatDataFn transformation.
    formatted_weekday_data = weekday_data | "FormatData" >> beam.ParDo(FormatDataFn())

    # Write one formatted record per line. WriteToText treats the path as a
    # prefix and appends a shard suffix; the CSV-conversion cell reads the
    # resulting 'formatted_weekdays-00000-of-00001' file.
    formatted_weekday_data | "Write to file" >> beam.io.WriteToText(
        'nifi/nifi/conf/source1/formatted_weekdays'
    )


INFO:beam_postgres.client:Successfully execute query: EXPLAIN SELECT * FROM (SELECT * FROM public.your_table_name) as subq
INFO:beam_postgres.client:Successfully execute query: SELECT * FROM public.your_table_name


In [2]:
import ast
import csv
import datetime  # not used below; kept in case other cells rely on the name

# Convert the text file of record dictionaries (one Python dict repr per line)
# into a CSV file with a header row.
input_file = "nifi/nifi/conf/source1/formatted_weekdays-00000-of-00001"
csv_file = 'nifi/nifi/conf/source1/weekdays.csv'

# Decode explicitly as UTF-8 rather than relying on the platform's default
# encoding (Beam's text sink is expected to write UTF-8).
with open(input_file, 'r', encoding='utf-8') as file:
    lines = file.readlines()

# ast.literal_eval only accepts Python literals (dicts, strings, numbers, ...),
# so a corrupted or tampered input file cannot execute code the way eval()
# could. Blank lines and literal 'None' lines carry no record and are skipped.
data = [
    ast.literal_eval(text)
    for text in (line.strip() for line in lines)
    if text and text != 'None'
]

if data:
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        # The first record's keys define the CSV columns and their order.
        fieldnames = list(data[0].keys())
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(data)

    print(f'CSV file "{csv_file}" has been created.')
else:
    print('No valid data found in the input file.')

CSV file "nifi/nifi/conf/source1/weekdays.csv" has been created.
