## Extract from Postgres

In [1]:
import psycopg2
import csv

In [4]:
def export_data_to_csv():
    """Dump every row of ``fire_incidents_tbl`` to a local CSV file.

    Connects to the ``fire_incidents_db`` PostgreSQL database, runs
    ``SELECT * FROM fire_incidents_tbl`` and writes the result to
    ``./temp_csv_files/exported_nyc_fire_incidents_data.csv``. The first
    CSV row holds the column names reported by the cursor; each following
    row is one table row.

    Returns:
        None. The only effect is the file written to disk.

    Raises:
        Any exception raised by the database driver or by file I/O is
        propagated unchanged; the cursor and connection are still closed.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded here; consider sourcing them from configuration instead.
    conn = psycopg2.connect(
        dbname='fire_incidents_db',
        user='root',
        password='root',
        host='fire_incidents_db_container',
        port=5432
    )
    try:
        # The cursor context manager closes the cursor even when the query
        # or the file write fails part-way through.
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM fire_incidents_tbl")

            # newline='' lets the csv module control line endings; the
            # explicit encoding keeps the output independent of the host
            # locale.
            with open(
                './temp_csv_files/exported_nyc_fire_incidents_data.csv',
                'w',
                newline='',
                encoding='utf-8',
            ) as file:
                writer = csv.writer(file)
                # cursor.description holds one entry per result column;
                # element 0 of each entry is the column name.
                writer.writerow([column[0] for column in cursor.description])
                # Iterate the cursor directly rather than building an
                # intermediate list of all rows with fetchall().
                writer.writerows(cursor)
    finally:
        # Always release the connection, on success or failure.
        conn.close()

# Run the export right away when this cell is executed.
export_data_to_csv()

## Upload CSV File to S3

In [None]:
import boto3

In [None]:
def upload_to_s3(
    local_path='/path/to/exported_data.csv',
    bucket_name='your_bucket_name',
    object_key='exported_data.csv',
):
    """Upload a local file to an S3 bucket.

    Called with no arguments, this behaves as before: it uploads
    ``/path/to/exported_data.csv`` to ``your_bucket_name`` under the key
    ``exported_data.csv``. All three defaults are placeholders.

    Args:
        local_path: Path of the file on the local filesystem to upload.
        bucket_name: Name of the destination S3 bucket.
        object_key: Key (object name) to store the file under in the bucket.

    Returns:
        None.
    """
    # NOTE(review): the default local_path is a placeholder and does not
    # match the file written by export_data_to_csv(); pass the real path
    # (or update the default) before running the pipeline end to end.
    # NOTE(review): the access key and secret below are placeholder literals;
    # real credentials should not be committed in source.
    s3 = boto3.client(
        's3',
        aws_access_key_id='your_access_key',
        aws_secret_access_key='your_secret_key'
    )
    s3.upload_file(local_path, bucket_name, object_key)

# Run the upload right away when this cell is executed.
upload_to_s3()

## Load Data from S3 to Redshift

In [None]:
-- Load the CSV object stored in S3 into the target Redshift table.
-- The table name, bucket name, account id and role name are placeholders
-- to be replaced with real values.
-- CSV selects comma-separated input; IGNOREHEADER 1 skips the first line of
-- the file so the column-header row is not loaded as data.
COPY your_redshift_table
FROM 's3://your_bucket_name/exported_data.csv'
IAM_ROLE 'arn:aws:iam::your_account_id:role/your_redshift_role'
CSV
IGNOREHEADER 1;

## DAG

In [None]:
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.postgres.operators.postgres import PostgresOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.hooks.redshift import RedshiftHook
from datetime import datetime

# Pipeline definition: export from Postgres -> upload to S3 -> COPY into
# Redshift, scheduled once per day.
default_args = {'start_date': datetime(2025, 3, 22)}

# Operators instantiated inside the ``with`` block are attached to this DAG
# automatically, so none of them needs an explicit ``dag=`` argument.
with DAG(
    'postgres_to_redshift',
    default_args=default_args,
    schedule_interval='@daily',
) as dag:
    # Step 1: dump the Postgres table to a local CSV file.
    export_task = PythonOperator(
        task_id='export_postgres_to_csv',
        python_callable=export_data_to_csv,
    )

    # Step 2: push the CSV file to S3.
    upload_task = PythonOperator(
        task_id='upload_csv_to_s3',
        python_callable=upload_to_s3,
    )

    # Step 3: run the COPY statement through the 'redshift_default'
    # connection. The SQL literal's whitespace is kept exactly as it was.
    redshift_task = PostgresOperator(
        task_id='load_to_redshift',
        postgres_conn_id='redshift_default',
        sql="""
        COPY your_redshift_table
        FROM 's3://your_bucket_name/exported_data.csv'
        IAM_ROLE 'arn:aws:iam::your_account_id:role/your_redshift_role'
        CSV
        IGNOREHEADER 1;
    """,
    )

    # Execution order: export, then upload, then load.
    export_task >> upload_task >> redshift_task