### First install the dependency

In [None]:
# Use the explicit %pip magic so the package is installed into the environment of
# the running kernel and the cell does not depend on IPython automagic being on.
%pip install redshift-connector

### Import all required dependencies

In [14]:
import csv
from zipfile import ZipFile, ZipInfo
from io import BytesIO, StringIO
from typing import IO, List
from datetime import datetime
from sys import exit, stdout
from redshift_connector import connect, Connection
import os

In [15]:
def redshift_open_connection_by_dict(dict_secret: dict, database: str = None) -> Connection:
    """Open a Redshift connection from a dictionary of connection settings.

    Args:
        dict_secret: Mapping that must provide the keys 'hostname', 'user',
            'password' and 'port'; 'dbname' is read only when ``database``
            is not supplied.
        database: Database to connect to. When None, ``dict_secret['dbname']``
            is used instead.

    Returns:
        The object returned by ``redshift_connector.connect``.

    Raises:
        KeyError: If a required key is missing from ``dict_secret``.
    """
    print('Opening connection by secret dict')

    # Fall back to the database named in the secret when the caller gives none.
    target_db = dict_secret['dbname'] if database is None else database

    connection_kwargs = {
        'host': dict_secret['hostname'],
        'database': target_db,
        'user': dict_secret['user'],
        'password': dict_secret['password'],
        'port': dict_secret['port'],
    }
    return connect(**connection_kwargs)

In [24]:
# Function to save data in chunks to CSV
def redshift_get_rows_and_save_csv_by_chunks(dict_secret: dict, str_query: str, save_path: str, delimiter: str = '|', quoting: int = csv.QUOTE_NONNUMERIC, lineterminator: str = '\r\n', upper_header: bool = True, batch_size: int = 1000):
    """Run a query on Redshift and write its result set to a CSV file in batches.

    Rows are pulled with ``cursor.fetchmany(batch_size)`` and written as they
    arrive, so the whole result set is not collected into one list by this
    function.

    Args:
        dict_secret: Connection settings, passed to
            ``redshift_open_connection_by_dict``.
        str_query: SQL text handed to ``cursor.execute``.
        save_path: Destination file. Its parent directory is created when the
            path has a directory component; a bare filename is written to the
            current working directory.
        delimiter: Field separator passed to ``csv.writer``.
        quoting: One of the ``csv.QUOTE_*`` constants, passed to ``csv.writer``.
        lineterminator: Row terminator passed to ``csv.writer``.
        upper_header: When True, column names are upper-cased in the header row.
        batch_size: Number of rows requested per ``fetchmany`` call.

    Returns:
        None. The result is the file written at ``save_path``.

    Notes:
        The progress message is printed before each fetch, so one extra line is
        logged for the final, empty fetch that ends the loop.
    """
    conn = redshift_open_connection_by_dict(dict_secret)
    try:
        conn.autocommit = False
        cur = conn.cursor()
        try:
            print('Executing query...')
            cur.execute(str_query)
            cols = [a[0] for a in cur.description]

            # Ensure the directory exists. os.path.dirname returns '' for a bare
            # filename and os.makedirs('') raises, so only create it when present.
            target_dir = os.path.dirname(save_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            with open(save_path, mode='w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file, delimiter=delimiter, quotechar='"', quoting=quoting, lineterminator=lineterminator)

                if upper_header:
                    cols = [k.upper() for k in cols]

                # Write header
                writer.writerow(cols)
                idx = 1
                while True:
                    print(f'Running idx batch_size: {idx}')
                    idx += 1
                    rows = cur.fetchmany(batch_size)
                    if not rows:
                        break
                    writer.writerows(rows)

            print(f"Data saved to CSV at: {save_path}")
        finally:
            # Release the cursor even when the query or the file write fails.
            cur.close()
    finally:
        # Always release the connection so a failed export does not leak it.
        conn.close()

## Set your credentials and the root path where files will be stored

In [22]:
# Connection settings consumed by redshift_open_connection_by_dict.
dict_secret = {
    "dbname": "datalake_dw",
    "port": 5439,
    "hostname": "asgard-redshift-production.cmqegk5gj3mi.sa-east-1.redshift.amazonaws.com",
    "user": "herculano_cunha",
    # Read the password from the REDSHIFT_PASSWORD environment variable so the
    # real secret never has to be saved in the notebook; the placeholder is
    # used only when the variable is not set.
    "password": os.environ.get("REDSHIFT_PASSWORD", "your secret password")
}
# Root directory where the exported CSV files are written.
str_path_save = "/home/jovyan/work/files"

# One entry per export: the SQL to run and the name of the CSV file to produce.
list_of_querys = [
    {'query': "select * from credit_portfolio.metrica limit 100000", 'filename': 'metrica.csv'},
    {'query': "select * from credit_portfolio.contrato limit 100000", 'filename': 'contrato.csv'}
]

In [26]:
# Export every configured query to its own pipe-delimited CSV file.
for dict_query in list_of_querys:
    query_text = dict_query.get('query')
    file_name = dict_query.get('filename')

    print(f"Selecting rows on Redshift for query: {query_text}")
    save_path = f"{str_path_save}/{file_name}"

    # NOTE(review): csv.QUOTE_NONE with no escapechar makes the csv module raise
    # if a field contains the delimiter - confirm the data never contains '|'.
    redshift_get_rows_and_save_csv_by_chunks(
        dict_secret=dict_secret,
        str_query=query_text,
        save_path=save_path,
        delimiter='|',
        quoting=csv.QUOTE_NONE,
    )
    print(f"{save_path} - saved")

    # Blank line, separator, blank line between consecutive exports.
    print('\n------------------------\n')

Selecting rows on Redshift for query: select * from credit_portfolio.metrica limit 100000
Opening connection by secret dict
Executing query...
Running idx batch_size: 1
Running idx batch_size: 2
Running idx batch_size: 3
Running idx batch_size: 4
Running idx batch_size: 5
Running idx batch_size: 6
Running idx batch_size: 7
Running idx batch_size: 8
Running idx batch_size: 9
Running idx batch_size: 10
Running idx batch_size: 11
Running idx batch_size: 12
Running idx batch_size: 13
Running idx batch_size: 14
Running idx batch_size: 15
Running idx batch_size: 16
Running idx batch_size: 17
Running idx batch_size: 18
Running idx batch_size: 19
Running idx batch_size: 20
Running idx batch_size: 21
Running idx batch_size: 22
Running idx batch_size: 23
Running idx batch_size: 24
Running idx batch_size: 25
Running idx batch_size: 26
Running idx batch_size: 27
Running idx batch_size: 28
Running idx batch_size: 29
Running idx batch_size: 30
Running idx batch_size: 31
Running idx batch_size: 32
Ru