In [2]:
import boto3
import botocore
import csv
from io import StringIO
import psycopg2

In [3]:
def download_csv_from_s3(bucket_name, object_key):
    """Download a CSV object from S3 and return its text with a cleaned header.

    The request is sent unsigned, i.e. no AWS credentials are used. Only the
    header row is modified: leading and trailing whitespace is stripped from
    every column name. Everything after the header is returned unchanged.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        object_key: Key of the CSV object inside the bucket.

    Returns:
        The decoded CSV content as a single string with the cleaned header.
        Content without any newline is treated as a header-only file.
    """
    s3 = boto3.client('s3', config=botocore.config.Config(signature_version=botocore.UNSIGNED))
    response = s3.get_object(Bucket=bucket_name, Key=object_key)
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark, which would otherwise end up inside the first
    # column name (str.strip() does not remove it).
    content = response['Body'].read().decode('utf-8-sig')

    # partition() always yields three parts, so content with no newline at all
    # (empty or header-only object) no longer fails on tuple unpacking.
    header, newline, rows = content.partition("\n")
    cleaned_header = ",".join(column.strip() for column in header.split(","))

    # Re-attach the separator only if there was one in the original content
    return cleaned_header + newline + rows

# Fetch the Titanic dataset once; later cells parse this string
titanic_csv_content = download_csv_from_s3(
    bucket_name='data-eng-makers-public-datasets-404544469985',
    object_key='etl_bites_04_titanic_dataset.csv',
)

In [4]:
def calculate_average_fare(titanic_data, pclass_filter):
    """Return the average fare of all passengers in one passenger class.

    Args:
        titanic_data: Iterable of row mappings (e.g. a ``csv.DictReader``)
            with at least the keys ``'Pclass'`` and ``'Fare'``.
        pclass_filter: Passenger class to select; compared against the
            ``'Pclass'`` value via its string form.

    Returns:
        The mean fare of the matching rows as a float, or ``0`` when no
        matching row has a fare.

    Raises:
        ValueError: If a matching row has a non-blank fare that is not a number.
    """
    # Loop-invariant: convert the filter once instead of once per row
    wanted_class = str(pclass_filter)
    total_fare = 0.0
    passengers_count = 0

    for row in titanic_data:
        # `or ''` covers None values (DictReader fills short rows with None);
        # strip() tolerates stray whitespace around the class value
        if (row['Pclass'] or '').strip() != wanted_class:
            continue

        fare_text = (row['Fare'] or '').strip()
        if not fare_text:
            # A missing fare carries no information; leave it out of the
            # average instead of crashing in float()
            continue

        total_fare += float(fare_text)
        passengers_count += 1

    return total_fare / passengers_count if passengers_count > 0 else 0

# Parse the downloaded CSV text row by row and average the first-class fares
average_fare_class_1 = calculate_average_fare(
    titanic_data=csv.DictReader(StringIO(titanic_csv_content)),
    pclass_filter=1,
)

In [None]:
-- Target table for the computed average fare of a passenger class.
-- IF NOT EXISTS keeps the statement safe to run again when the table is
-- already present (a plain CREATE TABLE would fail with an error).
CREATE TABLE IF NOT EXISTS class_average_fares (
    id SERIAL PRIMARY KEY,                 -- auto-incrementing surrogate key
    pclass INTEGER NOT NULL,               -- passenger class the average refers to
    average_fare NUMERIC(10, 2) NOT NULL   -- average fare, kept to two decimal places
);

In previous bites, we created the table from within the Python script, and that is still perfectly possible here. However, I wanted to show you that there are other ways.

To execute the query, you can choose from several options. I simply opened TablePlus and ran the query in its SQL execution section to create the table. (The cell above contains SQL, not Python, so it is not meant to be run inside this notebook.)

In [6]:
def insert_data_to_postgresql(data, connection):
    """Insert one ``(pclass, average_fare)`` row into ``class_average_fares``.

    Args:
        data: Two-item sequence ``(pclass, average_fare)`` bound to the
            query's two ``%s`` placeholders.
        connection: Open DB-API connection (e.g. from ``psycopg2.connect``).
            It is left open so the caller can keep using it.

    Raises:
        Exception: Whatever the driver raises while executing or committing;
            the transaction is rolled back before the error is re-raised.
    """
    query = "INSERT INTO class_average_fares (pclass, average_fare) VALUES (%s, %s)"
    try:
        # The context manager closes the cursor even if execute() fails
        with connection.cursor() as cursor:
            cursor.execute(query, data)
        connection.commit()
    except Exception:
        # Without a rollback the connection stays in a failed transaction and
        # rejects every later statement
        connection.rollback()
        raise

# Connection settings for a local PostgreSQL instance; the user name is
# machine-specific, so adjust it to your own setup before running this cell
etl_bites_conn_string = "host='localhost' port='5432' dbname='etl_bites' user='jackdench'"
conn = psycopg2.connect(dsn=etl_bites_conn_string)

# Store the first-class average as a (pclass, average_fare) row
insert_data_to_postgresql(data=(1, average_fare_class_1), connection=conn)