In [7]:
import boto3
import botocore
import csv
from io import StringIO
import psycopg2


In [9]:
def download_csv_from_s3(bucket_name, object_key):
    """Download a CSV object from S3 and return its text with a tidied header.

    The S3 client is created with ``signature_version=botocore.UNSIGNED``, so
    requests are sent unsigned (no AWS credentials are attached).

    Args:
        bucket_name: Name of the S3 bucket holding the object.
        object_key: Key of the CSV object inside the bucket.

    Returns:
        The object's content as a ``str``. Leading/trailing whitespace is
        removed from every comma-separated name on the first line; everything
        after the first line is returned unchanged. A header-only object
        (no newline at all) yields just the cleaned header, and an empty
        object yields an empty string.

    Raises:
        UnicodeDecodeError: If the object body is not valid UTF-8.
        Any exception raised by the boto3 client (e.g. missing bucket/key)
        propagates unchanged.
    """
    s3 = boto3.client('s3', config=botocore.config.Config(signature_version=botocore.UNSIGNED))
    response = s3.get_object(Bucket=bucket_name, Key=object_key)

    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark, which str.strip() would otherwise leave glued
    # to the first column name.
    content = response['Body'].read().decode('utf-8-sig')

    # partition() never raises when there is no newline (unlike unpacking the
    # result of split("\n", 1)); `newline` is "" in that case, so nothing is
    # appended to a header-only file.
    header, newline, rows = content.partition("\n")
    cleaned_header = ",".join(column.strip() for column in header.split(","))

    return cleaned_header + newline + rows

# Download the Titanic CSV once and keep its text; later cells re-parse this
# string with csv.DictReader.
titanic_csv_content = download_csv_from_s3(
    bucket_name='data-eng-makers-public-datasets-404544469985',
    object_key='etl_bites_04_titanic_dataset.csv',
)


In [20]:
def calculate_average_fare(titanic_data, pclass_filter):
    """Return the mean ``Fare`` of the rows whose ``Pclass`` matches.

    Args:
        titanic_data: Iterable of mappings with ``'Pclass'`` and ``'Fare'``
            keys (for example a ``csv.DictReader``). It is consumed once.
        pclass_filter: Class to select; it is compared as ``str(pclass_filter)``
            against each row's ``'Pclass'`` value.

    Returns:
        The arithmetic mean of the matching fares, or ``0`` when no row
        matches.

    Raises:
        ValueError: If a matching row's ``'Fare'`` cannot be parsed by
            ``float``.
        KeyError: If a row lacks ``'Pclass'``, or a matching row lacks
            ``'Fare'``.
    """
    wanted_class = str(pclass_filter)
    matching_fares = (
        float(record['Fare'])
        for record in titanic_data
        if record['Pclass'] == wanted_class
    )

    # Accumulate by hand, in row order, so the running total is built with
    # plain left-to-right additions.
    fare_sum = 0
    matched = 0
    for fare in matching_fares:
        fare_sum += fare
        matched += 1

    if matched == 0:
        return 0
    return fare_sum / matched

# A DictReader can only be iterated once, so a fresh reader over the
# downloaded text is built for each passenger class.
average_fare_class_1, average_fare_class_2, average_fare_class_3 = (
    calculate_average_fare(csv.DictReader(StringIO(titanic_csv_content)), pclass)
    for pclass in (1, 2, 3)
)

In [11]:
# Keyword/value connection settings passed to psycopg2.connect() by the cells
# below. user and password are blank in this notebook.
conn_string = (
    "dbname='etl_bites' "
    "user='' "
    "password='' "
    "host='localhost' "
    "port='5432'"
)

In [14]:
def execute_query_postgresql(query):
    """Run a single SQL statement on a short-lived PostgreSQL connection.

    A new connection is opened from the module-level ``conn_string``, the
    statement is executed in one transaction, and the connection is always
    closed before returning.

    Args:
        query: SQL text passed to ``cursor.execute``.

    Returns:
        None.

    Raises:
        Any exception raised by ``psycopg2.connect`` or ``cursor.execute``
        propagates; the transaction is rolled back first.
    """
    conn = psycopg2.connect(conn_string)
    try:
        # Entering the connection as a context manager commits when the block
        # succeeds and rolls back when it raises. It does NOT close the
        # connection, hence the explicit close() in the finally clause.
        with conn:
            with conn.cursor() as cur:
                cur.execute(query)
    finally:
        conn.close()

# DDL for the table that stores (pclass, average_fare) rows.
# IF NOT EXISTS makes this cell safe to re-run: a plain CREATE TABLE raises
# once the table already exists, which breaks a fresh "Run All".
new_table_query = """CREATE TABLE IF NOT EXISTS class_average_fares (
    id SERIAL PRIMARY KEY,
    pclass INTEGER NOT NULL,
    average_fare NUMERIC(10, 2) NOT NULL
);
"""
execute_query_postgresql(new_table_query)

In [21]:
def insert_data_to_postgresql(data, connection):
    """Insert one ``(pclass, average_fare)`` row into ``class_average_fares``.

    Args:
        data: Two-item sequence ``(pclass, average_fare)`` bound to the
            statement's ``%s`` placeholders.
        connection: An open database connection. It is committed on success
            and rolled back on failure, but never closed here; the caller
            owns its lifetime.

    Returns:
        None.

    Raises:
        Any exception raised by ``cursor.execute`` or ``connection.commit``
        is re-raised after the transaction has been rolled back.
    """
    query = "INSERT INTO class_average_fares (pclass, average_fare) VALUES (%s, %s)"
    cursor = connection.cursor()
    try:
        cursor.execute(query, data)
        connection.commit()
    except Exception:
        # Roll back so the shared connection is not left in a failed
        # transaction, which would make every later statement on it fail.
        connection.rollback()
        raise
    finally:
        cursor.close()

# One connection is shared by the three inserts and closed afterwards, even
# if an insert fails, so it is not left open for the life of the kernel.
conn = psycopg2.connect(conn_string)
try:
    insert_data_to_postgresql((1, average_fare_class_1), conn)
    insert_data_to_postgresql((2, average_fare_class_2), conn)
    insert_data_to_postgresql((3, average_fare_class_3), conn)
finally:
    conn.close()
