## Exercise

Calculate the survival rate for passengers in the 2nd class from the Titanic
dataset.

Create a new table class_survival_rate in the local PostgreSQL database and
insert the calculated survival rate into this table.


In [8]:
import boto3
import botocore
import csv
from io import StringIO
import psycopg2

In [4]:
def download_csv_from_s3(bucket_name, object_key):
    """Download a CSV object from S3 and return its text with a cleaned header.

    The object body is read in full, decoded as UTF-8, and the first line has
    leading/trailing whitespace stripped from each comma-separated column
    name. Everything after the first line is returned untouched.

    Args:
        bucket_name: Name of the S3 bucket holding the object.
        object_key: Key of the CSV object inside the bucket.

    Returns:
        str: The cleaned header line, a newline, then the original data rows
        (an empty string when the object contains only a header line).
    """
    # signature_version=UNSIGNED makes the client send unsigned requests.
    s3 = boto3.client('s3', config=botocore.config.Config(signature_version=botocore.UNSIGNED))
    response = s3.get_object(Bucket=bucket_name, Key=object_key)
    content = response['Body'].read().decode('utf-8')

    # partition() always yields three parts, so an object with no newline at
    # all (header only) gives an empty `rows` instead of an unpacking error.
    header, _, rows = content.partition("\n")

    # The header is split on bare commas, so quoted column names that contain
    # a comma are not supported.
    cleaned_header = ",".join(column.strip() for column in header.split(","))

    return cleaned_header + "\n" + rows

# Fetch the Titanic CSV once and keep its text in memory for the cells below.
titanic_bucket = 'data-eng-makers-public-datasets-404544469985'
titanic_object_key = 'etl_bites_04_titanic_dataset.csv'
titanic_csv_content = download_csv_from_s3(titanic_bucket, titanic_object_key)

In [16]:
def calculate_survival_rate(titanic_data, pclass_filter):
    """Return the survival percentage for one passenger class.

    Args:
        titanic_data: Iterable of row mappings (for example a
            ``csv.DictReader``) with ``'Pclass'`` and ``'Survived'`` keys
            whose values are strings.
        pclass_filter: Passenger class to select. It is converted with
            ``str()`` and compared against each row's ``'Pclass'`` value.

    Returns:
        float: Percentage (0-100) of the matching rows whose ``'Survived'``
        value is ``'1'``, rounded to two decimal places.

    Raises:
        ValueError: If no row matches ``pclass_filter``; a rate over zero
            passengers is undefined.
    """
    # Rows hold text values, so the filter is compared as a string.
    wanted_class = str(pclass_filter)

    count_class_total = 0
    count_class_survivors = 0
    for row in titanic_data:
        if row['Pclass'] != wanted_class:
            continue
        count_class_total += 1
        if row['Survived'] == '1':
            count_class_survivors += 1

    # Guard the division: without it an unknown class (or empty input) fails
    # with an unexplained ZeroDivisionError.
    if count_class_total == 0:
        raise ValueError(
            f"no passengers found with Pclass == {wanted_class!r}; "
            "cannot compute a survival rate"
        )

    return round((count_class_survivors / count_class_total) * 100, 2)

In [27]:
# The exercise asks for the survival rate of 2nd-class passengers.
passenger_class = 2

# DictReader is a single-pass iterator: build a new one to read the rows again.
titanic_rows = csv.DictReader(StringIO(titanic_csv_content))
class_survival_rate = calculate_survival_rate(titanic_rows, passenger_class)

In [None]:
# Reference copy of the DDL for the target table. The table was created through
# a direct SQL command in TablePlus rather than a Python script, for the sake
# of variety. The statement is kept here as a plain string so that this cell is
# valid Python under "Run All"; nothing in this cell touches the database.
CREATE_CLASS_SURVIVAL_RATES_SQL = """
CREATE TABLE class_survival_rates (
    id SERIAL PRIMARY KEY,
    pclass INTEGER NOT NULL,
    survival_rate FLOAT NOT NULL
);
"""

In [28]:
def insert_data_to_postgresql(data, connection):
    """Insert one (pclass, survival_rate) row into ``class_survival_rates``.

    Args:
        data: Two-item sequence ``(pclass, survival_rate)`` bound to the two
            ``%s`` placeholders of the INSERT statement.
        connection: Open database connection providing ``cursor()``,
            ``commit()`` and ``rollback()``; its cursors must support the
            ``with`` statement (psycopg2 cursors do).

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    query = "INSERT INTO class_survival_rates (pclass, survival_rate) VALUES (%s, %s)"
    try:
        # The with-block closes the cursor on success and on failure.
        with connection.cursor() as cursor:
            cursor.execute(query, data)
        connection.commit()
    except Exception:
        # Do not leave the connection stuck in a failed transaction.
        connection.rollback()
        raise

# Connection settings for the local etl_bites database (no password is given
# in the connection string).
etl_bites_conn_string = "host='localhost' port='5432' dbname='etl_bites' user='jackdench'"
conn = psycopg2.connect(etl_bites_conn_string)
try:
    # NOTE: the INSERT has no duplicate handling, so every run of this cell
    # appends another row to class_survival_rates.
    insert_data_to_postgresql((passenger_class, class_survival_rate), conn)
finally:
    # Always release the connection, so re-running the cell does not pile up
    # open connections on the server.
    conn.close()