In [41]:
import psycopg2 as ps
from dotenv import load_dotenv
import os

# Load key/value pairs from a .env file into the process environment, so the database
# settings fetched with os.getenv in the next cell do not have to be written in the notebook.
load_dotenv()

True

In [42]:
# Connection settings are read from the environment rather than hard-coded, so no
# credentials are stored in the notebook itself.
user = os.getenv("ELEPHANTSQL_USER")
password = os.getenv("ELEPHANTSQL_PASSWORD")
host = os.getenv("ELEPHANTSQL_HOST")
dbname = os.getenv("ELEPHANTSQL_DBNAME")

# os.getenv returns None for an unset variable. Fail fast with a message that names
# the missing (or empty) settings instead of handing None to the driver, where the
# resulting connection error would not point back at the configuration.
missing_settings = [
    env_name
    for env_name, env_value in (
        ("ELEPHANTSQL_USER", user),
        ("ELEPHANTSQL_PASSWORD", password),
        ("ELEPHANTSQL_HOST", host),
        ("ELEPHANTSQL_DBNAME", dbname),
    )
    if not env_value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# `conn` and the cursor `pg` are module-level names used by the query cells below.
conn = ps.connect(database=dbname, user=user, password=password, host=host)

pg = conn.cursor()

In [25]:
# Quick look at the table: fetch two rows to confirm the connection works and to see
# the column layout. There is no ORDER BY, so which two rows come back is up to the server.
q = "SELECT * FROM titanic LIMIT 2"
pg.execute(q)
c = pg.fetchall()
# Bare expression so the notebook renders the fetched rows.
c

[(1, False, 3, 'Mr. Owen Harris Braund', 'male', 22.0, 1, 0, 7.25),
 (2,
  True,
  1,
  'Mrs. John Bradley (Florence Briggs Thayer) Cumings',
  'female',
  38.0,
  1,
  0,
  71.2833)]

In [26]:
# Schema setup for the CREATE TABLE myschema.titanic statement in the next cell.
# Left disabled here; presumably it was already run once against this database
# (confirm before re-enabling, since re-running it on an existing schema raises an error).
# pg.execute("create schema myschema;")
# conn.commit()

In [27]:
# DDL for a typed Titanic table inside the `myschema` schema.
#
# IF NOT EXISTS keeps this cell re-runnable: without it, a second run raises
# "relation already exists", which leaves the open psycopg2 transaction in a failed
# state so that every later query errors until conn.rollback() is called.
#
# NOTE(review): `gender` is not a built-in PostgreSQL type; this assumes a custom type
# with that name (and the `myschema` schema) already exists. Confirm.
# NOTE(review): nothing in this cell commits, so the table only becomes permanent if
# conn.commit() is called elsewhere.
create_titanic_table = """
CREATE TABLE IF NOT EXISTS myschema.titanic
(
character_id SERIAL PRIMARY KEY,
survived boolean,
pclass int,
name varchar(128),
sex gender,
age real,
siblings_spouses_aboard int,
parents_children_aboard int,
fare real
);"""
pg.execute(create_titanic_table)

In [29]:
# Count the rows flagged as survivors; the query yields a single one-column row.
survivor_count_sql = """
    SELECT COUNT(*) FROM titanic AS t WHERE t.survived = TRUE
    """
pg.execute(survivor_count_sql)
(survivor_count,) = pg.fetchone()
print(f"How many passengers survived? {survivor_count}")

How many passengers survived? 342


In [30]:
# Count the rows flagged as non-survivors; the query yields a single one-column row.
death_count_sql = """
    SELECT COUNT(*) FROM titanic AS t WHERE t.survived = FALSE
    """
pg.execute(death_count_sql)
(death_count,) = pg.fetchone()
print(f"How many passengers died? {death_count}")

How many passengers died? 545


In [36]:
# Survivor head-count per passenger class, returned as (class, count) tuples.
survivors_by_class_sql = """
    SELECT  t.pclass as "Class",  COUNT(*) as "Survived" FROM titanic AS t
    WHERE t.survived = TRUE
    GROUP BY t.pclass
    """
pg.execute(survivors_by_class_sql)
survivors_by_class = pg.fetchall()
print(f"How many passengers survived by class? {survivors_by_class}")

How many passengers survived by class? [(1, 136), (2, 87), (3, 119)]


In [37]:
# Non-survivor head-count per passenger class, returned as (class, count) tuples.
# NOTE(review): the count column is aliased "Survived" although it counts deaths;
# the alias is never read by the Python code, but it looks like a copy-paste leftover.
deaths_by_class_sql = """
    SELECT  t.pclass as "Class",  COUNT(*) as "Survived" FROM titanic AS t
    WHERE t.survived = FALSE
    GROUP BY t.pclass
    """
pg.execute(deaths_by_class_sql)
deaths_by_class = pg.fetchall()
print(f"How many passengers died by class? {deaths_by_class}")

How many passengers died by class? [(1, 80), (2, 97), (3, 368)]


In [56]:
# Average age of survivors (always row 0) and of non-survivors (always row 1).
#
# The two aggregates are combined with UNION ALL and an explicit sort key because a
# plain UNION (a) removes duplicate rows, so identical averages would collapse into a
# single row and make c[1] fail, and (b) guarantees no row order, while the print
# cell reads c[0] as "survived" and c[1] as "died".
pg.execute(
    """
    SELECT by_survival.avg_age FROM (
        SELECT 1 AS row_order, AVG(t.age) AS avg_age FROM titanic AS t
        WHERE t.survived = TRUE
        UNION ALL
        SELECT 2 AS row_order, AVG(t.age) AS avg_age FROM titanic AS t
        WHERE t.survived = FALSE
    ) AS by_survival
    ORDER BY by_survival.row_order
    """
)
# Two one-column rows: [(survivor average,), (non-survivor average,)].
c = pg.fetchall()

In [57]:
# `c` holds two one-column rows; the first is read as survivors, the second as non-survivors.
survivor_avg_age = c[0][0]
nonsurvivor_avg_age = c[1][0]
print(
    "What was the average age of survivors vs nonsurvivors?  "
    f"survived: {survivor_avg_age:0.04}, "
    f"died: {nonsurvivor_avg_age:0.04}"
)

What was the average age of survivors vs nonsurvivors?  survived: 28.41, died: 30.14


In [61]:
# Average age per passenger class; the rows are fetched by the following print cell.
# NOTE(review): the average-age column is aliased "Survived"; the alias is never read
# by the Python code, but it looks like a copy-paste leftover.
avg_age_by_class_sql = """
    SELECT  t.pclass as "Class",  AVG(t.age) as "Survived" FROM titanic AS t
    GROUP BY t.pclass
    """
pg.execute(avg_age_by_class_sql)

In [62]:
# Drain the cursor left open by the previous cell; re-running this cell without
# re-running the query first would find no rows left to fetch.
avg_age_by_class = pg.fetchall()
print(f"What was the average age of each passenger class?  {avg_age_by_class}")

What was the average age of each passenger class?  [(1, 38.7889814815587), (2, 29.8686413042571), (3, 25.188747433238)]


In [94]:
# Average fare per passenger class, plus the overall survivor / non-survivor average
# fares. The two scalar subqueries do not depend on the class, so their values repeat
# on every row: each row is (class, class average, survivor average, non-survivor average).
#
# ORDER BY makes the row order deterministic: GROUP BY alone leaves it unspecified,
# and the report cell that follows reads the rows by position (c[0], c[1], c[2]).
pg.execute(
    """
    SELECT  t.pclass AS "class",  AVG(t.fare) AS "fare",
    (SELECT  AVG(st.fare) as "fare" FROM titanic AS st WHERE st.survived = TRUE) as "sfare",
    (SELECT  AVG(dt.fare) as "fare" FROM titanic AS dt WHERE dt.survived = FALSE) as "dfare"
    FROM titanic AS t
    GROUP BY t.pclass
    ORDER BY t.pclass
    """
)
c = pg.fetchall()
# Bare expression so the notebook renders the fetched rows.
c

[(1, 84.154687528257, 48.3954076976107, 22.2085840951412),
 (2, 20.6621831810993, 48.3954076976107, 22.2085840951412),
 (3, 13.7077075010452, 48.3954076976107, 22.2085840951412)]

In [103]:
# Each row of `c` is (class, class-average fare, survivor-average fare,
# non-survivor-average fare); the two survival averages are identical on every row,
# so they are taken from the first one.
first_row, second_row, third_row = c[0], c[1], c[2]
fare_report = f"""
    What was the average fare by passenger class?
        class {first_row[0]} fare {first_row[1]} class {second_row[0]} fare {second_row[1]} class {third_row[0]} fare {third_row[1]}  
    What is the average fare by survival? survived {first_row[2]}  died {first_row[3]}
    """
print(fare_report)


    What was the average fare by passenger class?
        class 1 fare 84.154687528257 class 2 fare 20.6621831810993 class 3 fare 13.7077075010452  
    What is the average fare by survival? survived 48.3954076976107  died 22.2085840951412
    


In [105]:
# Average fare for every (class, survived) combination, ordered by class and then by
# the survival flag. Each row is (class, average fare, survived).
fare_by_class_and_survival_sql = """
    SELECT  t.pclass AS "class",  AVG(t.fare) AS "fare", t.survived FROM titanic AS t  
    GROUP BY t.pclass, t.survived
    ORDER BY t.pclass, t.survived
    """
pg.execute(fare_by_class_and_survival_sql)
c = pg.fetchall()

In [106]:
# Show the raw (class, fare, survived) tuples fetched into `c` by the previous cell.
fare_breakdown_message = (
    f"What was the average fare by passenger class and survival (class, fare, survived)? {c}"
)
print(fare_breakdown_message)

What was the average fare by passenger class and survival (class, fare, survived)? [(1, 64.6840073347092, False), (1, 95.6080288185793, True), (2, 19.4123278549037, False), (2, 22.0557000390415, True), (3, 13.7118531063847, False), (3, 13.6948874778106, True)]


In [117]:
# Average number of siblings/spouses aboard per passenger class, plus the overall
# averages for survivors ("ss") and non-survivors ("ds"). The scalar subqueries do not
# depend on the class, so their values repeat on every row:
# (class, class average, survivor average, non-survivor average).
#
# ORDER BY makes the row order deterministic: GROUP BY alone leaves it unspecified,
# and the report cell that follows reads the rows by position (c[0], c[1], c[2]).
pg.execute(
    """
    SELECT  t.pclass AS "class",  AVG(t.siblings_spouses_aboard),
    (SELECT  AVG(st.siblings_spouses_aboard) FROM titanic AS st WHERE st.survived = TRUE) as "ss",
    (SELECT  AVG(dt.siblings_spouses_aboard) FROM titanic AS dt WHERE dt.survived = FALSE) as "ds"
    FROM titanic AS t
    GROUP BY t.pclass
    ORDER BY t.pclass
    """
)
c = pg.fetchall()

In [120]:
# Each row of `c` is (class, class average, survivor average, non-survivor average);
# the two survival averages are identical on every row, so they come from the first.
first_row, second_row, third_row = c[0], c[1], c[2]
siblings_spouses_report = f"""
    How many siblings/spouses aboard on average by passenger class?
        class {first_row[0]} average  {first_row[1]:0.04} class {second_row[0]} average {second_row[1]:0.04} class {third_row[0]} average {third_row[1]:0.04}  
     How many siblings/spouses aboard on average by survival? survived {first_row[2]:0.04}  died {first_row[3]:0.04}
    """
print(siblings_spouses_report)


    How many siblings/spouses aboard on average by passenger class?
        class 1 average  0.4167 class 2 average 0.4022 class 3 average 0.6201  
     How many siblings/spouses aboard on average by survival? survived 0.4737  died 0.5578
    


In [121]:
# Average number of parents/children aboard per passenger class, plus the overall
# averages for survivors ("ss") and non-survivors ("ds"). The scalar subqueries do not
# depend on the class, so their values repeat on every row:
# (class, class average, survivor average, non-survivor average).
#
# ORDER BY makes the row order deterministic: GROUP BY alone leaves it unspecified,
# and the report cell that follows reads the rows by position (c[0], c[1], c[2]).
pg.execute(
    """
    SELECT  t.pclass AS "class",  AVG(t.parents_children_aboard),
    (SELECT  AVG(st.parents_children_aboard) FROM titanic AS st WHERE st.survived = TRUE) as "ss",
    (SELECT  AVG(dt.parents_children_aboard) FROM titanic AS dt WHERE dt.survived = FALSE) as "ds"
    FROM titanic AS t
    GROUP BY t.pclass
    ORDER BY t.pclass
    """
)
c = pg.fetchall()

In [122]:

# Each row of `c` is (class, class average, survivor average, non-survivor average);
# the two survival averages are identical on every row, so they come from the first.
first_row, second_row, third_row = c[0], c[1], c[2]
parents_children_report = f"""
    How many parents/children aboard on average by passenger class?
        class {first_row[0]} average  {first_row[1]:0.04} class {second_row[0]} average {second_row[1]:0.04} class {third_row[0]} average {third_row[1]:0.04}  
    How many parents/children aboard on average by survival? survived {first_row[2]:0.04}  died {first_row[3]:0.04}
    """
print(parents_children_report)


    How many parents/children aboard on average by passenger class?
        class 1 average  0.3565 class 2 average 0.3804 class 3 average 0.3963  
    How many parents/children aboard on average by survival? survived 0.4649  died 0.3321
    


In [156]:
# Answer "do any passengers share a name?" as the text 'True' or 'False'.
#
# The distinct-name count is compared with COUNT(t.name) rather than COUNT(*):
# COUNT(DISTINCT ...) skips NULLs while COUNT(*) counts every row, so a single row
# with a NULL name would otherwise be reported as a duplicate.
pg.execute(
    """
    SELECT CASE WHEN COUNT(DISTINCT t.name) = COUNT(t.name) THEN 'False' ELSE 'True' END
    FROM titanic t
    """
)

In [158]:
# The preceding query returns one row holding the text 'True' or 'False'.
has_duplicate_names = pg.fetchone()[0]
print(f"Do any passengers have the same name? {has_duplicate_names}")

Do any passengers have the same name? False


In [159]:
# Estimate the number of married couples: pair every passenger titled "Mrs." with
# every passenger titled "Mr." whose surname (the text after the last space in the
# name) is the same, and count the pairs.
#
# The join runs in one direction only (wife -> husband). The previous symmetric cross
# join matched each pair twice, once per ordering, and halved the total; this form
# returns the same number without the double counting.
#
# NOTE(review): this is a surname-only heuristic. Unrelated passengers who share a
# surname, or several "Mr." rows matching one "Mrs.", each count as a couple.
pg.execute(
    """
    SELECT COUNT(*) FROM titanic AS wife
    INNER JOIN titanic AS husband
        ON regexp_replace(wife.name, '^.* ', '') = regexp_replace(husband.name, '^.* ', '')
    WHERE SUBSTRING(wife.name, 1, 4) = 'Mrs.'
      AND SUBSTRING(husband.name, 1, 3) = 'Mr.'
    """
)
c = pg.fetchone()[0]

In [160]:
# `c` is the scalar pair count fetched by the previous cell.
married_couples_message = f"How many married couples were aboard the Titanic? {c}"
print(married_couples_message)

How many married couples were aboard the Titanic? 68


In [153]:
# Roll back the connection's current transaction. This discards anything not yet
# committed (no visible cell commits the CREATE TABLE above) and clears a failed
# transaction so the cursor can run queries again.
# NOTE(review): the execution count shows this ran before the last few query cells,
# presumably to recover from an error mid-session. Confirm whether it belongs at the end.
conn.rollback()