In [1]:
import os
import psycopg2

In [2]:
# Collect the connection parameters from environment variables so that no
# credentials are written into the notebook. TITANIC_USER_DB supplies both
# the database name and the user.
# NOTE(review): confirm that sharing one variable for both is intended.
pg_params = {
    'dbname': os.getenv('TITANIC_USER_DB'),
    'user': os.getenv('TITANIC_USER_DB'),
    'password': os.getenv('TITANIC_PASS'),
    'host': os.getenv('TITANIC_HOST'),
}
conn = psycopg2.connect(**pg_params)
# A single cursor is reused by every query cell below.
curs = conn.cursor()

## How many passengers survived, and how many died?

In [3]:
# Count the rows whose `survived` flag is 0; the result is stored as `died`.
curs.execute('''SELECT COUNT(*)
                FROM titanic
                WHERE survived = 0;
                ''')

In [4]:
# Fetch the one row produced by the COUNT(*) query; the count is read as died[0].
died = curs.fetchone()

In [5]:
# Count the rows whose `survived` flag is 1; the result is stored as `survived`.
curs.execute('''SELECT COUNT(*)
                FROM titanic
                WHERE survived = 1;
                ''')

In [6]:
# Fetch the one row produced by the COUNT(*) query; the count is read as survived[0].
survived = curs.fetchone()

In [7]:
# Report both totals; each fetched row carries its count in the first column.
for outcome, count_row in (('Survived', survived), ('Died', died)):
    print(f'Number of passengers who {outcome}: {count_row[0]}')

Number of passengers who Survived: 342
Number of passengers who Died: 545


In [8]:
# Or more simply: one grouped query returns a (count, survived) row per flag.
# ORDER BY pins the row order (survived = 0 first, then 1). Without it
# PostgreSQL may return the groups in any order, and code that reads the
# result by row position would silently swap the "Survived"/"Died" labels.
curs.execute('SELECT COUNT(*), survived FROM titanic '
             'GROUP BY survived '
             'ORDER BY survived')

In [9]:
# All grouped rows; each row is (count, survived), matching the SELECT list.
survivors = curs.fetchall()

In [10]:
# Look the counts up by their `survived` flag instead of by row position, so
# the labels stay correct whatever order the grouped rows came back in.
# Each row is (count, survived), matching the SELECT column order.
count_by_flag = {flag: total for total, flag in survivors}
print(f'Number of passengers who Survived: {count_by_flag[1]}\n'
      f'Number of passengers who Died: {count_by_flag[0]}')

Number of passengers who Survived: 342
Number of passengers who Died: 545


## How many passengers were in each class?

In [11]:
# Passenger count per class, one (pclass, count) row per class, ordered by pclass.
curs.execute('SELECT pclass, COUNT(*) FROM titanic \
              GROUP BY pclass \
              ORDER BY pclass;')

In [12]:
# Every (pclass, count) row from the per-class count query.
pass_class = curs.fetchall()

In [13]:
# One output line per class; rows are (pclass, passenger count).
for pclass, passenger_total in pass_class:
    print(f'Class {pclass} had {passenger_total} passengers')

Class 1 had 216 passengers
Class 2 had 184 passengers
Class 3 had 487 passengers


## How many passengers within each class survived/died?

In [14]:
# Per-class count restricted to rows with survived = 0, ordered by pclass.
curs.execute('SELECT pclass, COUNT(*) FROM titanic\
              WHERE survived = 0\
              GROUP BY pclass\
              ORDER BY pclass;')

In [15]:
# Every (pclass, count) row for the survived = 0 query.
dead_class = curs.fetchall()

In [16]:
# One output line per class; rows are (pclass, number who died).
for pclass, died_total in dead_class:
    print(f'Class {pclass} had {died_total} passengers who died')

Class 1 had 80 passengers who died
Class 2 had 97 passengers who died
Class 3 had 368 passengers who died


In [17]:
# Per-class count restricted to rows with survived = 1, ordered by pclass.
curs.execute('SELECT pclass, COUNT(*) FROM titanic\
              WHERE survived = 1\
              GROUP BY pclass\
              ORDER BY pclass;')

In [18]:
# Every (pclass, count) row for the survived = 1 query.
live_class = curs.fetchall()

In [19]:
# One output line per class; rows are (pclass, number who lived).
for pclass, lived_total in live_class:
    print(f'Class {pclass} had {lived_total} passengers who lived')

Class 1 had 136 passengers who lived
Class 2 had 87 passengers who lived
Class 3 had 119 passengers who lived


In [20]:
# Or more simply: a single query grouped by both survival flag and class.
# Rows come back as (pclass, survived, count), ordered by class and, within
# each class, with survived = 1 before survived = 0 (survived DESC).
curs.execute('SELECT  pclass, survived, COUNT(pclass)\
              FROM titanic\
              GROUP BY survived, pclass\
              ORDER BY pclass, survived DESC;')

In [21]:
# Every (pclass, survived, count) row from the combined query.
class_survive = curs.fetchall()

In [22]:
# Rows are (pclass, survived, count). The wording is chosen from the survived
# flag; a row whose flag is neither 1 nor 0 produces no output.
outcome_words = {1: 'lived', 0: 'died'}
for pclass, flag, total in class_survive:
    if flag in outcome_words:
        print(f'Class {pclass} had {total} passengers who {outcome_words[flag]}')

Class 1 had 136 passengers who lived
Class 1 had 80 passengers who died
Class 2 had 87 passengers who lived
Class 2 had 97 passengers who died
Class 3 had 119 passengers who lived
Class 3 had 368 passengers who died


## What was the average age of survivors vs nonsurvivors?

In [23]:
# Average age per survival flag, one (survived, avg_age) row per flag.
# ORDER BY pins the row order (survived = 0 first, then 1). Without it
# PostgreSQL may return the groups in any order, and code that reads the
# result by row position would attach the averages to the wrong labels.
curs.execute('SELECT survived, AVG(age) FROM titanic '
             'GROUP BY survived '
             'ORDER BY survived;')

In [24]:
# Every (survived, average age) row from the grouped query.
avg_age = curs.fetchall()

In [25]:
# Map survived flag -> average age so each label is matched by flag value
# rather than by row position; the rows are (survived, AVG(age)) pairs and
# their order is not guaranteed unless the query sorts them.
age_by_flag = dict(avg_age)
print(f'Average age of survivors was: {age_by_flag[1]:.02f} years old')
print(f'Average age of non-survivors was: {age_by_flag[0]:.02f} years old')

Average age of survivors was: 28.41 years old
Average age of non-survivors was: 30.14 years old


## What was the average age of each passenger class?

In [26]:
# Average age per class, one (pclass, avg_age) row per class, ordered by pclass.
curs.execute('SELECT pclass, AVG(age) FROM titanic\
              GROUP BY pclass\
              ORDER BY pclass;')

In [27]:
# Every (pclass, average age) row from the per-class query.
avg_age_class = curs.fetchall()

In [30]:
# Label each line with the pclass value returned by the query rather than the
# loop position (i + 1). The positional label is only right when classes
# 1..N are all present and in order; using the returned value is always
# right and matches how the other per-class loops in this notebook print.
for pclass, mean_age in avg_age_class:
    print(f'Class {pclass} average age: {mean_age:.02f}')

Class 1 average age: 38.79
Class 2 average age: 29.87
Class 3 average age: 25.19


## What was the average fare by passenger class? By survival?

In [31]:
# Average fare per class, one (pclass, avg_fare) row per class, ordered by pclass.
curs.execute('SELECT pclass, AVG(fare) FROM titanic\
              GROUP BY pclass\
              ORDER BY pclass;')

In [32]:
# Every (pclass, average fare) row from the per-class query.
avg_fare_class = curs.fetchall()

In [33]:
# Label each line with the pclass value returned by the query rather than the
# loop position (i + 1). The positional label is only right when classes
# 1..N are all present and in order; using the returned value is always
# right and matches how the other per-class loops in this notebook print.
for pclass, mean_fare in avg_fare_class:
    print(f'Class {pclass} average fare: ${mean_fare:.02f}')

Class 1 average fare: $84.15
Class 2 average fare: $20.66
Class 3 average fare: $13.71


In [34]:
# Average fare per survival flag; ORDER BY survived puts the survived = 0 row
# first and the survived = 1 row second.
curs.execute('SELECT survived, AVG(fare) FROM titanic\
              GROUP BY survived\
              ORDER BY survived;')

In [35]:
# Every (survived, average fare) row from the grouped query.
avg_fare_survived = curs.fetchall()

In [38]:
# Row 1 is reported as survivors and row 0 as non-survivors; the average fare
# is the second column of each row.
for group_label, row_idx in (('Survivor', 1), ('Non-Survivor', 0)):
    print(f'{group_label} average fare: ${avg_fare_survived[row_idx][1]:.02f}')

Survivor average fare: $48.40
Non-Survivor average fare: $22.21


## How many siblings/spouses aboard on average by passenger class? By survival?

In [39]:
# Average siblings_spouses_aboard per class, ordered by pclass.
curs.execute('SELECT pclass, AVG(siblings_spouses_aboard) FROM titanic\
              GROUP BY pclass\
              ORDER BY pclass;')

In [41]:
# Every (pclass, average siblings/spouses) row from the per-class query.
sib_avg_class = curs.fetchall()

In [53]:
# One output line per class; the average is passed through float() before
# being formatted to two decimals.
for pclass, mean_sibs in sib_avg_class:
    print(f'Class {pclass} averaged {float(mean_sibs):.02f} siblings or spouses aboard')

Class 1 averaged 0.42 siblings or spouses aboard
Class 2 averaged 0.40 siblings or spouses aboard
Class 3 averaged 0.62 siblings or spouses aboard


In [51]:
# Average siblings_spouses_aboard per survival flag; ORDER BY survived puts
# the survived = 0 row first and the survived = 1 row second.
curs.execute('SELECT survived, AVG(siblings_spouses_aboard) FROM titanic\
              GROUP BY survived\
              ORDER BY survived;')

In [52]:
# Every (survived, average siblings/spouses) row from the grouped query.
sib_avg_surv = curs.fetchall()

In [54]:
# Row 1 is reported as survivors and row 0 as non-survivors; the average is
# the second column, converted with float() before formatting.
for group_label, row_idx in (('Survivors', 1), ('Non-Survivors', 0)):
    print(f'{group_label} averaged {float(sib_avg_surv[row_idx][1]):.02f} siblings or spouses aboard')

Survivors averaged 0.47 siblings or spouses aboard
Non-Survivors averaged 0.56 siblings or spouses aboard


## How many parents/children aboard on average by passenger class? By survival?

In [55]:
# Average parents_children_aboard per class, ordered by pclass.
curs.execute('SELECT pclass, AVG(parents_children_aboard) FROM titanic\
              GROUP BY pclass\
              ORDER BY pclass;')

In [56]:
# Every (pclass, average parents/children) row from the per-class query.
par_avg_class = curs.fetchall()

In [57]:
# One output line per class; the average is passed through float() before
# being formatted to two decimals.
for pclass, mean_parch in par_avg_class:
    print(f'Class {pclass} averaged {float(mean_parch):.02f} parents or children aboard')

Class 1 averaged 0.36 parents or children aboard
Class 2 averaged 0.38 parents or children aboard
Class 3 averaged 0.40 parents or children aboard


In [58]:
# Average parents_children_aboard per survival flag; ORDER BY survived puts
# the survived = 0 row first and the survived = 1 row second.
curs.execute('SELECT survived, AVG(parents_children_aboard) FROM titanic\
              GROUP BY survived\
              ORDER BY survived;')

In [59]:
# Every (survived, average parents/children) row from the grouped query.
par_avg_surv = curs.fetchall()

In [60]:
# Row 1 is reported as survivors and row 0 as non-survivors; the average is
# the second column, converted with float() before formatting.
for group_label, row_idx in (('Survivors', 1), ('Non-Survivors', 0)):
    print(f'{group_label} averaged {float(par_avg_surv[row_idx][1]):.02f} parents or children aboard')

Survivors averaged 0.46 parents or children aboard
Non-Survivors averaged 0.33 parents or children aboard


## Do any passengers have the same name?

In [3]:
# Number of distinct passenger names in the table.
curs.execute('SELECT COUNT(DISTINCT name) FROM titanic;')
distinct_names = curs.fetchall() 

In [4]:
# Number of name values in the table, duplicates included (COUNT(name) skips NULLs).
curs.execute('SELECT COUNT(name) FROM titanic;')
all_names = curs.fetchall()

In [6]:
# Equal results mean no name occurs more than once. The bare expression is
# left as the cell's last line so its value is displayed.
distinct_names == all_names

True

No: every passenger has a unique full name (the count of distinct names equals the total count of names).