In [1]:
# Import libraries
import pandas as pd
import numpy as np
import datetime

pd.options.mode.chained_assignment = None

## Extract

In [2]:
# Import data
df =  pd.read_csv("everest.csv", low_memory=False)
df

Unnamed: 0,expid,membid,peakid,myear,mseason,fname,lname,sex,age,birthdate,...,totmembers,smtmembers,mdeaths,tothired,nohired,smthired,hdeaths,bcdate,pkname,heightm
0,EVER88401,15,EVER,1988,4,Maurits,Vreudge,M,0,- -,...,17,0,0,10,False,0,1,10/11/1988,Everest,8849
1,EVER88401,13,EVER,1988,4,Christa,Van Schaerdenburg,F,0,- -,...,17,0,0,10,False,0,1,10/11/1988,Everest,8849
2,EVER88401,14,EVER,1988,4,Rudy,Van Snick,M,0,- -,...,17,0,0,10,False,0,1,10/11/1988,Everest,8849
3,EVER88401,19,EVER,1988,4,Ang Rita,Sherpa,M,0,- -,...,17,0,0,10,False,0,1,10/11/1988,Everest,8849
4,EVER88401,20,EVER,1988,4,Lhakpa Dorje (Ang Lhakpa),Sherpa,M,0,- -,...,17,0,0,10,False,0,1,10/11/1988,Everest,8849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21896,EVER20103,10,EVER,2020,1,Tai-Gang,Huang,M,0,- -,...,20,14,0,22,False,21,0,23/04/2020,Everest,8849
21897,EVER20103,11,EVER,2020,1,Wei,Li,M,0,- -,...,20,14,0,22,False,21,0,23/04/2020,Everest,8849
21898,EVER20103,9,EVER,2020,1,Hong-Zhi,Hu,M,0,- -,...,20,14,0,22,False,21,0,23/04/2020,Everest,8849
21899,EVER20103,12,EVER,2020,1,Xiao-Lin,Li,M,0,- -,...,20,14,0,22,False,21,0,23/04/2020,Everest,8849


### Column Definitions

<b>expid</b> - Expedition id 

<b>membid</b> - Member id

<b>*NOTE:*</b> - Individual record id is composite key of expedition id + member id

-----

<b>bconly</b> - BC / Advanced BC only (Y/N) if yes, The member did not climb above base camp (or advanced
base camp in cases where the path from base camp does not require technical climbing skills) 
  
<b>nottobc</b> - Not to base camp (Y/N) if yes, The member did not reach base camp

<b>hired</b> - The person was hired by the expedition

<b>msolo</b> - Solo (Y/N)

<b>sherpa</b> - Sherpa (Y/N) 

<b>tibetan</b> - Tibetan (Y/N)

<b>mhighpt</b> - Expedition high-point reached (Y/N)

<b>mperhighpt</b> - Personal high-point (m)

<b>msmdate1</b> - 1st summit / high-point date

<b>msmdate2</b> - 2nd summit date

<b>msmdate3</b> - 3rd summit date

<b>mroute1</b> - 1st ascent route

<b>mroute2</b> - 2nd ascent route

<b>mroute3</b> - 3rd ascent route

<b>mo2used</b> - Oxygen used (Y/N)

<b>mo2none</b> - Oxygen not used (Y/N)

<b>deathhgtm</b> - Death Height (m)

<b>msmtbid</b> - Summit Bid:
```
0 – Unspecified
1 – No summit bid
2 – Aborted below high camp
3 – Aborted at high camp
4 – Aborted above high camp
5 – Successful summit bid
```
<b>nohired</b> - No hired personnel used (above BC)

<b>stdrte</b> - 8000m standard route (Y/N)

In [3]:
# Select the required columns 
filtered_df = df[['expid', 'membid', 'myear', 'sex', 'calcage', 'citizen', 'status', 'msolo', 'msuccess', 
                  'msmtdate1', 'msmtdate2', 'msmtdate3', 'route1', 'route2', 'route3', 'route4', 'mo2used', 
                  'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep', 'death', 'deathdate', 'msmtbid', 'stdrte']]
filtered_df

Unnamed: 0,expid,membid,myear,sex,calcage,citizen,status,msolo,msuccess,msmtdate1,...,route4,mo2used,mo2none,mo2climb,mo2descent,mo2sleep,death,deathdate,msmtbid,stdrte
0,EVER88401,15,1988,M,49,Belgium,2nd Exp Doctor,False,False,- -,...,,False,True,False,False,False,False,- -,1,True
1,EVER88401,13,1988,F,30,Netherlands,Exp Doctor,False,False,- -,...,,False,True,False,False,False,False,- -,1,True
2,EVER88401,14,1988,M,32,Belgium,Climber,False,False,22/12/1988,...,,True,False,True,False,True,False,- -,4,True
3,EVER88401,19,1988,M,40,Nepal,H-A Worker,False,False,22/12/1988,...,,False,True,False,False,False,False,- -,4,True
4,EVER88401,20,1988,M,29,Nepal,H-A Worker,False,False,22/12/1988,...,,True,False,True,False,True,True,23/12/1988,4,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21896,EVER20103,10,2020,M,47,China,Climber,False,False,- -,...,,True,False,True,False,False,False,- -,1,True
21897,EVER20103,11,2020,M,0,China,Climber,False,True,28/05/2020,...,,True,False,True,False,True,False,- -,5,True
21898,EVER20103,9,2020,M,57,China,Climber,False,True,28/05/2020,...,,True,False,True,False,True,False,- -,5,True
21899,EVER20103,12,2020,M,35,China,Climber,False,True,28/05/2020,...,,True,False,True,False,True,False,- -,5,True


## Transform 

In [4]:
# Replace dashes in date columns with nan
for index, row in filtered_df.iterrows():
    if row['msmtdate1'] == '  -   -':
        filtered_df.loc[index,'msmtdate1'] = 'na'
    if row['msmtdate2'] == '  -   -':
        filtered_df.loc[index,'msmtdate2'] = 'na'
    if row['msmtdate3'] == '  -   -':
        filtered_df.loc[index,'msmtdate3'] = 'na'
    if row['deathdate'] == '  -   -':
        filtered_df.loc[index,'deathdate'] = 'na'

In [5]:
# Remove any missing items from the routes and status columns
filtered_df = filtered_df[filtered_df['route1'].isnull() != True]
filtered_df = filtered_df[filtered_df['status'].isnull() != True]

In [6]:
# Organise the route names into either 'northern', 'southern', or 'other' routes
for index, row in filtered_df.iterrows():
    if ('Col-N' in row['route1']) or ('N Col' in row['route1']) or ('N Face' in row['route1']) or \
       ('from N' in row['route1']) or ('NE Ridge' in row['route1']) or ('N Ridge' in row['route1']) or\
       ('E Ridge' in row['route1']) or ('N side' in row['route1']):
        filtered_df.loc[index,'new_route'] = 'North'
    if ('Col-S' in row['route1']) or ('S Col' in row['route1']) or ('S Face' in row['route1']) or \
       ('from S' in row['route1']) or ('SW Face' in row['route1']) or ('Geneva' in row['route1']) or \
       ('SE Ridge' in row['route1']) or ('S side' in row['route1']):
        filtered_df.loc[index,'new_route'] = 'South'
    if ('Lho' in row['route1']) or ('Lho' in row['route1']) or ('S Pillar' in row['route1']) or \
       ('SW' in row['route1']) or ('Hornbein ' in row['route1']) or ('E Face' in row['route1']) or \
       ('W Ridge' in row['route1']) or ('E sides' in row['route1']) or ('Khumbu' in row['route1']) or \
        ('S Couloir' in row['route1']) :
        filtered_df.loc[index,'new_route'] = 'Other'

In [7]:
# Print the route breakdowns
print(f"Northern route: {len(filtered_df[filtered_df['new_route'] == 'North'])}")
print(f"Southern route: {len(filtered_df[filtered_df['new_route'] == 'South'])}")
print(f"Other routes: {len(filtered_df[filtered_df['new_route'] == 'Other'])}")

Northern route: 7886
Southern route: 12280
Other routes: 1532


In [8]:
# Organise the job status into classes 
for index, row in filtered_df.iterrows():
    if ('Cook' in filtered_df.loc[index, 'status']) :
        filtered_df.loc[index,'new_status'] = 'Cook'
    elif ('Doc' in filtered_df.loc[index,'status']) : 
        filtered_df.loc[index,'new_status'] = 'Doctor'
    elif ('Climb' in filtered_df.loc[index,'status']) :
        filtered_df.loc[index,'new_status'] = 'Climber'   
    elif ('Dep' in filtered_df.loc[index, 'status']) :
        filtered_df.loc[index,'new_status'] = 'Deputy'
    elif ('Leader' in filtered_df.loc[index, 'status']) :
        filtered_df.loc[index,'new_status'] = 'Leader'
    else:
        filtered_df.loc[index,'new_status'] = 'Other'

In [9]:
# Print the status breakdowns
print(f"Cooks: {len(filtered_df[filtered_df['new_status'] == 'Cook'])}")
print(f"Doctors: {len(filtered_df[filtered_df['new_status'] == 'Doctor'])}")
print(f"Climbers: {len(filtered_df[filtered_df['new_status'] == 'Climber'])}")
print(f"Deputy Leaders: {len(filtered_df[filtered_df['new_status'] == 'Deputy'])}")
print(f"Leaders: {len(filtered_df[filtered_df['new_status'] == 'Leader'])}")
print(f"Other: {len(filtered_df[filtered_df['new_status'] == 'Other'])}")

Cooks: 29
Doctors: 437
Climbers: 11312
Deputy Leaders: 356
Leaders: 2171
Other: 7393


In [10]:
# Work out the average age and substitute this value to hikers have age listed as 0
age_df = filtered_df[filtered_df['calcage'] != 0]
avg_age = round(age_df['calcage'].mean(),0)

for index, row in filtered_df.iterrows():
    if row['calcage'] == 0:
        filtered_df.loc[index, 'calcage'] = avg_age

filtered_df['calcage'] = filtered_df['calcage'].astype('int64')

In [11]:
# Add an id column to the dataframe 
filtered_df = filtered_df.reset_index()
filtered_df.rename(columns={"index":"id"}, inplace=True)

In [12]:
# Create a dictionary to store the count of climbers on any given day
dates = pd.date_range(start='1/1/1920', end='12/12/2020', freq='D')
date_df = pd.DataFrame(dates)
date_df['count'] = 0
date_df.rename(columns={0:"date"}, inplace=True)
date_df["date"] =  pd.to_datetime(date_df["date"], format="%d/%m/%y")
date_df["date"] = date_df["date"].dt.strftime("%#d/%m/%Y")
date_df = date_df.set_index("date")
date_dict = date_df.to_dict()
date_dict

{'count': {'1/01/1920': 0,
  '2/01/1920': 0,
  '3/01/1920': 0,
  '4/01/1920': 0,
  '5/01/1920': 0,
  '6/01/1920': 0,
  '7/01/1920': 0,
  '8/01/1920': 0,
  '9/01/1920': 0,
  '10/01/1920': 0,
  '11/01/1920': 0,
  '12/01/1920': 0,
  '13/01/1920': 0,
  '14/01/1920': 0,
  '15/01/1920': 0,
  '16/01/1920': 0,
  '17/01/1920': 0,
  '18/01/1920': 0,
  '19/01/1920': 0,
  '20/01/1920': 0,
  '21/01/1920': 0,
  '22/01/1920': 0,
  '23/01/1920': 0,
  '24/01/1920': 0,
  '25/01/1920': 0,
  '26/01/1920': 0,
  '27/01/1920': 0,
  '28/01/1920': 0,
  '29/01/1920': 0,
  '30/01/1920': 0,
  '31/01/1920': 0,
  '1/02/1920': 0,
  '2/02/1920': 0,
  '3/02/1920': 0,
  '4/02/1920': 0,
  '5/02/1920': 0,
  '6/02/1920': 0,
  '7/02/1920': 0,
  '8/02/1920': 0,
  '9/02/1920': 0,
  '10/02/1920': 0,
  '11/02/1920': 0,
  '12/02/1920': 0,
  '13/02/1920': 0,
  '14/02/1920': 0,
  '15/02/1920': 0,
  '16/02/1920': 0,
  '17/02/1920': 0,
  '18/02/1920': 0,
  '19/02/1920': 0,
  '20/02/1920': 0,
  '21/02/1920': 0,
  '22/02/1920': 0,
  

In [13]:
# Add one to the climber count if a climber made a summit bid on particular day  
for index, row in filtered_df.iterrows():
    if filtered_df.loc[index, 'msmtdate1'] != 'na':
        date  = filtered_df.loc[index, 'msmtdate1']
        date_dict['count'][date] += 1 
        
for index, row in filtered_df.iterrows():
    if filtered_df.loc[index, 'msmtdate2'] != 'na':
        date  = filtered_df.loc[index, 'msmtdate2']
        date_dict['count'][date] += 1

for index, row in filtered_df.iterrows():
    if filtered_df.loc[index, 'msmtdate3'] != 'na':
        date  = filtered_df.loc[index, 'msmtdate3']
        date_dict['count'][date] += 1 


In [14]:
# Add a 'number of climbers' column to the filter dataframe in order to assess effect of crowding
for index, row in filtered_df.iterrows():
    if filtered_df.loc[index, 'msmtdate1'] != 'na':
        date  = filtered_df.loc[index, 'msmtdate1']
        count = date_dict['count'][date]
        filtered_df.loc[index, 'climber_count'] = count


In [64]:
# Export the cleandata file to csv
filtered_df.to_csv('clean_data.csv', index=False)

In [16]:
# Export the crowding_data file to csv
crowding_df = filtered_df[filtered_df['msmtdate1'] != 'na']
crowding_df.to_csv('crowding_data.csv', index=False)

In [17]:
climbers_df = pd.DataFrame.from_dict(date_dict)
climbers_df = climbers_df.reset_index()
climbers_df.rename(columns={'index':"date"}, inplace=True)
climbers_df


Unnamed: 0,date,count
0,1/01/1920,0
1,1/01/1921,0
2,1/01/1922,0
3,1/01/1923,0
4,1/01/1924,0
...,...,...
36867,9/12/2016,0
36868,9/12/2017,0
36869,9/12/2018,0
36870,9/12/2019,0


In [18]:
# Create a data set of averaged success rates and death rates for age and gender groups
under20 = filtered_df[filtered_df['calcage'] <20 ]
twenty_to_thirty = filtered_df[(filtered_df['calcage'] <30) & (filtered_df['calcage'] >= 20) ]
thirty_to_forty = filtered_df[(filtered_df['calcage'] <40) & (filtered_df['calcage'] >= 30) ]
forty_to_fifty = filtered_df[(filtered_df['calcage'] <50) & (filtered_df['calcage'] >= 40) ]
fifty_to_sixty = filtered_df[(filtered_df['calcage'] <60) & (filtered_df['calcage'] >= 50) ]
over60 = filtered_df[filtered_df['calcage'] >60 ]

males = filtered_df[filtered_df['sex'] == 'M' ]
females = filtered_df[filtered_df['sex'] == 'F' ]

In [19]:
class_groups = [under20, twenty_to_thirty, thirty_to_forty, forty_to_fifty, fifty_to_sixty, over60, males, females, filtered_df]
average_data = [{'group':'under 20'}, {'group':'20 - 30'}, {'group':'30 - 40'}, {'group':'40 - 50'}, {'group':'50 - 60'},
                {'group':'over 60'}, {'group':'males'}, {'group':'females'}, {'group':'overall'}]

for x, item in enumerate(class_groups):
    success = round(len(item[item['msuccess'] == True])/len(item)*100,2)
    death = round(len(item[item['death'] == True])/len(item)*100,2)
    average_data[x]['success'] = success
    average_data[x]['death'] = death
    

In [20]:
average_data = pd.DataFrame(average_data)
average_data = average_data.reset_index()
average_data

Unnamed: 0,index,group,success,death
0,0,under 20,56.49,0.76
1,1,20 - 30,52.38,1.26
2,2,30 - 40,45.81,1.56
3,3,40 - 50,44.66,1.16
4,4,50 - 60,35.76,1.49
5,5,over 60,20.94,3.2
6,6,males,46.39,1.47
7,7,females,39.93,0.74
8,8,overall,45.87,1.41


In [21]:
# Data manipulation for crowding analysis
# Need success rates and death rates based on number of climbers 

# Pull columns 
line_data = filtered_df[['climber_count', 'msuccess', 'death']] 

# Convert boolean to binary
for index, row in line_data.iterrows():
    if row['msuccess'] == True:
        line_data.iloc[index,1] = 1
    elif row['msuccess'] == False:
        line_data.iloc[index,1] = 0
    if row['death'] == True:
        line_data.iloc[index,2] = 1
    elif row['death'] == False:
        line_data.iloc[index,2] = 0    

line_data

Unnamed: 0,climber_count,msuccess,death
0,,0,0
1,,0,0
2,3.0,0,0
3,3.0,0,0
4,3.0,0,1
...,...,...,...
21693,,0,0
21694,15.0,1,0
21695,15.0,1,0
21696,15.0,1,0


In [22]:
#  Perform groupby to get totals and create a dataframe of unique numbers 
totals = line_data.groupby(by=['climber_count']).count()
totals = totals.reset_index()
totals.rename(columns={'msuccess':'total'}, inplace=True)
totals.drop('death', axis=1, inplace=True)
totals['climber_count'] = totals['climber_count'].astype(int)
totals

Unnamed: 0,climber_count,total
0,1,345
1,2,560
2,3,456
3,4,460
4,5,315
...,...,...
101,208,206
102,245,245
103,273,273
104,301,301


In [23]:
# Calculate the average success rate and death rate based on number of  people on the mountain on those days
for index, row in totals.iterrows():
    x = row['climber_count']
    filtered = line_data[line_data['climber_count']==x]
    totals.loc[index,'success'] = len(filtered[filtered['msuccess']==1])
    totals.loc[index,'death'] = len(filtered[filtered['death']==1])

for index, row in totals.iterrows():
    totals.loc[index, 'success_avg'] = totals.loc[index, 'success'] / totals.loc[index, 'total']*100
    totals.loc[index, 'death_avg'] = totals.loc[index, 'death'] / totals.loc[index, 'total']*100
    
totals

Unnamed: 0,climber_count,total,success,death,success_avg,death_avg
0,1,345,8.0,19.0,2.318841,5.507246
1,2,560,77.0,20.0,13.750000,3.571429
2,3,456,72.0,15.0,15.789474,3.289474
3,4,460,100.0,10.0,21.739130,2.173913
4,5,315,71.0,7.0,22.539683,2.222222
...,...,...,...,...,...,...
101,208,206,168.0,0.0,81.553398,0.000000
102,245,245,223.0,4.0,91.020408,1.632653
103,273,273,262.0,0.0,95.970696,0.000000
104,301,301,266.0,4.0,88.372093,1.328904


In [24]:
crowding_df = totals[['climber_count', 'success_avg', 'death_avg']]
crowding_df

Unnamed: 0,climber_count,success_avg,death_avg
0,1,2.318841,5.507246
1,2,13.750000,3.571429
2,3,15.789474,3.289474
3,4,21.739130,2.173913
4,5,22.539683,2.222222
...,...,...,...
101,208,81.553398,0.000000
102,245,91.020408,1.632653
103,273,95.970696,0.000000
104,301,88.372093,1.328904


## Load
  

 

In [55]:
# Import neccessary SQLalchemy packages
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import inspect
from sqlalchemy.orm import Session
from sqlalchemy import Column, Float, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

In [56]:
# Create the Everest Class
class Everest(Base):
    __tablename__ = "everest"
    id = Column(Integer, primary_key=True)
    expid = Column(Integer)
    membid = Column(Integer)
    myear = Column(Integer)
    sex = Column(String)
    calcage = Column(Integer)
    citizen = Column(String)
    status = Column(String)
    msolo = Column(String)
    msuccess = Column(String)
    msmtdate1 = Column(Integer)
    msmtdate2 = Column(String)
    msmtdate3 = Column(String)
    route1 = Column(String)
    route2 = Column(String)
    route3 = Column(String)
    route4 = Column(String)
    mo2used = Column(String)
    mo2none = Column(String)
    mo2climb = Column(String)
    mo2descent = Column(String)
    mo2sleep = Column(String)
    death = Column(String)
    deathdate = Column(String)
    msmtbid = Column(Integer)
    stdrte = Column(String)
    new_route = Column(String)
    new_status = Column(String)
    climber_count =  Column(Integer)
    

In [35]:
# Create the Everest Class
class Averages(Base):
    __tablename__ = "averages"
    index = Column(Integer, primary_key=True)
    group = Column(String)
    success = Column(Float)
    death = Column(Float)
    

In [36]:
# Create the Everest Class
class Crowding(Base):
    __tablename__ = "crowding"
    climber_count = Column(Integer, primary_key=True)
    success_avg = Column(String)
    death_avg = Column(Float)

In [45]:
from config import username, password, endpoint
import psycopg2

# Establish database connection (AWS RDS) 
db_url = f'postgresql://{username}:{password}@{endpoint}/everest_db'
engine = create_engine(db_url)    

In [48]:
# Load dataframes into database
filtered_df.to_sql("everest", engine, if_exists="append", index=False)
average_data.to_sql("averages", engine, if_exists="append", index=False)
crowding_df.to_sql("crowding", engine, if_exists="append", index=False)

In [59]:
filtered_df.rename(columns={"id":"climber_id"}, inplace=True)

In [65]:
# Reflect the database and ckeck keys
Base = automap_base()
Base.prepare(engine, reflect=True)
Base.classes.keys()


['averages', 'crowding', 'everest']

In [66]:
# Create a session for a trial query
session = Session(engine)
Everest = Base.classes.everest
Averages = Base.classes.averages
Crowding = Base.classes.crowding

In [68]:
for row in session.query(Everest.climber_id, Everest.sex, Everest.msuccess, Everest.new_route).limit(20):
    print(row)

(0.0, 'M', 'False', 'South')
(1.0, 'F', 'False', 'South')
(2.0, 'M', 'False', 'South')
(3.0, 'M', 'False', 'South')
(4.0, 'M', 'False', 'South')
(5.0, 'M', 'False', 'South')
(6.0, 'M', 'False', 'South')
(7.0, 'M', 'False', 'South')
(8.0, 'M', 'False', 'South')
(9.0, 'M', 'False', 'South')
(10.0, 'M', 'False', 'South')
(11.0, 'M', 'False', 'South')
(12.0, 'M', 'False', 'South')
(13.0, 'M', 'False', 'South')
(14.0, 'M', 'False', 'South')
(15.0, 'M', 'False', 'South')
(16.0, 'M', 'False', 'South')
(17.0, 'M', 'False', 'South')
(18.0, 'M', 'False', 'South')
(19.0, 'F', 'False', 'South')


In [69]:
for row in session.query(Averages.group, Averages.success, Averages.death).all():
    print(row)

('under 20', 56.49, 0.76)
('20 - 30', 52.38, 1.26)
('30 - 40', 45.81, 1.56)
('40 - 50', 44.66, 1.16)
('50 - 60', 35.76, 1.49)
('over 60', 20.94, 3.2)
('males', 46.39, 1.47)
('females', 39.93, 0.74)
('overall', 45.87, 1.41)


In [70]:
for row in session.query(Crowding.climber_count,Crowding.success_avg, Crowding.death_avg).limit(20):
    print(row)

(1, '2.318840579710145', '5.507246376811594')
(2, '13.750000000000002', '3.571428571428571')
(3, '15.789473684210526', '3.289473684210526')
(4, '21.73913043478261', '2.1739130434782608')
(5, '22.53968253968254', '2.2222222222222223')
(6, '31.351351351351354', '3.2432432432432434')
(7, '27.027027027027028', '1.1583011583011582')
(8, '47.68518518518518', '4.62962962962963')
(9, '45.911949685534594', '3.1446540880503147')
(10, '47.20812182741117', '1.015228426395939')
(11, '46.590909090909086', '0.0')
(12, '43.333333333333336', '4.166666666666666')
(13, '38.46153846153847', '5.769230769230769')
(14, '75.0', '1.7857142857142856')
(15, '60.67415730337079', '2.247191011235955')
(16, '69.48051948051948', '0.0')
(17, '82.35294117647058', '0.0')
(18, '63.33333333333333', '1.1111111111111112')
(19, '81.33333333333333', '2.666666666666667')
(20, '79.0', '0.0')


In [41]:
conn = engine.connect()
data = pd.read_sql("""
                   SELECT citizen
                   FROM everest
                   """, conn)