In [1]:
import pandas as pd
from utils import *
from mappings import *

In [2]:
# Load the raw members table from the DBF export.
# NOTE(review): read_dbf comes from one of the wildcard imports above; the later .head()/.merge
# calls treat its result as a pandas DataFrame — confirm against the helper's definition.
df = read_dbf('../data/raw/members.DBF')

In [3]:
# Preview the first rows of the freshly loaded members table.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,fname,lname,sex,age,birthdate,...,membermemo,necrology,msmtbid,msmtterm,hcn,mchksum,msmtnote1,msmtnote2,msmtnote3,deathrte
0,AMAD78301,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


### Primary Key

In [4]:
# Build the key by suffixing each expedition id with its year, e.g. 'AMAD78301' -> 'AMAD78301_1978'.
# Not idempotent: re-running this cell appends the year a second time.
year_suffix = df['myear'].astype(str)
df['expid'] = df['expid'].str.cat(year_suffix, sep='_')

In [5]:
# Check that expid now carries the '_<year>' suffix.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,fname,lname,sex,age,birthdate,...,membermemo,necrology,msmtbid,msmtterm,hcn,mchksum,msmtnote1,msmtnote2,msmtnote3,deathrte
0,AMAD78301_1978,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301_1978,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301_1978,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301_1978,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301_1978,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


### Climbers


In [6]:
# One row per distinct climber: keep only the person-level columns, de-duplicate them,
# then expose the fresh 0..n-1 row index as the surrogate key column `id`.
climber_columns = ['fname', 'lname', 'sex', 'yob', 'occupation', 'residence', 'citizen', 'hcn']
df_climbers = (
	df.loc[:, climber_columns]
	.drop_duplicates(ignore_index=True)
	.reset_index(names='id')
)

In [7]:
# Replace the per-climber attributes in df with a `climber_id` reference into df_climbers.
# The join keys are spelled out instead of relying on "all shared columns": if either frame's
# columns change (df_climbers loses `citizen` in a later cell), an implicit join would silently
# match on fewer keys, whereas an explicit `on=` raises a KeyError.
climber_keys = ['fname', 'lname', 'sex', 'yob', 'occupation', 'residence', 'citizen', 'hcn']
df = (
	df.merge(df_climbers, how='left', on=climber_keys)
	.rename(columns={'id': 'climber_id'})
	# age/calcage/birthdate are dropped together with the join keys
	.drop(columns=climber_keys + ['age', 'calcage', 'birthdate'])
)

#### Citizenship

In [8]:
# One row per (climber, country): split the slash-separated `citizen` strings into lists,
# explode the lists into scalar rows, and drop any repeated pairs.
df_citizenships = (
	df_climbers[['id', 'citizen']]
	.drop_duplicates()
	.rename(columns={'id': 'climber_id'})
	.assign(citizen=lambda frame: frame['citizen'].str.split('/'))
	.explode('citizen')
	.drop_duplicates(ignore_index=True)
)

In [9]:
# Pass the 'citizen' column through update_country_list (a helper from the wildcard imports).
# NOTE(review): presumably normalises country names — confirm against the helper's definition.
df_citizenships = update_country_list(df_citizenships, 'citizen')

In [10]:
# Citizenship now lives in df_citizenships, so remove it from the climber table.
# Rebinding (instead of inplace=True) with errors='ignore' keeps this cell safe to re-run.
df_climbers = df_climbers.drop(columns='citizen', errors='ignore')

### Ascents

In [11]:
# Keep the rows that fill ascent slot 1: a date or a time is present, or the route/ascent code is non-zero.
# Each comparison is parenthesised because `&` binds tighter than `!=`: the unparenthesised
# `s.notna() & s != 0` evaluates as `(s.notna() & s) != 0`, which mis-filters rows
# (e.g. for integer codes the bitwise AND with True zeroes out even values).
has_ascent_1 = (
	df.msmtdate1.notna()
	| df.msmttime1.notna()
	| (df.mroute1.notna() & (df.mroute1 != 0))
	| (df.mascent1.notna() & (df.mascent1 != 0))
)
ascent_1_df = (
	df.loc[has_ascent_1, ['expid', 'climber_id', 'msmtdate1', 'msmttime1', 'mroute1', 'mascent1', 'msmtnote1']]
	.rename(columns={'msmtdate1': 'date', 'msmttime1': 'time', 'mroute1': 'route', 'mascent1': 'ascent', 'msmtnote1': 'note'})
	.assign(number=1)  # records which of the three numbered slots the row came from
)

In [12]:
# Keep the rows that fill ascent slot 2: a date or a time is present, or the route/ascent code is non-zero.
# Each comparison is parenthesised because `&` binds tighter than `!=`: the unparenthesised
# `s.notna() & s != 0` evaluates as `(s.notna() & s) != 0`, which mis-filters rows
# (e.g. for integer codes the bitwise AND with True zeroes out even values).
has_ascent_2 = (
	df.msmtdate2.notna()
	| df.msmttime2.notna()
	| (df.mroute2.notna() & (df.mroute2 != 0))
	| (df.mascent2.notna() & (df.mascent2 != 0))
)
ascent_2_df = (
	df.loc[has_ascent_2, ['expid', 'climber_id', 'msmtdate2', 'msmttime2', 'mroute2', 'mascent2', 'msmtnote2']]
	.rename(columns={'msmtdate2': 'date', 'msmttime2': 'time', 'mroute2': 'route', 'mascent2': 'ascent', 'msmtnote2': 'note'})
	.assign(number=2)  # records which of the three numbered slots the row came from
)

In [13]:
# Keep the rows that fill ascent slot 3: a date or a time is present, or the route/ascent code is non-zero.
# Each comparison is parenthesised because `&` binds tighter than `!=`: the unparenthesised
# `s.notna() & s != 0` evaluates as `(s.notna() & s) != 0`, which mis-filters rows
# (e.g. for integer codes the bitwise AND with True zeroes out even values).
has_ascent_3 = (
	df.msmtdate3.notna()
	| df.msmttime3.notna()
	| (df.mroute3.notna() & (df.mroute3 != 0))
	| (df.mascent3.notna() & (df.mascent3 != 0))
)
ascent_3_df = (
	df.loc[has_ascent_3, ['expid', 'climber_id', 'msmtdate3', 'msmttime3', 'mroute3', 'mascent3', 'msmtnote3']]
	.rename(columns={'msmtdate3': 'date', 'msmttime3': 'time', 'mroute3': 'route', 'mascent3': 'ascent', 'msmtnote3': 'note'})
	.assign(number=3)  # records which of the three numbered slots the row came from
)

In [14]:
# Stack the three per-slot frames into one long ascents table with a fresh 0..n-1 index.
ascent_frames = [ascent_1_df, ascent_2_df, ascent_3_df]
df_ascents = pd.concat(ascent_frames, ignore_index=True)

In [15]:
# Preview the combined ascents table.
df_ascents.head()

Unnamed: 0,expid,climber_id,date,time,route,ascent,note,number
0,AMAD78301_1978,1,1978-10-21,,1,0,,1
1,AMAD78301_1978,3,1978-10-21,,1,0,,1
2,AMAD78301_1978,5,1978-10-20,,1,0,,1
3,AMAD78301_1978,6,1978-10-20,,1,0,,1
4,AMAD78301_1978,7,1978-10-21,,1,0,,1


In [16]:
# The per-ascent columns now live in df_ascents; remove all three numbered sets from df
# and collapse any rows that became identical as a result.
ascent_stems = ('msmtdate', 'msmttime', 'mroute', 'mascent', 'msmtnote')
ascent_columns = [f'{stem}{slot}' for slot in (1, 2, 3) for stem in ascent_stems]
df = df.drop(columns=ascent_columns).drop_duplicates()

### Calamities
#### Deaths

In [17]:
# Extract the rows flagged `death`, keeping only the death-related columns and renaming
# them to the generic names shared with the injuries table (date, time, cause, altitude, note).
death_source_columns = [
	'expid', 'climber_id', 'deathdate', 'deathtime', 'deathtype', 'deathhgtm', 'deathclass', 'ams', 'weather',
	'deathnote', 'deathrte',
]
death_renames = {
	'deathdate': 'date',
	'deathtime': 'time',
	'deathtype': 'cause',
	'deathhgtm': 'altitude',
	'deathclass': 'class',
	'deathnote': 'note',
	'deathrte': 'route',
}
df_deaths = (
	df.loc[df['death'], death_source_columns]
	.rename(columns=death_renames)
	.assign(type='death')
)
# float_to_int is a project helper; presumably it casts the route codes to integers — confirm.
df_deaths['route'] = float_to_int(df_deaths['route'])

In [18]:
# Preview the extracted death records.
df_deaths.head()

Unnamed: 0,expid,climber_id,date,time,cause,altitude,class,ams,weather,note,route,type
54,AMAD79302_1979,54,1979-10-24,1115,7,6100,3,False,False,Ice block avalanche shortly after 11 am,1,death
135,AMAD83301_1983,133,1983-10-27,2000,4,5300,6,False,False,Fall on scree slope at bottom of N Ridge after...,1,death
145,AMAD83301_1983,143,1983-10-27,2000,4,5300,6,False,False,Fall on scree slope at bottom of N Ridge after...,1,death
200,AMAD85102_1985,197,1985-05-04,930,4,5900,5,False,False,Killed in fall on descent from summit between ...,2,death
349,AMAD88102_1988,344,1988-04-09,1600,4,6200,3,False,True,Fall below C3 (estimate 6200m) during snow squall,1,death


#### Injuries

In [19]:
# Extract the rows flagged `injury`, renaming the injury columns to the same generic names
# used for deaths. The note text is taken from `deathnote`.
injury_source_columns = ['expid', 'climber_id', 'injurydate', 'injurytime', 'injurytype', 'injuryhgtm', 'deathnote']
injury_renames = {
	'injurydate': 'date',
	'injurytime': 'time',
	'injurytype': 'cause',
	'injuryhgtm': 'altitude',
	'deathnote': 'note',
}
df_injuries = (
	df.loc[df['injury'], injury_source_columns]
	.rename(columns=injury_renames)
	.assign(type='injury')
)

In [20]:
# Preview the extracted injury records.
df_injuries.head()

Unnamed: 0,expid,climber_id,date,time,cause,altitude,note,type
52,AMAD79302_1979,52,1979-10-24,,7,6100,Suffered bad rope burn from avalanche,injury
53,AMAD79302_1979,53,1979-10-24,,7,6100,"Suffered broken forearm, badly sprained ankle ...",injury
105,AMAD81302_1981,103,1981-10-23,,3,6000,,injury
123,AMAD83101_1983,121,1983-04-21,,3,6800,Lost seven toes to frostbite,injury
204,AMAD85102_1985,200,,,10,0,Stomach illness,injury


#### Calamities

In [21]:
# Stack deaths and injuries into one table; columns that exist only for deaths
# (class, ams, weather, route) are left empty on the injury rows by concat.
calamity_frames = [df_deaths, df_injuries]
df_calamities = pd.concat(calamity_frames, ignore_index=True)

In [22]:
# Preview the combined deaths-and-injuries table.
df_calamities.head()

Unnamed: 0,expid,climber_id,date,time,cause,altitude,class,ams,weather,note,route,type
0,AMAD79302_1979,54,1979-10-24,1115,7,6100,3.0,False,False,Ice block avalanche shortly after 11 am,1,death
1,AMAD83301_1983,133,1983-10-27,2000,4,5300,6.0,False,False,Fall on scree slope at bottom of N Ridge after...,1,death
2,AMAD83301_1983,143,1983-10-27,2000,4,5300,6.0,False,False,Fall on scree slope at bottom of N Ridge after...,1,death
3,AMAD85102_1985,197,1985-05-04,930,4,5900,5.0,False,False,Killed in fall on descent from summit between ...,2,death
4,AMAD88102_1988,344,1988-04-09,1600,4,6200,3.0,False,True,Fall below C3 (estimate 6200m) during snow squall,1,death


In [23]:
# The death and injury details were copied into df_deaths / df_injuries, so remove them from df.
# Each label appears exactly once, and df is rebound rather than mutated in place so the
# cell follows the same assignment style as the other cleanup cells.
calamity_columns = [
	'death', 'deathdate', 'deathtime', 'deathtype', 'deathhgtm', 'deathclass', 'ams', 'weather', 'deathnote', 'deathrte',
	'injury', 'injurydate', 'injurytime', 'injurytype', 'injuryhgtm',
]
df = df.drop(columns=calamity_columns)

### Participations

In [24]:
# TODO(review): the participations table is not built yet. The selection below is a disabled
# draft; either enable it (after checking the column list against df) or remove this cell.
# df_participations = df[
# 	['expid', 'membid', 'climber_id', 'status', 'leader', 'deputy', 'bconly', 'nottobc', 'support', 'disabled', 'hired', 'sherpa',
# 	 'tibetan', 'msuccess', 'mclaimed', 'mdisputed', 'msolo', 'mtraverse', 'mski', 'mparapente', 'mspeed', 'mhighpt', 'mperhighpt',
# 	 'mo2used', 'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep', 'mo2medical', 'mo2note', 'membermemo', 'necrology', 'msmtbid',
# 	 'msmtterm', 'mchksum']]

In [25]:
# Preview what remains in df after the climber, ascent and calamity columns were split out.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,status,leader,deputy,bconly,nottobc,...,mo2descent,mo2sleep,mo2medical,mo2note,membermemo,necrology,msmtbid,msmtterm,mchksum,climber_id
0,AMAD78301_1978,1,AMAD,1978,3,Leader,True,False,False,False,...,False,False,False,,,,1,4,2426937,0
1,AMAD78301_1978,2,AMAD,1978,3,Deputy Leader,False,True,False,False,...,False,False,False,,,,1,4,2426501,1
2,AMAD78301_1978,3,AMAD,1978,3,Climber,False,False,False,False,...,False,False,False,,,,1,4,2431569,2
3,AMAD78301_1978,4,AMAD,1978,3,Exp Doctor,False,False,False,False,...,False,False,False,,,,1,4,2426809,3
4,AMAD78301_1978,5,AMAD,1978,3,Climber,False,False,False,False,...,False,False,False,,,,1,4,2429215,4
