In [225]:
import pandas as pd
from utils import *
from mappings import *

In [226]:
# Load the raw members table from the DBF file; the result is used as a DataFrame below.
# NOTE(review): read_dbf arrives via a star import (presumably from utils) — confirm its dtype handling there.
df = read_dbf('../data/raw/members.DBF')

In [227]:
# Preview the first rows of the freshly loaded members table.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,fname,lname,sex,age,birthdate,...,membermemo,necrology,msmtbid,msmtterm,hcn,mchksum,msmtnote1,msmtnote2,msmtnote3,deathrte
0,AMAD78301,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


### Primary Key

In [228]:
# Suffix every expedition id with its year ("<expid>_<myear>").
# Not idempotent: re-running this cell appends the year a second time.
df['expid'] = df['expid'].str.cat(df['myear'].astype(str), sep='_')

In [229]:
# Preview the table again to check the rewritten expid values.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,fname,lname,sex,age,birthdate,...,membermemo,necrology,msmtbid,msmtterm,hcn,mchksum,msmtnote1,msmtnote2,msmtnote3,deathrte
0,AMAD78301_1978,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301_1978,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301_1978,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301_1978,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301_1978,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


### Climbers


In [230]:
# Build the climber table: one row per distinct combination of personal attributes,
# with the fresh positional index exposed as the surrogate key `id`.
df_climbers = (
    df[['fname', 'lname', 'sex', 'yob', 'occupation', 'residence', 'citizen', 'hcn']]
    .drop_duplicates(ignore_index=True)
    .reset_index(names='id')
)

In [231]:
# Replace the per-person attributes in df with a reference to the climber table.
# The merge has no explicit `on`, so it joins on every column the two frames share.
df = (
    df.merge(df_climbers, how='left')
    .rename(columns={'id': 'climber_id'})
    .drop(columns=[
        'fname', 'lname', 'sex', 'yob', 'occupation', 'residence', 'citizen',
        'age', 'calcage', 'birthdate', 'hcn',
    ])
)

#### Citizenship

In [232]:
# One row per (climber, country): split the slash-separated `citizen` string into a
# list, explode the list into scalar rows, then drop any repeated pairs.
df_citizenships = (
    df_climbers[['id', 'citizen']]
    .drop_duplicates()
    .rename(columns={'id': 'climber_id'})
    .assign(citizen=lambda frame: frame['citizen'].str.split('/'))
    .explode('citizen')
    .drop_duplicates(ignore_index=True)
)

In [233]:
# Run the exploded citizenship rows through update_country_list, targeting the 'citizen' column.
# NOTE(review): the helper is star-imported; presumably it normalises country names — confirm in utils/mappings.
df_citizenships = update_country_list(df_citizenships, 'citizen')

In [234]:
# Citizenship has been moved into df_citizenships, so remove it from the climber table.
# Rebinding (instead of inplace=True) keeps the frame immutable across cells and the call chainable.
df_climbers = df_climbers.drop(columns='citizen')

### Ascensions

In [235]:
# Collect the first recorded ascent of each member: keep rows where any of the
# "ascent 1" fields carries information, then rename to the generic ascent schema.
#
# The inner parentheses around `!= 0` are required: `&` binds tighter than `!=`,
# so `s.notna() & s != 0` is evaluated as `(s.notna() & s) != 0`, which ANDs the
# boolean mask with the values instead of testing "present and non-zero".
ascent_1_df = (
    df.loc[
        df.msmtdate1.notna()
        | df.msmttime1.notna()
        | (df.mroute1.notna() & (df.mroute1 != 0))
        | (df.mascent1.notna() & (df.mascent1 != 0)),
        ['expid', 'climber_id', 'msmtdate1', 'msmttime1', 'mroute1', 'mascent1', 'msmtnote1'],
    ]
    .rename(columns={
        'msmtdate1': 'date',
        'msmttime1': 'time',
        'mroute1': 'route',
        'mascent1': 'ascent',
        'msmtnote1': 'note',
    })
    # ordinal of this ascent within the member's expedition record
    .assign(number=1)
)

In [236]:
# Collect the second recorded ascent of each member: keep rows where any of the
# "ascent 2" fields carries information, then rename to the generic ascent schema.
#
# The inner parentheses around `!= 0` are required: `&` binds tighter than `!=`,
# so `s.notna() & s != 0` is evaluated as `(s.notna() & s) != 0`, which ANDs the
# boolean mask with the values instead of testing "present and non-zero".
ascent_2_df = (
    df.loc[
        df.msmtdate2.notna()
        | df.msmttime2.notna()
        | (df.mroute2.notna() & (df.mroute2 != 0))
        | (df.mascent2.notna() & (df.mascent2 != 0)),
        ['expid', 'climber_id', 'msmtdate2', 'msmttime2', 'mroute2', 'mascent2', 'msmtnote2'],
    ]
    .rename(columns={
        'msmtdate2': 'date',
        'msmttime2': 'time',
        'mroute2': 'route',
        'mascent2': 'ascent',
        'msmtnote2': 'note',
    })
    # ordinal of this ascent within the member's expedition record
    .assign(number=2)
)

In [237]:
# Collect the third recorded ascent of each member: keep rows where any of the
# "ascent 3" fields carries information, then rename to the generic ascent schema.
#
# The inner parentheses around `!= 0` are required: `&` binds tighter than `!=`,
# so `s.notna() & s != 0` is evaluated as `(s.notna() & s) != 0`, which ANDs the
# boolean mask with the values instead of testing "present and non-zero".
ascent_3_df = (
    df.loc[
        df.msmtdate3.notna()
        | df.msmttime3.notna()
        | (df.mroute3.notna() & (df.mroute3 != 0))
        | (df.mascent3.notna() & (df.mascent3 != 0)),
        ['expid', 'climber_id', 'msmtdate3', 'msmttime3', 'mroute3', 'mascent3', 'msmtnote3'],
    ]
    .rename(columns={
        'msmtdate3': 'date',
        'msmttime3': 'time',
        'mroute3': 'route',
        'mascent3': 'ascent',
        'msmtnote3': 'note',
    })
    # ordinal of this ascent within the member's expedition record
    .assign(number=3)
)

In [238]:
# Stack the three per-ordinal ascent frames into one long table with a fresh 0..n-1 index.
df_ascents = pd.concat(
    [ascent_1_df, ascent_2_df, ascent_3_df],
    ignore_index=True,
)

In [239]:
# Preview the combined ascents table.
df_ascents.head()

Unnamed: 0,expid,climber_id,date,time,route,ascent,note,number
0,AMAD78301_1978,1,1978-10-21,,1,0,,1
1,AMAD78301_1978,3,1978-10-21,,1,0,,1
2,AMAD78301_1978,5,1978-10-20,,1,0,,1
3,AMAD78301_1978,6,1978-10-20,,1,0,,1
4,AMAD78301_1978,7,1978-10-21,,1,0,,1


In [240]:
# The per-ascent fields now live in df_ascents; remove them from df and
# collapse any rows that became identical as a result.
df = (
    df.drop(columns=[
        'msmtdate1', 'msmttime1', 'mroute1', 'mascent1', 'msmtnote1',
        'msmtdate2', 'msmttime2', 'mroute2', 'mascent2', 'msmtnote2',
        'msmtdate3', 'msmttime3', 'mroute3', 'mascent3', 'msmtnote3',
    ])
    .drop_duplicates()
)

### Participations

In [241]:
# Preview what is left in df after the climber and ascent columns were split out.
df.head()

Unnamed: 0,expid,membid,peakid,myear,mseason,status,leader,deputy,bconly,nottobc,...,injurytype,injuryhgtm,deathnote,membermemo,necrology,msmtbid,msmtterm,mchksum,deathrte,climber_id
0,AMAD78301_1978,1,AMAD,1978,3,Leader,True,False,False,False,...,0,0,,,,1,4,2426937,,0
1,AMAD78301_1978,2,AMAD,1978,3,Deputy Leader,False,True,False,False,...,0,0,,,,1,4,2426501,,1
2,AMAD78301_1978,3,AMAD,1978,3,Climber,False,False,False,False,...,0,0,,,,1,4,2431569,,2
3,AMAD78301_1978,4,AMAD,1978,3,Exp Doctor,False,False,False,False,...,0,0,,,,1,4,2426809,,3
4,AMAD78301_1978,5,AMAD,1978,3,Climber,False,False,False,False,...,0,0,,,,1,4,2429215,,4


In [242]:
# List the remaining column names.
df.columns

Index(['expid', 'membid', 'peakid', 'myear', 'mseason', 'status', 'leader',
       'deputy', 'bconly', 'nottobc', 'support', 'disabled', 'hired', 'sherpa',
       'tibetan', 'msuccess', 'mclaimed', 'mdisputed', 'msolo', 'mtraverse',
       'mski', 'mparapente', 'mspeed', 'mhighpt', 'mperhighpt', 'mo2used',
       'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep', 'mo2medical',
       'mo2note', 'death', 'deathdate', 'deathtime', 'deathtype', 'deathhgtm',
       'deathclass', 'ams', 'weather', 'injury', 'injurydate', 'injurytime',
       'injurytype', 'injuryhgtm', 'deathnote', 'membermemo', 'necrology',
       'msmtbid', 'msmtterm', 'mchksum', 'deathrte', 'climber_id'],
      dtype='object')

In [243]:
# NOTE(review): disabled draft of the participations column selection — either finish it
# (the death*/injury* columns listed by df.columns are not included here) or delete the cell.
# df_participations = df[
# 	['expid', 'membid', 'climber_id', 'status', 'leader', 'deputy', 'bconly', 'nottobc', 'support', 'disabled', 'hired', 'sherpa',
# 	 'tibetan', 'msuccess', 'mclaimed', 'mdisputed', 'msolo', 'mtraverse', 'mski', 'mparapente', 'mspeed', 'mhighpt', 'mperhighpt',
# 	 'mo2used', 'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep', 'mo2medical', 'mo2note', 'membermemo', 'necrology', 'msmtbid', 'msmtterm', 'mchksum', 'deathrte']
# ]