In [1]:
import pandas as pd
from utils import *
from mappings import *

In [2]:
# Load the raw peaks table from the DBF file using the project helper read_dbf
# (star-imported above; presumably from utils -- confirm). Later cells treat the
# result as a pandas DataFrame (.head(), .drop(), .merge()).
df = read_dbf('../data/raw/peaks.DBF')

In [3]:
# Preview the first rows of the freshly loaded peaks frame.
df.head()

Unnamed: 0,peakid,pkname,pkname2,location,heightm,heightf,himal,region,open,unlisted,...,peakmemo,pyear,pseason,pexpid,psmtdate,pcountry,psummiters,psmtnote,refermemo,photomemo
0,AMAD,Ama Dablam,Amai Dablang,Khumbu Himal,6814,22356,12,2,True,False,...,"Other map altitudes:\r\n 6814m - HMG-MT, HMG...",1961,1,AMAD61101,Mar 13,"New Zealand, USA, UK","Mike Gill, Wally Romanes, Barry Bishop, Michae...",,,W Face (High 126:5 May 1993)\r\nSE Face (High ...
1,AMPG,Amphu Gyabjen,Amphu Gyabien,Khumbu Himal (N of Ama Dablam),5630,18471,12,2,True,False,...,"Other map altitudes:\r\n 5630m - HMG-Finn, N...",1953,1,AMPG53101,Apr 11,UK,"John Hunt, Tom Bourdillon",,,
2,ANN1,Annapurna I,,Annapurna Himal,8091,26545,1,5,True,False,...,"Other map altitudes:\r\n 8091m - HMG-MT, HMG...",1950,1,ANN150101,Jun 03,France,"Maurice Herzog, Louis Lachenal",,Dyhrenfurth history 1950-1977 (MM 58:44-47 Nov...,S Face (High 122:3 Jan 1993) (Beghin accident)...
3,ANN2,Annapurna II,,Annapurna Himal,7937,26040,1,5,True,False,...,"Other map altitudes:\r\n 7937m - HMG-MT, HMG...",1960,1,ANN260101,May 17,"UK, Nepal","Richard Grant, Chris Bonington, Ang Nyima Sherpa",,Dyhrenfurth history 1960-1976 (MM 51:36-37 Sep...,N Face (MM 51:36 Sep 1976)
4,ANN3,Annapurna III,,Annapurna Himal,7555,24787,1,5,True,False,...,"Other map altitudes:\r\n 7555m - HMG-MT, HMG...",1961,1,ANN361101,May 06,India,"Mohan S. Kohli, Sonam Gyatso, Sonam Girmi",,,S Side (MM 125:11 Jan 1989)\r\nSW Face (MM 71:...


### Mountains and Regions

In [4]:
def _mapping_to_frame(mapping):
    """Turn an id -> name mapping into a two-column frame with columns 'id' and 'name'.

    The mapping keys become the index of the intermediate frame and are then
    moved into a regular 'id' column; the values fill the 'name' column.
    """
    frame = pd.DataFrame.from_dict(mapping, orient='index', columns=['name'])
    return frame.reset_index(names='id')


# Lookup tables built from the himal_map / region_map mappings
# (star-imported above; presumably from the mappings module -- confirm).
df_mountains = _mapping_to_frame(himal_map)
df_regions = _mapping_to_frame(region_map)

In [5]:
# Preview the mountain lookup table built from himal_map.
df_mountains.head()

Unnamed: 0,id,name
0,0,Unclassified
1,1,Annapurna
2,2,Api/Byas Risi/Guras
3,3,Damodar
4,4,Dhaulagiri


### Peak Local Names

In [6]:
# Build one row per (peakid, alternate name) from the comma-separated pkname2 column.
# Written as a single chain so no column is assigned onto a frame derived from df.
df_local_names = (
    df[['peakid', 'pkname2']]
    .dropna()
    .assign(name=lambda names: names['pkname2'].str.split(','))
    .drop(columns='pkname2')
    .explode('name')
    # splitting on ',' keeps the blank that follows each comma ("A, B" -> " B"), so trim it
    .assign(name=lambda names: names['name'].str.strip())
    # discard blank entries produced by empty strings or stray/trailing commas
    .loc[lambda names: names['name'] != '']
)

In [7]:
# Remove the raw alternate-name column from the peaks frame.
# Rebinding df (instead of inplace=True) matches the other `df = df....` steps
# in this notebook and keeps the step chainable.
df = df.drop(columns='pkname2')

In [8]:
# Preview the peaks frame after the pkname2 column has been dropped.
df.head()

Unnamed: 0,peakid,pkname,location,heightm,heightf,himal,region,open,unlisted,trekking,...,peakmemo,pyear,pseason,pexpid,psmtdate,pcountry,psummiters,psmtnote,refermemo,photomemo
0,AMAD,Ama Dablam,Khumbu Himal,6814,22356,12,2,True,False,False,...,"Other map altitudes:\r\n 6814m - HMG-MT, HMG...",1961,1,AMAD61101,Mar 13,"New Zealand, USA, UK","Mike Gill, Wally Romanes, Barry Bishop, Michae...",,,W Face (High 126:5 May 1993)\r\nSE Face (High ...
1,AMPG,Amphu Gyabjen,Khumbu Himal (N of Ama Dablam),5630,18471,12,2,True,False,False,...,"Other map altitudes:\r\n 5630m - HMG-Finn, N...",1953,1,AMPG53101,Apr 11,UK,"John Hunt, Tom Bourdillon",,,
2,ANN1,Annapurna I,Annapurna Himal,8091,26545,1,5,True,False,False,...,"Other map altitudes:\r\n 8091m - HMG-MT, HMG...",1950,1,ANN150101,Jun 03,France,"Maurice Herzog, Louis Lachenal",,Dyhrenfurth history 1950-1977 (MM 58:44-47 Nov...,S Face (High 122:3 Jan 1993) (Beghin accident)...
3,ANN2,Annapurna II,Annapurna Himal,7937,26040,1,5,True,False,False,...,"Other map altitudes:\r\n 7937m - HMG-MT, HMG...",1960,1,ANN260101,May 17,"UK, Nepal","Richard Grant, Chris Bonington, Ang Nyima Sherpa",,Dyhrenfurth history 1960-1976 (MM 51:36-37 Sep...,N Face (MM 51:36 Sep 1976)
4,ANN3,Annapurna III,Annapurna Himal,7555,24787,1,5,True,False,False,...,"Other map altitudes:\r\n 7555m - HMG-MT, HMG...",1961,1,ANN361101,May 06,India,"Mohan S. Kohli, Sonam Gyatso, Sonam Girmi",,,S Side (MM 125:11 Jan 1989)\r\nSW Face (MM 71:...


### Peak Location

In [9]:
# Location lookup table: every distinct location string gets a sequential
# integer id (its position among the de-duplicated values).
df_locations = (
    df[['location']]
    .drop_duplicates(ignore_index=True)
    .rename(columns={'location': 'name'})
    .rename_axis('id')
    .reset_index()
)

In [10]:
# Preview the location id-to-name lookup table.
df_locations.head()

Unnamed: 0,id,name
0,0,Khumbu Himal
1,1,Khumbu Himal (N of Ama Dablam)
2,2,Annapurna Himal
3,3,Annapurna Himal (ENE of Annapurna I)
4,4,Annapurna Himal (S of Annapurna I and Fang)


In [11]:
# Swap the free-text location column for its id from df_locations.
# The lookup is renamed up front so the join key is shared and the id already
# carries its final column name; the left join keeps every peak row.
location_lookup = df_locations.rename(columns={'name': 'location', 'id': 'location_id'})
df = (
    df.merge(location_lookup, on='location', how='left')
    .drop(columns='location')
)

In [12]:
# Preview the peaks frame now that location has been replaced by location_id.
df.head()

Unnamed: 0,peakid,pkname,heightm,heightf,himal,region,open,unlisted,trekking,trekyear,...,pyear,pseason,pexpid,psmtdate,pcountry,psummiters,psmtnote,refermemo,photomemo,location_id
0,AMAD,Ama Dablam,6814,22356,12,2,True,False,False,,...,1961,1,AMAD61101,Mar 13,"New Zealand, USA, UK","Mike Gill, Wally Romanes, Barry Bishop, Michae...",,,W Face (High 126:5 May 1993)\r\nSE Face (High ...,0
1,AMPG,Amphu Gyabjen,5630,18471,12,2,True,False,False,,...,1953,1,AMPG53101,Apr 11,UK,"John Hunt, Tom Bourdillon",,,,1
2,ANN1,Annapurna I,8091,26545,1,5,True,False,False,,...,1950,1,ANN150101,Jun 03,France,"Maurice Herzog, Louis Lachenal",,Dyhrenfurth history 1950-1977 (MM 58:44-47 Nov...,S Face (High 122:3 Jan 1993) (Beghin accident)...,2
3,ANN2,Annapurna II,7937,26040,1,5,True,False,False,,...,1960,1,ANN260101,May 17,"UK, Nepal","Richard Grant, Chris Bonington, Ang Nyima Sherpa",,Dyhrenfurth history 1960-1976 (MM 51:36-37 Sep...,N Face (MM 51:36 Sep 1976),2
4,ANN3,Annapurna III,7555,24787,1,5,True,False,False,,...,1961,1,ANN361101,May 06,India,"Mohan S. Kohli, Sonam Gyatso, Sonam Girmi",,,S Side (MM 125:11 Jan 1989)\r\nSW Face (MM 71:...,2


### Peak Host

In [13]:
# Translate the phost codes through host_map with the project helper apply_map
# (presumably yielding ';'-separated country names -- confirm against utils),
# then split each value into a list of hosts.
host_names = apply_map(df['phost'], host_map)
df['host'] = host_names.str.split(';')

In [14]:
# One row per (peakid, host): explode the host lists, then pass the result
# through the project helper update_country_list for the 'host' column
# (its exact effect is defined outside this notebook -- see utils).
df_peak_hosts = update_country_list(
    df[['peakid', 'host']].explode('host'),
    'host',
)

In [15]:
# phost and the temporary host list column are no longer needed on the peaks
# frame; the peak-to-host pairs were extracted into df_peak_hosts.
# Rebinding df (instead of inplace=True) matches the other `df = df....` steps
# in this notebook and keeps the step chainable.
df = df.drop(columns=['phost', 'host'])

### Cleanup

In [16]:
# Final tidy-up of the peaks frame: discard the columns listed below and
# give the remaining name/height/foreign-key columns their final names.
df = (
    df.drop(columns=['heightf', 'pexpid', 'psmtdate', 'pcountry', 'psummiters', 'psmtnote'])
    .rename(columns={
        'pkname': 'name',
        'heightm': 'height',
        'himal': 'mountain_id',
        'region': 'region_id',
    })
)

In [17]:
# Preview the cleaned peaks frame with its renamed columns.
df.head()

Unnamed: 0,peakid,name,height,mountain_id,region_id,open,unlisted,trekking,trekyear,restrict,pstatus,peakmemo,pyear,pseason,refermemo,photomemo,location_id
0,AMAD,Ama Dablam,6814,12,2,True,False,False,,,2,"Other map altitudes:\r\n 6814m - HMG-MT, HMG...",1961,1,,W Face (High 126:5 May 1993)\r\nSE Face (High ...,0
1,AMPG,Amphu Gyabjen,5630,12,2,True,False,False,,Opened in 2002,2,"Other map altitudes:\r\n 5630m - HMG-Finn, N...",1953,1,,,1
2,ANN1,Annapurna I,8091,1,5,True,False,False,,,2,"Other map altitudes:\r\n 8091m - HMG-MT, HMG...",1950,1,Dyhrenfurth history 1950-1977 (MM 58:44-47 Nov...,S Face (High 122:3 Jan 1993) (Beghin accident)...,2
3,ANN2,Annapurna II,7937,1,5,True,False,False,,,2,"Other map altitudes:\r\n 7937m - HMG-MT, HMG...",1960,1,Dyhrenfurth history 1960-1976 (MM 51:36-37 Sep...,N Face (MM 51:36 Sep 1976),2
4,ANN3,Annapurna III,7555,1,5,True,False,False,,,2,"Other map altitudes:\r\n 7555m - HMG-MT, HMG...",1961,1,,S Side (MM 125:11 Jan 1989)\r\nSW Face (MM 71:...,2
