In [16]:
import pandas as pd
from io import StringIO

# Load the locally saved Beyond Horizons page and parse its HTML tables;
# the first table found is taken as the list of lines of sight (LOS).
with open('../data/raw/beyond_horizons.txt', mode='r', encoding='utf-8') as page:
    htmlString = page.read()

# read_html expects a file-like object for literal HTML, hence the StringIO wrapper.
html = StringIO(htmlString)
tables = pd.read_html(html)
rawlos = tables[0]

rawlos.head()

Unnamed: 0,FROM,TO,DIST,POST
0,Dankova – 5934m. Hindu Kush (KY),Hindu Tagh – 6421m. N/A (CH),542 km.,
1,HinduTagh – 5420m. HinduKush (CH),Kongur Tube – 7530m. HinduKush (CH),506 km.,
2,Alto Mora – 3367m. Colombia (CO),Pico La Reina – 5535m. S.N. St. Marta (CO),505 km.,
3,Alto Mora – 3367m. Colombia (CO),Pico Cristobal Colon – 5775m. S.N. St. Marta (CO),502 km.,
4,Kongur Tagh – 7649m. Hindu Kush (CH),N/N – 5017m. HinduKush (CH),500 km.,


In [17]:
# Formatting LOS from Beyond Horizons with photographic evidence
#
# Builds `knownLos` from `rawlos`: keeps only rows that have a POST entry,
# parses the FROM / TO / DIST text columns into typed columns, appends
# manually curated lines of sight and sorts by distance (longest first).

# Final column layout of the cleaned table.
LOS_COLUMNS = ['from', 'from_area', 'from_elevation', 'from_latitude', 'from_longitude',
               'to', 'to_area', 'to_elevation', 'to_latitude', 'to_longitude',
               'distance']


def _split_endpoint(endpoint, prefix):
    """Split '<name> – <elevation>m. <area>' strings into separate columns.

    Parameters
    ----------
    endpoint : pandas.Series
        Raw FROM or TO text column.
    prefix : str
        Prefix for the returned column names ('from' or 'to').

    Returns
    -------
    pandas.DataFrame
        Columns ``<prefix>``, ``<prefix>_area`` and ``<prefix>_elevation``,
        sharing the index of ``endpoint``. Elevations are nullable integers;
        text that cannot be read as a number becomes <NA>.
    """
    # First cut at "m." separates "<name> – <elevation>" from "<area>" ...
    head_and_area = endpoint.str.split(r'm\.\s*', expand=True)
    # ... then the en dash separates the name from the elevation.
    name_and_elevation = head_and_area[0].str.split(r'\s*–\s*', expand=True)
    elevation = pd.to_numeric(name_and_elevation[1], errors='coerce')
    return pd.DataFrame({
        prefix: name_and_elevation[0],
        f'{prefix}_area': head_and_area[1],
        # Numeric dtype so scraped rows agree with the manually added rows.
        f'{prefix}_elevation': elevation.round().astype('Int64'),
    })


# Rows without a POST entry are discarded.
knownLos = rawlos.dropna(subset=['POST']).rename(columns={'DIST': 'distance'})

# Distances look like "542 km."; anchor on that shape instead of slicing a
# fixed number of characters. A value that does not match yields NaN, which
# makes astype(int) fail loudly rather than silently storing a wrong number.
knownLos['distance'] = (
    knownLos['distance']
    .str.extract(r'^\s*(\d+)\s*km\.?\s*$', expand=False)
    .astype(int)
)

knownLos = knownLos.join(_split_endpoint(knownLos['FROM'], 'from'))
knownLos = knownLos.join(_split_endpoint(knownLos['TO'], 'to'))

# NOTE(review): duplicates are detected on the full row, raw POST text included,
# so the same FROM/TO pair with two different POST values is kept twice — confirm
# this is intended.
knownLos = knownLos.drop_duplicates()

# Coordinates are not available for the scraped rows. Use float NaN rather than
# None so these columns have the same dtype as the manually added rows below.
for coordinate in ('from_latitude', 'from_longitude', 'to_latitude', 'to_longitude'):
    knownLos[coordinate] = float('nan')

knownLos = knownLos[LOS_COLUMNS]

# Adding known extreme LOS not included by Beyond Horizons
extraLos = pd.DataFrame(
    [
        ['Marcy', 'New York (US)', 1629, 44.11275, -73.92371,
         'Washington', 'New Hampshire', 1916, 44.27049, -71.30327,
         210],
    ],
    columns=LOS_COLUMNS,
)

# The index still carries the row labels of `rawlos` and has gaps after dropna,
# so len(knownLos) may already be in use as a label; assigning to it would
# overwrite that row. Number the extra rows past the current maximum instead.
next_label = 0 if knownLos.empty else int(knownLos.index.max()) + 1
extraLos.index = range(next_label, next_label + len(extraLos))
knownLos = pd.concat([knownLos, extraLos])

knownLos = knownLos.sort_values('distance', ascending=False)

  knownLos.loc[len(knownLos)] = ['Marcy', 'New York (US)', 1629, 44.11275, -73.92371,


In [18]:
# Report how many rows the cleaned table holds.
confirmed_count = len(knownLos)
print(f'There are {confirmed_count} confirmed extreme lines of sight for atmospheric light curvature analysis.')

There are 73 confirmed extreme lines of sight for atmospheric light curvature analysis.


In [19]:
# Preview the first five rows (the longest distances, given the descending sort).
knownLos.head(n=5)

Unnamed: 0,from,from_area,from_elevation,from_latitude,from_longitude,to,to_area,to_elevation,to_latitude,to_longitude,distance
10,Finestrelles,Pirineos E (ES),2828,,,Pic Gaspard,Alpes (FR),3867,,,443
14,Bastiments,Pirineos E (ES),2881,,,Doigt de Dieu,Alpes (FR),4102,,,436
16,Bastiments,Pirineos E (ES),2881,,,Barre des Ecrins,Alpes (FR),4102,,,432
20,Canigou,Pirineos E (FR),2786,,,Pic Gaspard,Alpes (FR),3867,,,416
23,Canigou,Pirineos E (FR),2786,,,Barre des Ecrins,Alpes (FR),4102,,,412


In [9]:
# Persist the cleaned table; the DataFrame index is not written to the file.
output_path = '../data/clean/known_los.csv'
knownLos.to_csv(output_path, index=False)