/
station_data.py
207 lines (173 loc) · 7.43 KB
/
station_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Copyright (c) 2019 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Pull out station metadata."""
from collections import ChainMap, namedtuple
from collections.abc import Mapping
from functools import cached_property
import numpy as np
import pandas as pd
from ..cbook import get_test_data
from ..package_tools import Exporter
from ..units import units
# Module-level exporter built from this module's globals. It is used further down both as
# the ``@exporter.export`` decorator and as a ``with exporter:`` context manager.
# NOTE(review): presumably this is what registers the module's public names -- confirm
# against ``package_tools.Exporter``.
exporter = Exporter(globals())
# Immutable record for a single station. The order of the fields is part of the tuple
# layout, so it is declared exactly once here; ``source`` carries the path of the file the
# entry was read from.
Station = namedtuple(
    'Station',
    'id synop_id name state country longitude latitude altitude source',
)
def to_dec_deg(dms):
    """Convert a degrees/minutes string to signed decimal degrees.

    Parameters
    ----------
    dms : str
        Two whitespace-separated parts: integer degrees, then a token whose first two
        characters are the integer minutes and whose last character is the hemisphere
        letter (e.g. ``'39 51N'``). An empty value is accepted.

    Returns
    -------
    float
        ``0.`` for an empty input. Otherwise degrees plus minutes / 60, positive when the
        hemisphere letter is ``'N'`` or ``'E'`` and negative for any other letter.
    """
    if not dms:
        return 0.

    whole, remainder = dms.split()
    hemisphere = remainder[-1]
    magnitude = int(whole) + int(remainder[:2]) / 60.

    if hemisphere in ('N', 'E'):
        return magnitude
    return -magnitude
def _read_station_table(input_file=None):
    """Read in the GEMPAK station table.

    Every line of the file is parsed as one fixed-width record.

    Parameters
    ----------
    input_file : str, optional
        Path of the table to read. When omitted, ``'sfstns.tbl'`` is located through
        `get_test_data`.

    Yields
    ------
    tuple
        ``(station_id, Station)`` for each line. Latitude and longitude are stored in the
        file as integer hundredths and are divided by 100 here.
    """
    if input_file is None:
        input_file = get_test_data('sfstns.tbl', as_file_obj=False)

    with open(input_file) as table:
        for row in table:
            ident = row[:9].strip()
            synop = int(row[9:16].strip())
            title = row[16:49].strip().title()
            region = row[49:52].strip()
            nation = row[52:55].strip()
            # Coordinates are written as integers scaled by 100.
            latitude = int(row[55:61].strip()) / 100.
            longitude = int(row[61:68].strip()) / 100.
            elevation = int(row[68:74].strip())

            record = Station(ident, synop_id=synop, name=title, latitude=latitude,
                             longitude=longitude, altitude=elevation, country=nation,
                             state=region, source=input_file)
            yield ident, record
def _read_master_text_file(input_file=None):
    """Read in the master text file.

    The first line of the file is discarded; every following line is parsed as one
    fixed-width record.

    Parameters
    ----------
    input_file : str, optional
        Path of the file to read. When omitted, ``'master.txt'`` is located through
        `get_test_data`.

    Yields
    ------
    tuple
        ``(station_id, Station)`` for each entry.

    Notes
    -----
    For a row with a station ID starting with ``'P'`` or ``'K'`` the country is ``'US'``.
    For any other non-empty ID the leading column is taken as the country and the state
    becomes ``'--'``. A row without a station ID keeps its leading column as the state and
    reuses the country of the most recent row that had an ID (an empty string if there has
    been none yet).
    """
    if input_file is None:
        input_file = get_test_data('master.txt', as_file_obj=False)
    with open(input_file) as station_file:
        # The first line is not a station record.
        station_file.readline()

        # Bind ``country`` up front: it is only reassigned for rows that have a station
        # ID, so without this a leading row with a blank ID would hit an unbound local.
        country = ''
        for line in station_file:
            state = line[:3].strip()
            name = line[3:20].strip().replace('_', ' ')
            stid = line[20:25].strip()
            synop_id = line[32:38].strip()
            lat = to_dec_deg(line[39:46].strip())
            lon = to_dec_deg(line[47:55].strip())
            alt_part = line[55:60].strip()
            # A blank altitude column is treated as 0.
            alt = int(alt_part) if alt_part else 0

            if stid:
                if stid[0] in ('P', 'K'):
                    country = 'US'
                else:
                    # For these rows the leading column holds the country, not a state.
                    country = state
                    state = '--'

            yield stid, Station(stid, synop_id=synop_id, name=name.title(), latitude=lat,
                                longitude=lon, altitude=alt, country=country, state=state,
                                source=input_file)
def _read_station_text_file(input_file=None):
    """Read the station text file.

    Lines starting with ``'!'`` are skipped, as are lines whose latitude column is blank
    or contains the literal ``'LAT'``.

    Parameters
    ----------
    input_file : str, optional
        Path of the file to read. When omitted, ``'stations.txt'`` is located through
        `get_test_data`.

    Yields
    ------
    tuple
        ``(station_id, Station)`` for each remaining line.
    """
    if input_file is None:
        input_file = get_test_data('stations.txt', as_file_obj=False)

    with open(input_file) as handle:
        for row in handle:
            if row.startswith('!'):
                continue

            # The latitude column decides whether this line is a station record at all.
            lat_text = row[39:45].strip()
            if lat_text in ('', 'LAT'):
                continue

            latitude = to_dec_deg(lat_text)
            ident = row[20:25].strip()
            title = row[3:20].strip().replace('_', ' ').title()
            longitude = to_dec_deg(row[47:55].strip())
            elevation = int(row[55:60].strip())

            record = Station(ident, synop_id=row[32:38].strip(), name=title,
                             latitude=latitude, longitude=longitude, altitude=elevation,
                             country=row[81:83].strip(), state=row[:3].strip(),
                             source=input_file)
            yield ident, record
def _read_airports_file(input_file=None):
    """Read the airports file.

    Parameters
    ----------
    input_file : str, optional
        Path of the CSV file to read. When omitted, ``'airport-codes.csv'`` is located
        through `get_test_data`. The columns ``ident``, ``latitude_deg``,
        ``longitude_deg``, ``elevation_ft`` and ``iso_region`` are read.

    Returns
    -------
    dict
        Mapping of airport identifier to `Station`. The synoptic ID is always 99999, the
        altitude is converted from feet to meters, and the fields this file does not
        supply are filled with placeholders (empty name, ``'--'`` state).

    Notes
    -----
    This used to return ``DataFrame.to_dict()``, whose default orientation is keyed by
    column name. Passed through ``dict(...)`` as a station table, that exposed the column
    names as station IDs and made no airport reachable by its identifier.
    """
    if input_file is None:
        input_file = get_test_data('airport-codes.csv', as_file_obj=False)
    df = pd.read_csv(input_file)

    altitude_m = units.Quantity(df.elevation_ft.values, 'ft').to('m').m
    # NOTE(review): this keeps the original choice of the part *after* the first '-' as
    # the country. If ``iso_region`` holds 'CC-SUB' style codes, index 0 would be the
    # country and index 1 the subdivision -- confirm against the data file.
    country = df.iso_region.str.split('-', n=1, expand=True)[1].values

    columns = zip(df.ident.values, df.latitude_deg.values, df.longitude_deg.values,
                  altitude_m, country)
    # Key by identifier and build the same record type the other readers produce, so the
    # result can be layered with them and accessed by attribute.
    return {stid: Station(stid, synop_id=99999, name='', state='--', country=ctry,
                          longitude=lon, latitude=lat, altitude=alt, source=input_file)
            for stid, lat, lon, alt, ctry in columns}
@exporter.export
class StationLookup(Mapping):
    """Look up station information from multiple sources.

    Instances behave as a read-only `Mapping` keyed by station ID, so the usual mapping
    operations (iteration, ``len``, ``in``, indexing) all work. For example, to collect
    every ID matching some criterion:

    >>> import metpy.io
    >>> conus_stations = [s for s in metpy.io.station_info if s.startswith('K')]
    >>> conus_stations[:3]
    ['KEET', 'K8A0', 'KALX']
    """

    @cached_property
    def tables(self):
        """Return an iterable mapping combining all the tables.

        The sources are read on first access and the result is cached on the instance.
        Earlier sources take precedence when the same ID appears in more than one.
        """
        readers = (_read_station_table, _read_master_text_file,
                   _read_station_text_file, _read_airports_file)
        return ChainMap(*[dict(reader()) for reader in readers])

    def __len__(self):
        """Get the number of stations."""
        combined = self.tables
        return len(combined)

    def __iter__(self):
        """Allow iteration over the stations."""
        combined = self.tables
        return iter(combined)

    def __getitem__(self, stid):
        """Lookup station information from the ID.

        Raises
        ------
        KeyError
            If no source has an entry for ``stid``.
        """
        try:
            found = self.tables[stid]
        except KeyError:
            # Replace the bare key error with a descriptive one, without chaining.
            raise KeyError(f'No station information for {stid}') from None
        return found
# Shared module-level lookup instance; `add_station_lat_lon` below reads from it.
# Constructing it does not read any station file: `StationLookup.tables` is a
# `cached_property`, so the sources are loaded on first access.
# NOTE(review): created inside the exporter's context manager, presumably so that
# `station_info` is picked up as a public name -- confirm.
with exporter:
    station_info = StationLookup()
@exporter.export
def add_station_lat_lon(df, stn_var=None):
    """Lookup station information to add the station latitude and longitude to the DataFrame.

    This function will add two columns to the DataFrame ('latitude' and 'longitude') after
    looking up all unique station identifiers available in the DataFrame. The DataFrame is
    modified in place and also returned. Stations that are not found in
    :data:`!station_info` are left as NaN.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The DataFrame that contains the station observations

    stn_var : str, optional
        The string of the variable name that represents the station in the DataFrame. If not
        provided, 'station', 'stid', and 'station_id' are tried in that order.

    Returns
    -------
    `pandas.DataFrame` that contains original Dataframe now with the latitude and longitude
    values for each location found in :data:`!station_info`.

    Raises
    ------
    KeyError
        If ``stn_var`` is not given and none of the default column names is present, or if
        the given ``stn_var`` is not a column of ``df``. In both cases ``df`` is left
        unmodified.

    """
    def key_finder(df):
        names_to_try = ('station', 'stid', 'station_id')
        for id_name in names_to_try:
            if id_name in df:
                return id_name
        raise KeyError('Second argument not provided to add_station_lat_lon, but none of '
                       f'{names_to_try} were found.')

    if stn_var is None:
        stn_var = key_finder(df)

    # Resolve the station column before touching ``df`` so that a bad or missing column
    # name raises without leaving half-initialized coordinate columns behind.
    station_ids = df[stn_var]

    df['latitude'] = np.nan
    df['longitude'] = np.nan

    for stn in station_ids.unique():
        # Only the lookup itself is guarded; a KeyError raised while assigning would be a
        # genuine error and must not be mistaken for an unknown station.
        try:
            info = station_info[stn]
        except KeyError:
            # Unknown station: its rows keep the NaN they were initialized with.
            continue

        rows = station_ids == stn
        df.loc[rows, 'latitude'] = info.latitude
        df.loc[rows, 'longitude'] = info.longitude

    return df