[IGRA (Integrated Global Radiosonde Archive) data](https://www1.ncdc.noaa.gov/pub/data/igra/)

In [7]:
import sys
sys.path.append('../python/')
from IGRA import Monthly
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from cartopy import crs

In [14]:
%matplotlib nbagg

In [166]:
def anplot(x, ax=None, color=None):
    """Scatter station positions on a map and label every point with its index.

    Parameters
    ----------
    x : pandas.DataFrame
        Station coordinates; the columns are unpacked positionally into
        ``ax.scatter`` (callers in this notebook pass longitude, latitude)
        and the index values are used as the annotation text.
    ax : cartopy GeoAxes, optional
        Axes to draw on; the current axes (``plt.gca()``) when omitted.
    color : optional
        Forwarded to ``ax.scatter``; ``None`` lets matplotlib pick the colour.

    Returns
    -------
    The collection returned by ``ax.scatter``, usable as a legend handle.
    """
    ax = plt.gca() if ax is None else ax
    # `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
    # pandas 0.23 and removed in pandas 1.0; both return the same ndarray.
    p = ax.scatter(*x.values.T, marker='o', transform=crs.PlateCarree(), color=color)

    # Rows with missing coordinates cannot be placed, so they get no label.
    labelled = x.dropna()
    if len(labelled):
        # Both values are loop-invariant: build them once instead of per station.
        xycoords = crs.PlateCarree()._as_mpl_transform(ax)
        label_color = p.get_facecolor()[0]  # labels share the marker colour
        for i, st in labelled.iterrows():
            ax.annotate(i, xy=tuple(st), xycoords=xycoords, color=label_color)

    ax.coastlines()
    ax.gridlines()
    # Restrict the view to everything between 65°S and the pole.
    ax.set_extent((-180, 180, -65, -90), crs.PlateCarree())
    return p

In [93]:
def set_logic(a, b):
    """Partition two pandas objects by index membership.

    Rows containing missing values are discarded from both inputs first.

    Returns
    -------
    tuple
        ``(only_a, only_b, common)``: the rows of ``a`` whose label is absent
        from ``b``, the rows of ``b`` whose label is absent from ``a``, and
        the rows of ``a`` whose label occurs in both.
    """
    left = a.dropna()
    right = b.dropna()
    shared = left.index.intersection(right.index)
    only_left = left.loc[left.index.difference(shared)]
    only_right = right.loc[right.index.difference(shared)]
    return only_left, only_right, left.loc[shared]

## Stations

In [12]:
# IGRA (radiosonde archive): keep only the stations at or south of 60°S
ista = Monthly.read_stations('../../data/IGRA/igra2-station-list.txt').query('lat<=-60')

# READER (BAS) and NCDC
# The store is only read here, so open it read-only: HDFStore's default mode
# ('a') opens the file for writing and silently creates an empty file when the
# path is wrong, instead of failing.
with pd.HDFStore('../../data/Antarctica/stations.h5', mode='r') as S:
    sta = S['sta']

In [9]:
# Show the IGRA station-list record with index label 89611 (Casey, per the output below)
ista.loc[89611]

id            AYM00089611
lat              -66.2825
lon               110.523
elev                   40
state                 NaN
name                CASEY
first_year           1957
last_year            2017
nobs                39636
Name: 89611, dtype: object

In [2]:
# Read the monthly IGRA tarball for one station, selected by its IGRA id
# (AYM00089611 is the id listed for station 89611 in the record shown above).
# NOTE(review): presumably returns an xarray object, judging by the helper's name — confirm.
x = Monthly.tar_to_xarray('../../data/IGRA/IGRA-mly.tar.gz', ['AYM00089611'])

### NCDC vs READER stations

In [170]:
# Split the station table by which coordinate pair is filled in: the
# lower-case columns feed the 'NCDC only' group, the capitalised ones the
# 'READER only' group, and stations with both pairs end up in 'both'.
ncdc_sta, read_sta, both_sta = set_logic(
    sta[['longitude', 'latitude']],
    sta[['Longitude', 'Latitude']],
)

plt.figure()
ax = plt.axes(projection=crs.SouthPolarStereo())

# One scatter layer per group, drawn in the same order as the legend labels.
plt.legend(
    [anplot(group, ax) for group in (ncdc_sta, read_sta, both_sta)],
    ['NCDC only', 'READER only', 'both'],
)

plt.tight_layout()

<IPython.core.display.Javascript object>

In [154]:
# Load the READER 'sfc' and 'aws' tables and join them column-wise.
# mode='r': nothing is written, and HDFStore's default mode ('a') would create
# an empty file if the path were wrong.
with pd.HDFStore('../../data/Antarctica/READER.h5', mode='r') as S:
    reader = pd.concat((S['sfc'], S['aws']), axis=1)

# drop stations with less than 12 months of data: a station (column level 0)
# is kept only if at least one of its columns has 12 or more non-missing rows.
# NOTE(review): this assumes one row per month — confirm against the store.
n_obs = reader.count().groupby(level=0).max()
# Keyword arguments are required here: the positional form `drop(labels, 1, 0)`
# only means axis=1, level=0 on pandas < 0.21; later versions read the third
# positional argument as `index`.
reader = reader.drop(n_obs.index[n_obs < 12], axis=1, level=0)

In [163]:
# Station ids (column level 0) that have an 'msl_pressure' column (column level 1).
# Keyword arguments replace the positional axis/level/how values, which newer
# pandas no longer accepts for dropna.
# NOTE(review): dropna(axis=0, how='all') removes all-missing *rows*, so it
# cannot change which columns (stations) are listed — confirm whether
# axis=1 was intended.
reader.xs('msl_pressure', axis=1, level=1).dropna(axis=0, how='all').columns.get_level_values(0).unique()

Int64Index([88958, 89057, 89524, 88967, 89034, 89050, 93947, 89611, 89571,
            88938, 89642, 88963, 89063, 89252, 68906, 89058, 88903, 89022,
            89053, 89251, 89657, 94998, 89055, 68994, 89662, 89056, 89564,
            89664, 89592, 89542, 89002, 89512, 89059, 88968, 89061, 89062,
            89132, 89066, 89665, 89042, 89532, 89573],
           dtype='int64')

In [180]:
# Station ids (column level 0) of every READER column whose variable name
# (column level 1) contains 'wind'. A plain substring test is used because
# the notebook's import cell does not import `re`, so the former
# re.search('wind', ...) call raised NameError on a fresh kernel.
w = set(col[0] for col in reader.columns.tolist() if 'wind' in col[1])

ll = ['Longitude', 'Latitude']
ncdc_wind, read_wind, both_wind = set_logic(
    # stations flagged as having wind data, located by the lower-case coordinate columns
    sta.loc[sta.WIND == True, ['longitude', 'latitude']],
    # .loc needs an ordered list-like: indexing with a set is rejected by newer
    # pandas and its iteration order is arbitrary
    sta.loc[sorted(w), ll]
)

fig, axs = plt.subplots(1, 2, figsize=(10, 6), subplot_kw={'projection': crs.SouthPolarStereo()})

# Left panel: where wind data is available, drawn in legend order.
axs[0].legend(
    [anplot(group, axs[0]) for group in (ncdc_wind, read_wind, both_wind)],
    ['NCDC only', 'READER only', 'both'])
axs[0].set_title('wind')

# Right panel: one layer per pressure variable, showing the stations that have
# a column for it.
# NOTE(review): dropna(axis=0, how='all') drops all-missing rows and therefore
# does not filter the station list — confirm whether axis=1 was intended.
axs[1].legend(
    [anplot(sta.loc[reader.xs(s, axis=1, level=1).dropna(axis=0, how='all').columns.get_level_values(0).unique(), ll], axs[1])
     for s in ['msl_pressure', 'station_level_pressure', 'pressure']],
    ['sea level', 'station level', 'aws'])
axs[1].set_title('pressure')

plt.tight_layout()

<IPython.core.display.Javascript object>