To make a kernel with a siphon virtualenv from the terminal:

```
conda create -n siphon siphon
conda activate siphon
conda install ipykernel
python -m ipykernel install --user --name=siphon
```

Then select the `siphon` from the pulldown/window in Jupyter.

In [1]:
from siphon.simplewebservice.wyoming import WyomingUpperAir
from datetime import datetime
import pandas

start_time = datetime(2023, 3, 1, 0)
end_time = datetime(2023, 6, 30, 0)

station = "43371" # Trivandrum, Thiruvananthapuram, 'VOTX'?

df = WyomingUpperAir.request_data(start_time, station) # returns Pandas dataframe

In [2]:
df

Unnamed: 0,pressure,height,temperature,dewpoint,direction,speed,u_wind,v_wind,station,station_number,time,latitude,longitude,elevation,pw
0,1005.0,64,26.2,22.1,0.0,0.0,-0.0,-0.0,,43371,2023-03-01,8.48,76.95,64.0,38.48
1,1000.0,109,25.8,22.8,30.0,2.0,-1.0,-1.732051,,43371,2023-03-01,8.48,76.95,64.0,38.48
2,925.0,793,22.0,17.0,80.0,4.0,-3.939231,-0.6945927,,43371,2023-03-01,8.48,76.95,64.0,38.48
3,900.0,1030,20.2,16.4,75.0,10.0,-9.659258,-2.58819,,43371,2023-03-01,8.48,76.95,64.0,38.48
4,850.0,1523,16.4,15.3,75.0,15.0,-14.488887,-3.882286,,43371,2023-03-01,8.48,76.95,64.0,38.48
5,848.0,1543,16.2,15.2,75.0,15.0,-14.488887,-3.882286,,43371,2023-03-01,8.48,76.95,64.0,38.48
6,841.0,1614,15.6,15.0,80.0,15.0,-14.772116,-2.604723,,43371,2023-03-01,8.48,76.95,64.0,38.48
7,804.0,1992,13.8,12.7,105.0,17.0,-16.420739,4.399924,,43371,2023-03-01,8.48,76.95,64.0,38.48
8,753.0,2543,11.1,9.3,90.0,21.0,-21.0,-1.285879e-15,,43371,2023-03-01,8.48,76.95,64.0,38.48
9,738.0,2712,10.3,8.2,110.0,23.0,-21.61293,7.866463,,43371,2023-03-01,8.48,76.95,64.0,38.48


In [3]:
# for column in df:
#     print( df[column].name )
#     print( df[column].values )

# df[column].shape[0]
df[df.columns[0:8]]

# skip column 8

# scalars


df.columns[9]
type(df[df.columns[9]]) is pandas.core.series.Series

df[df.columns[10]][0].strftime('%Y-%m-%d %H:%M:%S') # convert Datestamp to string
type(df[df.columns[10]][0]) is pandas._libs.tslibs.timestamps.Timestamp
    

True

In [5]:
from netCDF4 import Dataset

"convert Timestamps into string, otherwise just return input."
def Timestamp2String( t ):
    return ( t.strftime('%Y-%m-%d %H:%M:%S') if type(t) is pandas._libs.tslibs.timestamps.Timestamp else t )

"write a sounding dataframe as a NetCDF4 file."
def sounding2nc( df, filename ):

    # Open a new NetCDF file for writing
    nc_file = Dataset(filename, mode='w')

    # Set the attributes from df cols 9...
    for column in df.columns[9:]:
        #  print( "set attribute "+df[column].name+"=", Timestamp2String(df[column][0]) )
        nc_file.setncattr( df[column].name, Timestamp2String(df[column][0]) )

    # or set all at once from a dict: setncatts(self,attdict)

    # Define dimensions for pressure, temperature, etc. variables
    time_dim = nc_file.createDimension('time', 1)
    level_dim = nc_file.createDimension('level', df[df.columns[0]].shape[0])

    units_list = [ 'hPa', 'm', 'degrees C', 'degrees C', 'degrees', 'm/s', 'm/s', 'm/s' ]

    # Create variables for time, level, latitude, longitude, pressure, and temperature
    for i, column in enumerate( df.columns[0:7] ):
        # create the netcdf variable
        var = nc_file.createVariable(df[column].name, 'f4', ('time', 'level'))
        # write the data to the NetCDF file
        var[:] = df[column][:]
        # supply units attributes
        var.units = units_list[i]

    # Close the NetCDF file
    return nc_file.close()

In [None]:
"write a sounding dataframe as a NetCDF4 file."
def sounding2nc( df, filename ):

    # Open a new NetCDF file for writing
    nc_file = Dataset(filename, mode='w')

    # Set the attributes from df cols 9...
    for column in df.columns[9:]:
        #  print( "set attribute "+df[column].name+"=", Timestamp2String(df[column][0]) )
        nc_file.setncattr( df[column].name, Timestamp2String(df[column][0]) )

    # or set all at once from a dict: setncatts(self,attdict)

    # Define dimensions for pressure, temperature, etc. variables
    time_dim = nc_file.createDimension('time', 1)
    level_dim = nc_file.createDimension('level', df[df.columns[0]].shape[0])

    units_list = [ 'hPa', 'm', 'degrees C', 'degrees C', 'degrees', 'm/s', 'm/s', 'm/s' ]

    # Create variables for time, level, latitude, longitude, pressure, and temperature
    for i, column in enumerate( df.columns[0:7] ):
        # create the netcdf variable
        var = nc_file.createVariable(df[column].name, 'f4', ('time', 'level'))
        # write the data to the NetCDF file
        var[:] = df[column][:]
        # supply units attributes
        var.units = units_list[i]

    # Close the NetCDF file
    return nc_file.close()

In [155]:
# get Trivandrum soundings from 2019, write to netcdf

start_time = datetime(2019, 3, 1, 0)
end_time = datetime(2019, 6, 30, 0)
station = "43371" # Trivandrum, Thiruvananthapuram, 'VOTX'?

for dt in pandas.date_range(start_time, end_time, freq='12H'):
    try:
        df = WyomingUpperAir.request_data(dt, station) # returns Pandas dataframe
        sounding2nc( df, "../data/uwyo/trivandrum/Trivandrum"+df.time[0].strftime('%Y%m%d_%H%M')+".nc" )
    except:
        continue
    else:
        continue
        



In [7]:
start_time = datetime(2019, 3, 1, 0)
end_time = datetime(2019, 6, 30, 0)
station = "43063"  # Pune
for dt in pandas.date_range(start_time, end_time, freq='12H'):
    try:
        df = WyomingUpperAir.request_data(dt, station) # returns Pandas dataframe
        sounding2nc( df, "../data/uwyo/pune/Pune"+df.time[0].strftime('%Y%m%d_%H%M')+".nc" )
    except:
        continue
    finally:
        continue

In [8]:
# IGRA2
from siphon.simplewebservice.igra2 import IGRAUpperAir

station = "INM00043192" # Goa
df, header = IGRAUpperAir.request_data(start_time, station)
# ! probably in a different format!

In [47]:
type(df.columns) # pandas.core.indexes.base.Index
# var_indx = pandas.core.indexes.base.Index(['pressure', 'height', 'temperature', 'dewpoint'])
header
df
units_list = [ 'none', 'none', 'seconds?', 'hPa', 'none', 'meters', 'none', 'degrees C', 'none', 'percent', 'degrees', 'm/s', 'datestring', 'm/s', 'm/s', 'degree C']
    
# df.drop(columns='date')
# for i, column in enumerate(df.columns):
#     print( i, column, units_list[i] )

dfd = df.drop(columns='date')
for i, column in enumerate(dfd.columns):
    print( i, column, units_list[i] )

0 lvltyp1 none
1 lvltyp2 none
2 etime seconds?
3 pressure hPa
4 pflag none
5 height meters
6 zflag none
7 temperature degrees C
8 tflag none
9 relative_humidity percent
10 direction degrees
11 speed m/s
12 date datestring
13 u_wind m/s
14 v_wind m/s
15 dewpoint degree C
0 lvltyp1 none
1 lvltyp2 none
2 etime seconds?
3 pressure hPa
4 pflag none
5 height meters
6 zflag none
7 temperature degrees C
8 tflag none
9 relative_humidity percent
10 direction degrees
11 speed m/s
12 u_wind datestring
13 v_wind m/s
14 dewpoint m/s


In [53]:
"write an IGRA2 sounding dataframe as a NetCDF4 file."
def igra2nc( df, header, filename ):

    # Open a new NetCDF file for writing
    nc_file = Dataset(filename, mode='w')

    # Set the attributes from header
    for column in header.columns:
        nc_file.setncattr( header[column].name, Timestamp2String(header[column][0]) )

    # or set all at once from a dict: setncatts(self,attdict)

    # Define dimensions for pressure, temperature, etc. variables
    time_dim = nc_file.createDimension('time', 1)
    level_dim = nc_file.createDimension('level', df[df.columns[0]].shape[0])

    dfd = df.drop(columns='date') # Datestamp redundant and type not allowed in NetCDF
    units_list = [ 'none', 'none', 'seconds?', 'hPa', 'none', 'meters', 'none', 'degrees C', 'none', 'percent', 'degrees', 'm/s', #'datestring', 
                   'm/s', 'm/s', 'degree C']

    # Create variables for time, level, latitude, longitude, pressure, and temperature
    for i, column in enumerate( dfd.columns ):
        # create the netcdf variable
        var = nc_file.createVariable(dfd[column].name, 'f4', ('time', 'level'))
        # write the data to the NetCDF file
        var[:] = dfd[column][:]
        # supply units attributes
        var.units = units_list[i]b

    # Close the NetCDF file
    return nc_file.close()

In [60]:
pandas.date_range(start_time, end_time, freq='12H')

DatetimeIndex(['2019-03-01 00:00:00', '2019-03-01 12:00:00',
               '2019-03-02 00:00:00', '2019-03-02 12:00:00',
               '2019-03-03 00:00:00', '2019-03-03 12:00:00',
               '2019-03-04 00:00:00', '2019-03-04 12:00:00',
               '2019-03-05 00:00:00', '2019-03-05 12:00:00',
               ...
               '2019-06-25 12:00:00', '2019-06-26 00:00:00',
               '2019-06-26 12:00:00', '2019-06-27 00:00:00',
               '2019-06-27 12:00:00', '2019-06-28 00:00:00',
               '2019-06-28 12:00:00', '2019-06-29 00:00:00',
               '2019-06-29 12:00:00', '2019-06-30 00:00:00'],
              dtype='datetime64[ns]', length=243, freq='12H')

In [None]:
start_time = datetime(2019, 3, 1, 0)
end_time = datetime(2019, 6, 30, 0)
station = "INM00043192" # Goa

for dt in pandas.date_range(start_time, end_time, freq='12H'): # freq='D'
    print( dt.strftime('%Y%m%d_%H%M') )
    try:
        df, header = IGRAUpperAir.request_data(dt.to_pydatetime(), station)
        igra2nc( df, header, "../data/igra2/goa/Goa"+dt.strftime('%Y%m%d_%H%M')+".nc" )
    except:
        # print exception!
        continue
    finally:
        continue
        
# IGRA2 downloading is slow.
# There does not appear to be any 12Z soundings.


20190301_0000
20190301_1200
20190302_0000
20190302_1200
20190303_0000
20190303_1200
20190304_0000
20190304_1200
20190305_0000
20190305_1200
20190306_0000
20190306_1200
20190307_0000
20190307_1200
20190308_0000
20190308_1200
20190309_0000
20190309_1200
20190310_0000
20190310_1200
20190311_0000
20190311_1200
20190312_0000
20190312_1200
20190313_0000
20190313_1200
20190314_0000
20190314_1200
20190315_0000
