# Download thate from thredds Norway

In [1]:
import requests
import pandas as pd
import datetime
import numpy as np
import xarray as xr
import netCDF4
import matplotlib.pyplot as plt
import tqdm

In [40]:
def haversine(lat1, lon1, lat2, lon2):
    # return: distance [km] between two points, assuming earth is a sphere
    # https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude
    R = 6373.0 # approximate radius of earth in km
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = R * c
    return abs(distance)

In [2]:
# Eksample using lat and lon from frost data:

# set id used to communicate with FROST (string)
# you need your own ID to do this
with open('/home/erlend/frost_id', 'r') as infile:
    clientID = infile.read().split()[0]

def get_metadata_station(ids):
    url = "https://frost.met.no/sources/v0.jsonld"
    headers = {"Accept": "application/json", "Authorization": f"Basic {clientID}"}
    params = {
        "types": "SensorSystem",
        "ids": ids,
    }
    r = requests.get(url, params, headers=headers, auth=(clientID, ""))
    return r.json()
dd = get_metadata_station('SN17850') # Søråsjordet meteorological station
lon, lat = dd['data'][0]['geometry']['coordinates']

In [39]:
dd

{'@context': 'https://frost.met.no/schema',
 '@type': 'SourceResponse',
 'apiVersion': 'v0',
 'license': 'https://creativecommons.org/licenses/by/3.0/no/',
 'createdAt': '2023-11-21T14:01:54Z',
 'queryTime': 1.468,
 'currentItemCount': 1,
 'itemsPerPage': 1,
 'offset': 0,
 'totalItemCount': 1,
 'currentLink': 'https://frost.met.no/sources/v0.jsonld?types=SensorSystem&ids=SN17850',
 'data': [{'@type': 'SensorSystem',
   'id': 'SN17850',
   'name': 'ÅS',
   'shortName': 'Ås (NMBU)',
   'country': 'Norge',
   'countryCode': 'NO',
   'wmoId': 1463,
   'geometry': {'@type': 'Point',
    'coordinates': [10.7818, 59.6605],
    'nearest': False},
   'masl': 92,
   'validFrom': '1874-01-01T00:00:00.000Z',
   'county': 'VIKEN',
   'countyId': 30,
   'municipality': 'ÅS',
   'municipalityId': 3021,
   'stationHolders': ['NORGES MILJØ- OG BIOVITENSKAPELIGE UNIVERSITET',
    'MET.NO'],
   'externalIds': ['0-20000-0-01463', '10.240.40.51', '1463'],
   'wigosId': '0-20000-0-01463'}]}

In [3]:
points = {'SN17850':[lat, lon]} # add more pairs to get more data.. 

In [4]:
points['SN17850'][0]

59.6605

In [5]:
# specify which days to download data from
# edit code below to specify hours
start = '2021-12-01'
stop = '2021-12-09'

# days to iterate
days = []
for month in pd.date_range(start, stop).tolist():
    days.append(month)

In [6]:
# get lat/lon grid for thredds data (asuuming the grid stays the same for all timesteps) 
link = 'https://thredds.met.no/thredds/dodsC/metppltcarchivev1/2021/12/31/met_analysis_ltc_1_0km_nordic_20211231T23Z.nc'
ds = netCDF4.Dataset(link)
lat_norway = ds['latitude'][:].data
lon_norway = ds['longitude'][:].data

In [7]:
# shift coordinates so they approximate midpoint of the grids
# just simpler logic for me.. 
lat_norway_shift = (lat_norway[0:-1, :] + 0.5*(lat_norway[1:, :] - lat_norway[0:-1, :]))[:, 0:-1]
lon_norway_shift = (lon_norway[:, 0:-1] + 0.5*(lon_norway[:, 1:] - lon_norway[:, 0:-1]))[0:-1, :]

# find possitions of all points in thredds grid
x_indices = []
y_indices = []
for point_id in tqdm.tqdm(points.keys()):
    # get indice for this disdrometer
    lat1 = points[point_id][0]
    lon1 = points[point_id][1]
    
    index = np.argmin(haversine(lat1, lon1, lat_norway_shift, lon_norway_shift)) 
    y, x = np.unravel_index(index, lon_norway_shift.shape)
    x_indices.append(x)
    y_indices.append(y)

100%|█| 1/1 [00:00<0


In [18]:
# then for each cml and each day: download data
# here I show how to do it with air temp and precipitation, just add what you need in addition
data = []
for i, point_id in enumerate(points.keys()):
    times = []
    air_temperature_2m = []
    precipitation_amount = []
    
    for date in tqdm.tqdm(days): 
        day = date.strftime('%d')
        month = date.strftime('%m')
        year = date.strftime('%Y')
        
        for t in np.arange(0, 24):
            # setup link: 
            link = 'https://thredds.met.no/thredds/dodsC/metppltcarchivev1/'+ year + '/' + month + '/' + day + '/met_analysis_ltc_1_0km_nordic_'+year+month+day+'T%02dZ.nc' % t
            ds = netCDF4.Dataset(link)
            
            # get times for this day
            times.append(ds['time'][:].data)

            # download data ,add nan if error
            try:
                air_temperature_2m.append(ds.variables['air_temperature_2m'][:, y_indices[i], x_indices[i]].data)
            except:
                print('yes')
                air_temperature_2m.append(np.zeros(times[-1].shape)*np.nan)            
            try:
                precipitation_amount.append(ds.variables['precipitation_amount'][:, y_indices[i], x_indices[i]].data)
            except:
                precipitation_amount.append(np.zeros(times[-1].shape)*np.nan)
        
    air_temperature_2m = np.concatenate(air_temperature_2m)
    precipitation_amount = np.concatenate(precipitation_amount)
    times = np.concatenate(times) 
    times = [datetime.datetime.utcfromtimestamp(i) for i in times]
    
    da = xr.Dataset(
        data_vars= dict(
            air_temperature_2m=(["point_id", 'time'], [air_temperature_2m - 273.15]), # to celsius
            precipitation_amount=(["point_id", 'time'], [precipitation_amount]), 
        ),
        coords=dict(
            point_id = [point_id],
            time = times,
        ),
    ) 
    
    # resample and slice to same timescale as CMLs
    data.append( da.resample(time = '1T').nearest())
    

100%|█| 9/9 [00:51<0


In [19]:
da

In [20]:
#if several points, concat along stations aswell
stations_ds = xr.combine_nested(da, concat_dim='point_id')

ValueError: concat_dims has length 1 but the datasets passed are nested in a 0-dimensional structure

In [38]:
# Inspect that midpoint approx is ok: 
# # export coordinates and name of the gauges close to CML
# lat= lat_norway_shift.ravel()[0:5000]
# lon = lon_norway_shift.ravel()[0:5000]
# name = da.point_id.values
# df = pd.DataFrame([[name, 'POINT (' + str(lon[i]) + str(' ')+ str(lat[i]) + str( ')')] for i in range(len(lon))])
# df.to_csv("/home/erlend/delete/grid_shift_norway.csv", sep=';', index=False)


# lat= lat_norway.ravel()[0:5000]
# lon = lon_norway.ravel()[0:5000]
# name = da.point_id.values
# df = pd.DataFrame([[name, 'POINT (' + str(lon[i]) + str(' ')+ str(lat[i]) + str( ')')] for i in range(len(lon))])
# df.to_csv("/home/erlend/delete/grid_norway.csv", sep=';', index=False)
