In [1]:
import netCDF4 as nc
import netcdftime
import pandas as pd
import numpy as np
from scipy import spatial
np.set_printoptions(precision=3,suppress=True)

import glob
from tqdm import tqdm_notebook as tqdm
import datetime

In [2]:
def coords_to_index(filename, coords):
    # The idea is to make a table, where all the model grid points are stored in a following manner:
    # latitide - longitude - x index - y index
    # And make query to this table
    dset = nc.Dataset(filename, 'r')
    
    lat = dset.variables['nav_lat'][:]  
    lon = dset.variables['nav_lon'][:]
    
    # The following code returns 2x1d arrays for the lat-lon mesh
    lat_mesh = np.dstack(np.meshgrid(np.arange(np.shape(lat)[0]),
                                     np.arange(np.shape(lat)[1]),
                                     indexing='ij'))[:,:,0].ravel()
    
    lon_mesh = np.dstack(np.meshgrid(np.arange(np.shape(lat)[0]),
                                     np.arange(np.shape(lat)[1]),
                                     indexing='ij'))[:,:,1].ravel()
    # stack all the 1d arrays to the table
    array = np.column_stack((lat.ravel(),
                   lon.ravel(),
                   lat_mesh,
                   lon_mesh))
    
    latlonarr = array[:,[0,1]]
    
    # Here the KD-tree algorythm is used for finding the closest spatial point (nearest neighbour)
    # More information about the algorithm
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.html
    # input: array(2d array of points to search), query (list-like coordinates to find neighbour)

    tree = spatial.KDTree(latlonarr, leafsize=100)
    result = tree.query(coords)[1]
    
    idxs = array[result][[2,3]].astype(int)
    return idxs

def file_time(dset):
    file_in = dset
    tname = "time_instant"
    
    nctime = dset.variables[tname][:] # get values
    t_unit = dset.variables[tname].units # get unit
    
    try :
        t_cal = dset.variables[tname].calendar
    except AttributeError : # Attribute doesn't exist
        t_cal = u"gregorian" # or standard

    datevar = netcdftime.num2date(nctime,units = t_unit,calendar = t_cal)[0]

    datestr = datevar.strftime('%Y%m%d')
    return datestr

In [3]:
def extract_point(dset, point, variables_list):
    
    filetime = file_time(dset)
    out_list = [filetime]

    for var in variables_list:
        dimensions = dset.variables[var].dimensions

        if  len(dimensions) == 4:
            data = dset.variables[var][0,0,:,:] # choose 2d field for one time moment on the 1st depth layer
            value = data[point[0], point[1]]
            
        elif len(dimensions) == 3:
            data = dset.variables[var][0,:,:] # choose 2d field for one time moment on the 1st depth layer
            value = data[point[0], point[1]]
            
        out_list.append(value)
        
    return out_list

In [4]:
lat = 80
lon = 90
idxs = coords_to_index('ARCTIC_1h_T_grid_T_20170101-20170101.nc', [lat, lon])

In [5]:
# T-grid
filelist = glob.glob('/Users/drigo/ITMO/_disser/surrogate/testdata/ARCTIC_1h_T_grid_T_*.nc')
var_list = ['sossheig','votemper','vosaline']

In [6]:
def makedf(filelist, varlist):
    data = []
    for f in tqdm(filelist):
        dset = nc.Dataset(f, 'r')
        filedata = extract_point(dset, idxs, var_list)
        data.append(filedata)
        dset.close()
        
    df = pd.DataFrame(data=data, columns = ['date']+var_list)
    df['Date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df.set_index('Date', inplace = True)
    df.drop(['date'], axis = 1, inplace = True)
    return df

In [7]:
makedf(filelist, var_list)

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




Unnamed: 0_level_0,sossheig,votemper,vosaline
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-07,0.961914,-1.474504,28.656054
2017-01-02,0.590346,-1.532974,28.497662
2017-01-04,0.675867,-1.536139,28.518414
2017-01-05,0.823841,-1.534338,28.493067
2017-01-12,0.741309,-1.481854,28.726442
2017-01-10,0.803201,-1.479769,28.517647
2017-01-08,0.954632,-1.484992,28.615772
2017-01-06,0.811704,-1.533235,28.467285
2017-01-03,0.648458,-1.536795,28.531078
2017-01-11,0.744901,-1.509002,28.561533
