# This notebook provides a recipe for creating netCDF files of profiles by looping through the files in /results2, /results, and /opp
- It has been deliberately generalised to serve as a recipe for making the profiles, rather than providing explicit functions that do the job.
- I estimate that it would take about an hour to concatenate a year's worth of data. Consider a `multiprocessing`-based approach that creates multiple profiles from a queue.

In [42]:
import xarray as xr
import numpy as np
from dateutil.parser import parse
# import the file path location from the forcing_paths.py script
import importlib.util
# Load forcing_paths.py straight from its location on disk and expose it as
# the module object `paths`; its helper functions are used below to build
# the lists of files to loop through.
spec = importlib.util.spec_from_file_location(
    name="forcing_paths.py",
    location="/ocean/abhudia/MEOPAR/analysis-ashutosh/scripts/make-hdf5/forcing_paths.py",
)
paths = importlib.util.module_from_spec(spec)
# Execute the script's top-level code so that its functions become
# attributes of `paths`.
spec.loader.exec_module(paths)

# produce profiles using a multiprocessing queue

import time
import multiprocessing

def timer(func):
    """Decorator that prints the wall-clock time taken by each call to ``func``.

    The wrapped function is called with exactly the positional and keyword
    arguments given to the wrapper, and its return value is passed through
    unchanged. After the call returns, the elapsed time is printed as
    ``hours:minutes:seconds`` (whole seconds, no zero padding). Nothing is
    printed if ``func`` raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    # Local import: keeps the decorator self-contained without touching the
    # notebook's import cell.
    import functools

    # wraps() copies __name__/__qualname__/__doc__ onto the wrapper so the
    # decorated function still identifies itself by its own name.
    @functools.wraps(func)
    def f(*args, **kwargs):
        beganat = time.time()
        # Keyword arguments must be forwarded with ** so that they reach
        # `func` as keywords rather than as extra positional values.
        rv = func(*args, **kwargs)
        elapsed = int(time.time() - beganat)
        hours, remainder = divmod(elapsed, 3600)
        mins, secs = divmod(remainder, 60)
        print('\nTime elapsed: {}:{}:{}\n'.format(hours, mins, secs))
        return rv
    return f

#### Get a list of the files to loop through using the functions in the forcing_paths.py script

#### For the timestart, timeend positional arguments, pass a datetime.datetime object natively or simply use parse

#### for SSC profiles, the filetype argument is 'grid_U', 'grid_V', 'grid_W', 'grid_T' for the U, V, W, T parameters respectively; e.g. for U files:
```python
paths.salishseacast_paths(timestart = parse('1 Jan 2015'), timeend = parse('3 jan 2015'), path = '/results2/SalishSea/nowcast-green.201806/', filetype = 'grid_U')
```
#### for WaveWatch files, 
```python
paths.ww3_paths(timestart = parse('1 Jan 2015'), timeend = parse('3 jan 2015'), path = '/opp/wwatch3/nowcast/')
```
#### for HRDPS files,
```python
paths.hrdps_paths(timestart = parse('1 Jan 2015'), timeend = parse('3 jan 2015'), path = '/results/forcing/atmospheric/GEM2.5/operational/')
```

## Let's Step through creating time profiles of surface U current

In [25]:
# Date range of interest; the strings are turned into datetimes with
# `parse` when the path list is requested.
start_time = '1 june 2015'
end_time = '20 june 2015'

# Output file name: the two dates with their whitespace replaced by
# underscores, e.g. 'timeseries_1_june_2015_20_june_2015.nc'.
output_netcdf = ''.join([
    'timeseries_',
    '_'.join(start_time.split()),
    '_',
    '_'.join(end_time.split()),
    '.nc',
])

# List of paths to the 'grid_U' files between start_time and end_time.
U_paths = paths.salishseacast_paths(
    parse(start_time),
    parse(end_time),
    '/results2/SalishSea/nowcast-green.201806/',
    'grid_U',
)

### From analysis-ashutosh/climatology_analysis_notebooks/Pick grid points.ipynb, the three chosen points are located at the following grid indices (in (y, x) order):

In [26]:
# Grid indices of the three chosen points. Each is a 2-tuple of
# one-element arrays, given in (y, x) order.
SB = tuple(np.array([index]) for index in (256, 268))
TP = tuple(np.array([index]) for index in (343, 250))
SoG = tuple(np.array([index]) for index in (474, 252))

### Loop through the list of files and create a netcdf file of the profile

In [43]:
@timer
def make_profile(start_time, end_time, output_netcdf):
    """Write a netCDF time series of `vozocrtx` at one grid point.

    For every 'grid_U' file between ``start_time`` and ``end_time`` the
    `vozocrtx` variable is sampled at depth index 0 and grid indices
    y=256, x=268. The samples are concatenated along `time_counter` and
    saved as the variable 'SoG' of a new dataset.

    Args:
        start_time: Start of the date range, as a string understood by
            `dateutil.parser.parse` (e.g. '1 jan 2015').
        end_time: End of the date range, in the same format.
        output_netcdf: Path of the netCDF file to write.

    Raises:
        ValueError: If no files are found for the requested date range.
    """
    U_paths = paths.salishseacast_paths(parse(start_time), parse(end_time), '/results2/SalishSea/nowcast-green.201806/', 'grid_U')

    # NOTE(review): the indices (y=256, x=268) match the module-level `SB`
    # point, while the variable is stored under the name 'SoG' (whose listed
    # indices are y=474, x=252). Confirm which point is intended.
    point_series = []
    for file_path in U_paths:
        # Open each file in a context manager so its handle is released as
        # soon as the single-point series has been read; .load() pulls the
        # values into memory before the file is closed.
        with xr.open_dataset(file_path) as ds:
            sog_now = ds.isel(depthu = 0).vozocrtx.isel(y = 256, x = 268)
            point_series.append(sog_now.load())

    if not point_series:
        raise ValueError(
            'no grid_U files found between {!r} and {!r}'.format(start_time, end_time)
        )

    # Stitch all pieces together in a single concat instead of re-copying
    # the growing series once per file.
    sog = xr.concat(point_series, dim = 'time_counter')

    # finally, turn the profile into a netcdf file
    current_u = xr.Dataset({'SoG': sog})
    current_u.to_netcdf(output_netcdf, format = 'NETCDF4',engine = 'netcdf4')
    print(output_netcdf)

In [28]:
# Open the netCDF file named by `output_netcdf` (the file name built from
# start_time and end_time earlier in the notebook) to inspect the profile.
# As the last expression in the cell, the dataset summary is displayed below.
xr.open_dataset(output_netcdf)

<xarray.Dataset>
Dimensions:        (time_counter: 480)
Coordinates:
    nav_lat        float32 ...
    nav_lon        float32 ...
    depthu         float32 ...
    time_centered  (time_counter) datetime64[ns] ...
  * time_counter   (time_counter) datetime64[ns] 2015-06-01T00:30:00 ... 2015-06-20T23:30:00
Data variables:
    SoG            (time_counter) float32 ...

In [44]:
def manage_queue(remaining, workers, current=None):
    """Run queued tasks with at most ``workers`` of them started at a time.

    Tasks are taken from the end of ``remaining`` (so they start in reverse
    list order), started, and tracked in ``current`` until they are no
    longer alive, at which point they are joined and dropped. The function
    returns once both lists are empty. The number of queued tasks is
    printed once at the start.

    Args:
        remaining: List of not-yet-started task objects providing
            ``start()``, ``is_alive()`` and ``join()`` (e.g.
            ``multiprocessing.Process``). It is emptied in place.
        workers: Maximum number of tasks allowed to run concurrently;
            must be at least 1.
        current: Optional list of already-started tasks to manage as well.
            It is mutated in place. Defaults to a fresh empty list.

    Raises:
        ValueError: If ``workers`` is less than 1 (no task could ever be
            started, so the queue would never drain).
        RuntimeError: Re-raised from ``join()`` unless it is the
            'cannot join current thread' error, which is skipped.
    """
    # A fresh list per call: a mutable default would be shared between calls.
    if current is None:
        current = []
    if workers < 1:
        raise ValueError('workers must be at least 1, got {!r}'.format(workers))

    print(len(remaining))
    while current or remaining:
        # Fill every free worker slot before checking for finished tasks.
        while remaining and len(current) < workers:
            new_task = remaining.pop()
            new_task.start()
            current.append(new_task)

        reaped_any = False
        # Iterate over a snapshot: removing from the list being iterated
        # would make the loop skip the task that follows each removal.
        for task in list(current):
            if task.is_alive():
                continue
            try:
                task.join()
            except RuntimeError as err:
                if 'cannot join current thread' in str(err):
                    continue
                raise
            current.remove(task)
            reaped_any = True

        # Only wait when nothing finished on this pass; otherwise loop
        # straight back to hand the freed slots to queued tasks.
        if not reaped_any:
            time.sleep(1)
    
# Driver: build one multiprocessing.Process per argument tuple and hand the
# whole list to manage_queue.
#   num_processes_alive - number of worker processes allowed at once.
#   args                - one (start_time, end_time, output_netcdf) tuple per
#                         job; each tuple is passed to make_profile as its
#                         positional arguments.
# When there are more jobs than num_processes_alive, the extra jobs wait in
# the queue managed by manage_queue.
if __name__ == '__main__':
    num_processes_alive = 2
    args = [
        ('1 jan 2015', '7 jan 2015', 'jan2015.nc'),
        ('1 jan 2016', '7 jan 2016', 'jan2016.nc'),
        ('1 jan 2017', '7 jan 2017', 'jan2017.nc'),
        ('1 jan 2018', '7 jan 2018', 'jan2018.nc'),
    ]
    processes = [
        multiprocessing.Process(target=make_profile, args=job_args)
        for job_args in args
    ]
    manage_queue(processes, num_processes_alive)

4
jan2018.nc

Time elapsed: 0:0:26

jan2017.nc

Time elapsed: 0:0:28

jan2016.nc

Time elapsed: 0:0:17

jan2015.nc

Time elapsed: 0:0:17

