In [1]:
from my_packages import signal_elaboration as s
from my_packages.utils import probes, probes_walk, HandlePaths
from my_packages.directory_data import  GetCoordinates, make_generator
from my_packages.my_hdf5 import explore_library


import os
import pandas
import numpy as np
from datetime import datetime
import scipy.signal
import scipy.io
import pickle
import h5py

In [2]:
from pprint import pprint


# Explore the NAS layout.
# (The NAS can also be browsed directly with command-line tools.)
print("we have the following probes", probes)
print("the structure in which the files are saved is: ")
pprint(probes_walk)

# Flatten the nested, json-like directory description into path lists.
data_paths = HandlePaths()(probes_walk)

# Per-probe view: each probe's sub-tree is resolved under its own NAS root.
probe_paths = dict(
    (probe, HandlePaths(base_path=f"/NAS/{probe}")(probes_walk[probe]))
    for probe in probes
)

# Same flattening as `data_paths`, kept as a separate object under its own name.
all_paths = HandlePaths()(probes_walk)

pprint(probe_paths)
pprint(all_paths)


we have the following probes ['xfb31', 'XFE_04s', 'XFR31']
the structure in which the files are saved is: 
{'XFE_04s': {'r18': {}, 'r18_o': {}},
 'XFR31': {'r18_o': {'along_x': {}, 'along_y': {}}},
 'xfb31': {'r18': {}}}
{'XFE_04s': ['/NAS/XFE_04s/r18', '/NAS/XFE_04s/r18_o'],
 'XFR31': ['/NAS/XFR31/r18_o/along_x', '/NAS/XFR31/r18_o/along_y'],
 'xfb31': ['/NAS/xfb31/r18']}
['/NAS/xfb31/r18',
 '/NAS/XFE_04s/r18',
 '/NAS/XFE_04s/r18_o',
 '/NAS/XFR31/r18_o/along_x',
 '/NAS/XFR31/r18_o/along_y']


In [9]:
## Build one GetCoordinates handler per measurement directory, grouped by probe,
## and write each handler to the shared HDF5 library right after creating it.
path = "measurements.h5"

data_handlers = {}
for probe in probes:
    handlers_for_probe = []
    data_handlers[probe] = handlers_for_probe

    for measurement in probe_paths[probe]:
        print("current measurement path:", measurement)
        dh = GetCoordinates(measurement)
        # Register the handler before saving so it is kept even if saving fails.
        handlers_for_probe.append(dh)
        dh.save_to_hdf5(path)


current measurement path: /NAS/xfb31/r18
the probe contains the following groups:  []
current measurement path: /NAS/XFE_04s/r18


Exception: must give the list of contents

In [5]:
# Build a coordinates handler for the first measurement path in `all_paths`.
dh = GetCoordinates(all_paths[0])
# Unpack the handler's "grid" coordinates as positional arguments; the returned
# generator object is the cell's displayed value.
make_generator(*dh.coordinates["grid"])


<my_packages.directory_data.Batch_Generator at 0x7fa0f0938e50>

In [8]:
# Save the current handler to the HDF5 library, then print the library's
# groups, datasets and attributes to check what was written.
dh.save_to_hdf5("measurements.h5")
explore_library("measurements.h5")

the probe contains the following groups:  ['coordinates']
NAME:             xfb31             
Type: GROUP - Subgroups: ['coordinates']
Parent Path: /         
Attributes: 
{}




NAME:       xfb31/coordinates       
Type: GROUP - Subgroups: ['measurement_points', 'x_coordinates', 'y_coordinates']
Parent Path: /xfb31    
Attributes: 
{'creation date': '14/09/2022 21:39:15',
 'description': 'These coordinates were obtained as the coordinates that '
                'appear atleast once among the         measurement points as '
                'found in the names of the csv files',
 'measurement_path': '/NAS/xfb31/r18'}




NAME: xfb31/coordinates/measurement_points
Type:       DATASET       
Parent Path: /xfb31/coordinates
Attributes: 
{}
shape:  (2166,) ____ dtype:  [('x', '<f2'), ('y', '<f2')]




NAME: xfb31/coordinates/x_coordinates
Type:       DATASET       
Parent Path: /xfb31/coordinates
Attributes: 
{}
shape:  (57,) ____ dtype:  float32




NAME: xfb31/coordinates/y_coordinates
T

In [11]:
# Inspect the dtype of the handler's point table (shown as the cell output).
dh.point_table.dtype

dtype((numpy.record, [('x', '<f2'), ('y', '<f2')]))

In [9]:
from my_packages.my_hdf5 import *
from pprint import pprint

# List the available .h5 files.
print(get_all_h5())
# Show the group and dataset keys at the root of the library ...
pprint(see_groups_and_datasets("measurements.h5"))
# ... and inside the "XFE_04s" group.
pprint(see_groups_and_datasets("measurements.h5", "XFE_04s"))

['xfb31.h5', 'measurements.h5']
{'dataset_keys': ['measurement_points'],
 'group_keys': ['XFE_04s', 'XFR31', 'xfb31']}
{'dataset_keys': [], 'group_keys': []}


In [95]:
def save_measurement_info(library, dh, probes, measurement_info=None, group_info=None):
    """Write the coordinates held by ``dh`` into the probe's group of an HDF5 library.

    The probe name is derived from ``dh.path`` with ``get_probe_from_path``.
    The library file and the probe group are created first when they do not
    exist. A ``coordinates`` sub-group is then created with the datasets
    ``x_coordinates``, ``y_coordinates`` and ``measurement_points`` (all stored
    as float32) and with attributes recording the creation date, the
    measurement path and a short description.

    If the probe group already contains a ``coordinates`` group, the user is
    prompted on stdin; any answer other than ``y`` leaves the file untouched.

    Parameters
    ----------
    library : str
        Path of the HDF5 file to create or update.
    dh : object
        Handler exposing ``path``, ``coordinates["x"]``, ``coordinates["y"]``
        and ``points``; the three arrays must provide a ``shape``.
    probes : iterable of str
        Known probe names, used to identify the probe from ``dh.path``.
    measurement_info : dict, optional
        Accepted for interface compatibility; currently not used.
    group_info : dict, optional
        Keyword arguments forwarded to ``add_group`` when the probe group has
        to be created.

    Returns
    -------
    None
    """
    # ``None`` replaces the former mutable ``{}`` defaults.
    if group_info is None:
        group_info = {}

    path = dh.path
    probe = get_probe_from_path(probes, path)
    xcoord = dh.coordinates["x"]
    ycoord = dh.coordinates["y"]

    if not exists(library):
        build_hdf5(name=library, groups=[probe])

    if not group_exist(library, probe):
        add_group(library, probe, **group_info)

    dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    # Open the library in append mode and work inside the probe's group.
    with h5py.File(library, "a") as f:
        g = f[probe]

        # Check whether the coordinates group already exists.
        group_keys = [key for key, item in g.items() if isinstance(item, h5py.Group)]
        print(group_keys)
        if "coordinates" in group_keys:
            res = input("type y to overwrite")
            if res != "y":
                return
            del g["coordinates"]

        coord_gr = g.create_group("coordinates")
        coord_gr.attrs["creation date"] = dt_string
        coord_gr.attrs["measurement_path"] = path
        # Implicit string concatenation keeps the text on one logical line; a
        # backslash continuation inside the literal would embed the source
        # indentation in the stored description.
        coord_gr.attrs["description"] = (
            "These coordinates were obtained as the coordinates that appear atleast "
            "once among the measurement points as found in the names of the csv files"
        )

        # require_dataset creates the dataset when it does not exist yet; the
        # group was just created, so all three datasets are new here.
        coord_gr.require_dataset("x_coordinates", shape=xcoord.shape, dtype=np.float32, data=xcoord)
        coord_gr.require_dataset("y_coordinates", shape=ycoord.shape, dtype=np.float32, data=ycoord)
        coord_gr.require_dataset("measurement_points", shape=dh.points.shape, dtype=np.float32, data=dh.points)


    
def get_probe_from_path(probes, path):
    """Return the single probe name from ``probes`` that occurs in ``path``.

    Matching is a plain, case-sensitive substring test (``probe in path``).

    Parameters
    ----------
    probes : iterable of str
        Candidate probe names.
    path : str
        Measurement path expected to contain exactly one probe name.

    Returns
    -------
    str
        The matching probe name.

    Raises
    ------
    ValueError
        If no probe name, or more than one, occurs in ``path``.
    """
    matches = [p for p in probes if p in path]
    # Exactly one probe must match; anything else is ambiguous or unknown.
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one probe in path {path!r}, "
            f"found {len(matches)}: {matches}"
        )
    return matches[0]

# Store the current handler's coordinates in the shared HDF5 library.
save_measurement_info(library="measurements.h5", dh=dh, probes=probes)


['coordinates']


In [75]:
# Show the library's root-level groups/datasets and whether a "test" group exists.
path= "measurements.h5"
pprint(see_groups_and_datasets(path))
print(group_exist(path, "test"))

# Drop the "test" group and recreate it with fresh metadata.
# NOTE(review): the keyword arguments are presumably stored as group attributes,
# and the behaviour of remove_group on a missing group is not shown here — confirm
# against my_packages.my_hdf5.
remove_group(path, "test")
add_group(path, "test", description = "this group is a test", owner="tomas")

{'dataset_keys': ['measurement_points'],
 'group_keys': ['XFE_04s', 'XFR31', 'test', 'xfb31']}
True
['XFE_04s', 'XFR31', 'test', 'xfb31']


In [8]:
# NOTE(review): this module-level name is not read by explore_library below —
# the function's own `path` parameter shadows it.
path = "measurements.h5"


def explore_library(path, recursive=True):
    """Print a report of the groups and datasets stored in an HDF5 file.

    For every visited object the report shows its name, whether it is a group
    (with the keys of its members) or a dataset, the parent's path and the
    attributes; datasets additionally show their shape and dtype.

    Parameters
    ----------
    path : str
        HDF5 file to open read-only.
    recursive : bool, default True
        When True, walk the whole hierarchy; otherwise report only the
        objects directly under the file root.
    """
    def describe(name, obj):
        # Returns None so that ``visititems`` keeps walking the hierarchy.
        if isinstance(obj, h5py.Group):
            kind = f"GROUP - Subgroups: {list(obj.keys())}"
        else:
            kind = "DATASET"

        print(f"NAME: {name:^30}")
        print(f"Type: {kind:^20}")
        print(f"Parent Path: {obj.parent.name:<10}")
        print("Attributes: ")
        pprint(dict(obj.attrs))
        if isinstance(obj, h5py.Dataset):
            print("shape: ", obj.shape, "____ dtype: ", obj.dtype)
        print("\n\n\n")

    with h5py.File(path, "r") as f:
        if not recursive:
            for member_name, member in f.items():
                describe(member_name, member)
            return
        f.visititems(describe)

In [3]:
# Walk the whole library hierarchy and print every group and dataset.
explore_library("measurements.h5", recursive=True)

NAME:              NAS              
Type: GROUP - Subgroups: ['XFE_04s', 'XFR31', 'xfb31']
Parent Path: /         
Attributes: 
{}




NAME:          NAS/XFE_04s          
Type: GROUP - Subgroups: ['r18', 'r18_o']
Parent Path: /NAS      
Attributes: 
{}




NAME:        NAS/XFE_04s/r18        
Type: GROUP - Subgroups: ['coordinates']
Parent Path: /NAS/XFE_04s
Attributes: 
{'READ': 'info on the probe'}




NAME:  NAS/XFE_04s/r18/coordinates  
Type: GROUP - Subgroups: ['measurement_points', 'x_coordinates', 'y_coordinates']
Parent Path: /NAS/XFE_04s/r18
Attributes: 
{'creation date': '14/09/2022 23:26:21',
 'description': 'These coordinates were obtained as the coordinates that '
                'appear atleast once among the         measurement points as '
                'found in the names of the csv files',
 'measurement_path': '/NAS/XFE_04s/r18',
 'probe': 'XFE_04s'}




NAME: NAS/XFE_04s/r18/coordinates/measurement_points
Type:       DATASET       
Parent Path: /NAS/XFE_04s/r18/co

In [71]:
from datetime import datetime

# Timestamp formatted as day/month/year hour:minute:second.
now = datetime.now()
dt_string = f"{now:%d/%m/%Y %H:%M:%S}"
print(f"date and time = {dt_string}")

date and time = 14/09/2022 17:13:29


In [62]:
# Print the shape of every dataset stored directly under the file root.
# The context manager closes the file even if iteration raises.
with h5py.File("measurements.h5", "r") as f:
    for k, obj in f.items():
        if isinstance(obj, h5py.Dataset):
            print(obj.shape)


(2166, 2)


In [73]:
x = dh.coordinates["x"]
y = dh.coordinates["y"]

# Record array with one (x, y) entry per measurement point: the first two
# columns of `dh.points`, each divided by 1e3 and stored as float16.
cc = np.rec.fromarrays(
    [dh.points[:, column] / 1e3 for column in (0, 1)],
    dtype=[("x", "float16"), ("y", "float16")],
)

In [74]:
import pandas as pd

# Display the (x, y) record array as a table with columns "x" and "y".
pd.DataFrame(cc)

Unnamed: 0,x,y
0,134.0,34.5
1,88.0,64.5
2,124.0,50.5
3,148.0,64.5
4,172.0,44.5
...,...,...
2161,182.0,34.5
2162,122.0,70.5
2163,182.0,32.5
2164,178.0,46.5


In [70]:
# Use the %pip magic so the install targets the running kernel's environment,
# and pin the version so a re-run reproduces the same setup.
%pip install pandas==1.3.5

Collecting pandas
  Downloading pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 3.9 MB/s eta 0:00:01
Installing collected packages: pandas
Successfully installed pandas-1.3.5
Note: you may need to restart the kernel to use updated packages.
