In [7]:
from my_packages import signal_elaboration as s
from my_packages.utils import probes, probes_walk, HandlePaths
from my_packages.directory_data import  GetCoordinates, make_generator
from my_packages.my_hdf5 import explore_library, see_groups_and_datasets


import os
import pandas
import numpy as np
from datetime import datetime
import scipy.signal
import scipy.io
import pickle
import h5py

In [8]:
from pprint import pprint


# Overview of the measurement tree on the NAS (the NAS can also be browsed
# with ordinary command-line tools).
print("we have the following probes", probes)
print("the structure in which the files are saved is: ")
pprint(probes_walk)

# Flatten the nested, json-like `probes_walk` description into a list of paths.
data_paths = HandlePaths()(probes_walk)

# Same flattening, done per probe with that probe's NAS folder as base path.
probe_paths = dict(
    (probe, HandlePaths(base_path="/NAS/" + probe)(probes_walk[probe]))
    for probe in probes
)

# NOTE: built by exactly the same call as `data_paths` above.
all_paths = HandlePaths()(probes_walk)

pprint(probe_paths)
pprint(all_paths)


we have the following probes ['xfb31', 'XFE_04s', 'XFR31']
the structure in which the files are saved is: 
{'XFE_04s': {'r18': {}, 'r18_o': {}},
 'XFR31': {'r18_o': {'along_x': {}, 'along_y': {}}},
 'xfb31': {'r18': {}}}
{'XFE_04s': ['/NAS/XFE_04s/r18', '/NAS/XFE_04s/r18_o'],
 'XFR31': ['/NAS/XFR31/r18_o/along_x', '/NAS/XFR31/r18_o/along_y'],
 'xfb31': ['/NAS/xfb31/r18']}
['/NAS/xfb31/r18',
 '/NAS/XFE_04s/r18',
 '/NAS/XFE_04s/r18_o',
 '/NAS/XFR31/r18_o/along_x',
 '/NAS/XFR31/r18_o/along_y']


In [38]:
## create an object for the probes that can handle all the measurements for each probe
file = "measurements.h5"

def make_all_generators(file):
    """Build one generator per top-level group of an HDF5 library.

    The x/y coordinate axes of every group are read with ``make_coord_dict``,
    expanded into a 2-D grid with ``np.meshgrid`` and handed to
    ``make_generator``.

    Parameters
    ----------
    file : str
        Path of the HDF5 library to read.

    Returns
    -------
    dict
        Maps each group name to the object returned by ``make_generator``.
    """
    coordinates_dict = make_coord_dict(file)
    # Iterate over the dict that was just read; the previous version looked up
    # an unrelated global called `coordinates`, which fails on a fresh kernel.
    return {
        group: make_generator(*np.meshgrid(coords["x"], coords["y"]))
        for group, coords in coordinates_dict.items()
    }

def make_coord_dict(file):
    """Read the x and y coordinate axes of every top-level group.

    The group names come from ``see_groups_and_datasets(file)["group_keys"]``;
    each group is expected to contain ``coordinates/x_coordinates`` and
    ``coordinates/y_coordinates``.

    Returns a dict mapping group name -> ``{"x": ndarray, "y": ndarray}``.
    """
    coord_dict = {}
    with h5py.File(file, "r") as f:
        for group in see_groups_and_datasets(file)["group_keys"]:
            coords = f[group]["coordinates"]
            # np.array copies the data so it stays usable after the file closes
            coord_dict[group] = dict(
                x=np.array(coords["x_coordinates"]),
                y=np.array(coords["y_coordinates"]),
            )
    return coord_dict
    




# Keep the result: a later cell reads `generators`, which was otherwise never
# assigned and only existed as leftover kernel state.
generators = make_all_generators(file)
generators
              


{'XFE_04s': <my_packages.directory_data.Batch_Generator at 0x7fd4a7aeb990>,
 'XFR31_along_x': <my_packages.directory_data.Batch_Generator at 0x7fd3415a3590>,
 'XFR31_along_y': <my_packages.directory_data.Batch_Generator at 0x7fd2fa973a90>,
 'xfb31': <my_packages.directory_data.Batch_Generator at 0x7fd2b3d44f90>}

In [36]:
# Show the per-group generators, then pull the next batch for "XFE_04s"
# (the recorded output is a list of csv file names).
# NOTE(review): `generators` is presumably the dict returned by
# make_all_generators(file) — confirm it is bound before this cell runs.
pprint(generators)

next(generators["XFE_04s"])

{'XFE_04s': <my_packages.directory_data.Batch_Generator object at 0x7fd4a7aa5ed0>,
 'XFR31_along_x': <my_packages.directory_data.Batch_Generator object at 0x7fd45a319d50>,
 'XFR31_along_y': <my_packages.directory_data.Batch_Generator object at 0x7fd415aab2d0>,
 'xfb31': <my_packages.directory_data.Batch_Generator object at 0x7fd3cee7c7d0>}


['x86000.00y20500.001.csv',
 'x86000.00y20500.002.csv',
 'x86000.00y20500.003.csv',
 'x86000.00y20500.004.csv',
 'x86000.00y20500.005.csv',
 'x86000.00y20500.006.csv',
 'x86000.00y20500.007.csv',
 'x86000.00y20500.008.csv',
 'x86000.00y20500.009.csv',
 'x86000.00y20500.0010.csv',
 'x86000.00y20500.0011.csv',
 'x86000.00y20500.0012.csv',
 'x86000.00y20500.0013.csv',
 'x86000.00y20500.0014.csv',
 'x86000.00y20500.0015.csv',
 'x86000.00y20500.0016.csv',
 'x86000.00y20500.0017.csv',
 'x86000.00y20500.0018.csv',
 'x86000.00y20500.0019.csv',
 'x86000.00y20500.0020.csv',
 'x86000.00y20500.0021.csv',
 'x86000.00y20500.0022.csv',
 'x86000.00y20500.0023.csv',
 'x86000.00y20500.0024.csv',
 'x86000.00y20500.0025.csv',
 'x86000.00y20500.0026.csv',
 'x86000.00y20500.0027.csv',
 'x86000.00y20500.0028.csv',
 'x86000.00y20500.0029.csv',
 'x86000.00y20500.0030.csv',
 'x86000.00y20500.0031.csv',
 'x86000.00y20500.0032.csv',
 'x86000.00y20500.0033.csv',
 'x86000.00y20500.0034.csv',
 'x86000.00y20500.0035.

In [29]:
def get_ds_dictionaries(name, node):
    """Visitor callback: copy every dataset it is shown into ``ds_dict``.

    Meant to be passed to ``visititems``. ``name`` is required by that
    callback signature but not used; the full path ``node.name`` is the key.
    Writes into the module-level ``ds_dict`` and always returns ``None``.
    """
    key = node.name
    if not isinstance(node, h5py.Dataset):
        # not a dataset (e.g. a group): nothing to store
        return
    ds_dict[key] = np.array(node)

# Walk the whole library and collect every dataset as a numpy array, keyed by
# its full HDF5 path.
with h5py.File("measurements.h5", "r") as f:
    # `ds_dict` must be bound at module level before visititems runs, because
    # get_ds_dictionaries writes into it by name.
    ds_dict = dict()
    f.visititems(get_ds_dictionaries)

pprint(ds_dict)

{'/XFE_04s/coordinates/measurement_points': array([(114., 46.5), (154., 74.5), (116., 50.5), ..., ( 88., 90.5),
       (118., 68.5), (176., 34.5)], dtype=[('x', '<f2'), ('y', '<f2')]),
 '/XFE_04s/coordinates/x_coordinates': array([ 86.,  88.,  90.,  92.,  94.,  96.,  98., 100., 102., 104., 106.,
       108., 110., 112., 114., 116., 118., 120., 122., 124., 126., 128.,
       130., 132., 134., 136., 138., 140., 142., 144., 146., 148., 150.,
       152., 154., 156., 158., 160., 162., 164., 166., 168., 170., 172.,
       174., 176., 178., 180., 182., 184., 186., 188., 190., 192., 194.,
       196., 198.], dtype=float32),
 '/XFE_04s/coordinates/y_coordinates': array([20.5, 22.5, 24.5, 26.5, 28.5, 30.5, 32.5, 34.5, 36.5, 38.5, 40.5,
       42.5, 44.5, 46.5, 48.5, 50.5, 52.5, 54.5, 56.5, 58.5, 60.5, 62.5,
       64.5, 66.5, 68.5, 70.5, 72.5, 74.5, 76.5, 78.5, 80.5, 82.5, 84.5,
       86.5, 88.5, 90.5, 92.5, 94.5], dtype=float32),
 '/XFE_04s/incomplete/coordinates/measurement_points': array([(

In [5]:
# Build a GetCoordinates object for the first entry of `all_paths`, then create
# a generator from its "grid" coordinates.
# NOTE(review): `all_paths` comes from another cell — confirm it has run first.
dh = GetCoordinates(all_paths[0])
make_generator(*dh.coordinates["grid"])


<my_packages.directory_data.Batch_Generator at 0x7fa0f0938e50>

In [2]:
# Print name, type, parent path and attributes of the objects in the library.
explore_library("measurements.h5")

NAME:            XFE_04s            
Type: GROUP - Subgroups: ['coordinates', 'incomplete']
Parent Path: /         
Attributes: 
{}




NAME:      XFE_04s/coordinates      
Type: GROUP - Subgroups: ['measurement_points', 'x_coordinates', 'y_coordinates']
Parent Path: /XFE_04s  
Attributes: 
{'creation date': '15/09/2022 16:10:01',
 'description': 'These coordinates were obtained as the coordinates that '
                'appear atleast once among the         measurement points as '
                'found in the names of the csv files',
 'measurement_path': '/NAS/XFE_04s/r18_o',
 'probe': 'XFE_04s'}




NAME: XFE_04s/coordinates/measurement_points
Type:       DATASET       
Parent Path: /XFE_04s/coordinates
Attributes: 
{}
shape:  (2166,) ____ dtype:  [('x', '<f2'), ('y', '<f2')]




NAME: XFE_04s/coordinates/x_coordinates
Type:       DATASET       
Parent Path: /XFE_04s/coordinates
Attributes: 
{}
shape:  (57,) ____ dtype:  float32




NAME: XFE_04s/coordinates/y_coordinates
Type:     

In [11]:
# Inspect the dtype of the point table held by `dh` (a record dtype with
# fields x and y in the recorded output).
dh.point_table.dtype

dtype((numpy.record, [('x', '<f2'), ('y', '<f2')]))

In [9]:
from my_packages.my_hdf5 import *
from pprint import pprint

# Show what get_all_h5() reports, then the group/dataset keys of
# measurements.h5: first with the default arguments, then with "XFE_04s" as
# second argument (presumably the group to look inside — confirm).
print(get_all_h5())
pprint(see_groups_and_datasets("measurements.h5"))
pprint(see_groups_and_datasets("measurements.h5", "XFE_04s"))

['xfb31.h5', 'measurements.h5']
{'dataset_keys': ['measurement_points'],
 'group_keys': ['XFE_04s', 'XFR31', 'xfb31']}
{'dataset_keys': [], 'group_keys': []}


In [95]:
def save_measurement_info(library, dh, probes, measurement_info=None, group_info=None):
    """Store the coordinates of one measurement folder in an HDF5 library.

    The probe is identified from ``dh.path``. A ``coordinates`` group is then
    (re)created inside that probe's group, holding the datasets
    ``x_coordinates``, ``y_coordinates`` and ``measurement_points`` plus
    attributes with the creation date, the measurement path and a description.

    Parameters
    ----------
    library : str
        Path of the HDF5 file; built with ``build_hdf5`` when ``exists``
        reports it missing.
    dh : object
        Must expose ``path``, ``coordinates["x"]``, ``coordinates["y"]`` and
        ``points``.
    probes : iterable of str
        Known probe names, forwarded to ``get_probe_from_path``.
    measurement_info : dict, optional
        Currently unused; kept so existing callers keep working.
    group_info : dict, optional
        Keyword arguments forwarded to ``add_group`` when the probe group has
        to be added.

    Returns
    -------
    None
        Also when the user declines to overwrite an existing ``coordinates``
        group (nothing is written in that case).
    """
    # None instead of `{}` as default avoids sharing one dict between calls.
    group_info = {} if group_info is None else group_info

    path = dh.path
    probe = get_probe_from_path(probes, path)
    xcoord = dh.coordinates["x"]
    ycoord = dh.coordinates["y"]

    if not exists(library):
        build_hdf5(name=library, groups=[probe])

    if not group_exist(library, probe):
        add_group(library, probe, **group_info)

    dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    # open the probe group in append mode
    with h5py.File(library, "a") as f:
        g = f[probe]

        # check whether the coordinates group already exists
        group_keys = [key for key, item in g.items() if isinstance(item, h5py.Group)]
        print(group_keys)
        if "coordinates" in group_keys:
            res = input("type y to overwrite")
            if res != "y":
                return
            del g["coordinates"]

        coord_gr = g.create_group("coordinates")
        coord_gr.attrs["creation date"] = dt_string
        coord_gr.attrs["measurement_path"] = path
        # Adjacent literals instead of a backslash continuation inside the
        # string: the continuation stored the source indentation as a run of
        # spaces in the middle of the description.
        coord_gr.attrs["description"] = (
            "These coordinates were obtained as the coordinates that appear atleast once among the "
            "measurement points as found in the names of the csv files"
        )

        # require_dataset behaves like create_dataset, except that it does not
        # overwrite a dataset that already exists.
        coord_gr.require_dataset("x_coordinates", shape=xcoord.shape, dtype=np.float32, data=xcoord)
        coord_gr.require_dataset("y_coordinates", shape=ycoord.shape, dtype=np.float32, data=ycoord)
        coord_gr.require_dataset("measurement_points", shape=dh.points.shape, dtype=np.float32, data=dh.points)



    
def get_probe_from_path(probes, path):
    """Return the single probe whose name occurs in ``path``.

    A probe matches when its name is a substring of ``path``.

    Parameters
    ----------
    probes : iterable of str
        Candidate probe names.
    path : str
        Path to search.

    Returns
    -------
    str
        The one matching probe name.

    Raises
    ------
    ValueError
        If no probe, or more than one probe, matches. The previous version
        tried to ``raise`` a tuple, which itself failed with an unrelated
        ``TypeError`` and lost the message.
    """
    matches = [p for p in probes if p in path]
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one probe in path {path!r}, "
            f"probe length is {len(matches)}: {matches}"
        )
    return matches[0]

# Write the coordinates held by `dh` into measurements.h5 (prompts before
# overwriting an existing "coordinates" group).
save_measurement_info("measurements.h5", dh, probes)


['coordinates']


In [75]:
# Inspect the library, report whether the scratch group "test" exists, then
# remove it and add it back, passing description/owner keyword arguments
# (presumably stored as group attributes — confirm in add_group).
# The statements modify the file, so their order matters.
path= "measurements.h5"
pprint(see_groups_and_datasets(path))
print(group_exist(path, "test"))

remove_group(path, "test")
add_group(path, "test", description = "this group is a test", owner="tomas")

{'dataset_keys': ['measurement_points'],
 'group_keys': ['XFE_04s', 'XFR31', 'test', 'xfb31']}
True
['XFE_04s', 'XFR31', 'test', 'xfb31']


In [8]:
path = "measurements.h5"


def explore_library(path, recursive=True):
    """Print a description of the objects stored in an HDF5 file.

    For every object the name, its kind (a group with its member keys, or a
    dataset), the parent path and the attributes are printed; datasets also
    report shape and dtype.

    path: HDF5 file, opened read-only.
    recursive: when true the whole hierarchy is walked with ``visititems``;
        otherwise only the direct members of the root are described.
    """
    def _describe(name, obj):
        # Must return None: visititems stops at the first non-None result.
        print("NAME: {:^30}".format(name))
        if isinstance(obj, h5py.Group):
            kind = f"GROUP - Subgroups: {list(obj.keys())}"
        else:
            kind = "DATASET"
        print("Type: {:^20}".format(kind))
        print("Parent Path: {:<10}".format(obj.parent.name))
        print("Attributes: ")
        pprint(dict(obj.attrs))
        if isinstance(obj, h5py.Dataset):
            print("shape: ", obj.shape, "____ dtype: ", obj.dtype) 
        print("\n\n\n")

    with h5py.File(path, "r") as f:
        if not recursive:
            for name, obj in f.items():
                _describe(name, obj)
            return
        f.visititems(_describe)

In [10]:
# Walk the whole hierarchy of the library and print a description of each object.
explore_library("measurements.h5", recursive=True)

NAME:             xfb31             
Type: GROUP - Subgroups: ['coordinates']
Parent Path: /         
Attributes: 
{}




NAME:       xfb31/coordinates       
Type: GROUP - Subgroups: ['measurement_points', 'x_coordinates', 'y_coordinates']
Parent Path: /xfb31    
Attributes: 
{'creation date': '14/09/2022 21:26:36',
 'description': 'These coordinates were obtained as the coordinates that '
                'appear atleast once among the         measurement points as '
                'found in the names of the csv files',
 'measurement_path': '/NAS/xfb31/r18'}




NAME: xfb31/coordinates/measurement_points
Type:       DATASET       
Parent Path: /xfb31/coordinates
Attributes: 
{}
shape:  (2166,) ____ dtype:  float32




NAME: xfb31/coordinates/x_coordinates
Type:       DATASET       
Parent Path: /xfb31/coordinates
Attributes: 
{}
shape:  (57,) ____ dtype:  float32




NAME: xfb31/coordinates/y_coordinates
Type:       DATASET       
Parent Path: /xfb31/coordinates
Attributes: 
{}
shape

In [71]:
from datetime import datetime

# Take the current local time once and render it as DD/MM/YYYY HH:MM:SS.
now = datetime.now()
dt_string = datetime.strftime(now, "%d/%m/%Y %H:%M:%S")
print("date and time =", dt_string)

date and time = 14/09/2022 17:13:29


In [62]:
# Print the shape of every dataset stored directly at the root of the library.
# The context manager closes the file even if the loop raises; the manual
# open()/close() pair left the handle open in that case.
with h5py.File("measurements.h5", "r") as f:
    for obj in f.values():
        if isinstance(obj, h5py.Dataset):
            print(obj.shape)


(2166, 2)


In [73]:
# x and y coordinate arrays held by the handler
x, y = dh.coordinates["x"], dh.coordinates["y"]

# Pack the two columns of dh.points into a record array with float16 fields
# "x" and "y", each divided by 1e3 (presumably a unit conversion — confirm).
cc = np.rec.fromarrays(
    [dh.points[:, 0] / 1e3, dh.points[:, 1] / 1e3],
    dtype=[("x", "float16"), ("y", "float16")],
)

In [74]:
import pandas as pd

# Display the record array as a table with one column per field (x, y).
pd.DataFrame(cc)

Unnamed: 0,x,y
0,134.0,34.5
1,88.0,64.5
2,124.0,50.5
3,148.0,64.5
4,172.0,44.5
...,...,...
2161,182.0,34.5
2162,122.0,70.5
2163,182.0,32.5
2164,178.0,46.5


In [70]:
pip install pandas

Collecting pandas
  Downloading pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 3.9 MB/s eta 0:00:01
Installing collected packages: pandas
Successfully installed pandas-1.3.5
Note: you may need to restart the kernel to use updated packages.
