In [14]:
from pathlib import Path

from appgeopy import *
from my_packages import *

In [15]:
# Step 1: Generate Synthetic Data
# -------------------------------
# Load synthetic data for multiple locations and sensors
time_series_data = gwatertools.h5pytools.load_input_data()

# Convert the data to 2D by pairing each series with a second, random column.
# Entries are replaced in place; only existing keys are reassigned, so the
# dictionaries keep their size while they are being iterated.
for location, sensors in time_series_data.items():
    for sensor, series in sensors.items():
        # Size the random column from the series itself rather than a fixed 365,
        # so a series of any other length does not raise a length mismatch
        # against series.index
        additional_series = pd.Series(np.random.rand(len(series)) * 5, index=series.index)
        combined_df = pd.DataFrame({'Value1': series, 'Value2': additional_series})
        time_series_data[location][sensor] = combined_df

print("Synthetic 2D time series data generated.")

Synthetic 2D time series data generated.


In [18]:
# List the top-level keys of the nested data dictionary (one entry per location)
time_series_data.keys()

dict_keys(['Location_001', 'Location_002', 'Location_003'])

In [17]:
# Inspect one sensor's combined DataFrame (columns 'Value1' and 'Value2')
time_series_data['Location_001']["Sensor_001"]

Unnamed: 0,Value1,Value2
2022-01-01,8.753881,0.066434
2022-01-02,6.585007,1.540044
2022-01-03,7.884271,1.029498
2022-01-04,4.362576,2.154528
2022-01-05,2.294721,3.641017
...,...,...
2022-12-27,0.642456,1.791195
2022-12-28,9.587306,2.163248
2022-12-29,0.914990,1.133548
2022-12-30,2.171831,4.843484


In [3]:
# Step 2: Generate Metadata
# -------------------------
# Prepare metadata for each location and each sensor.
# prepare_metadata returns two objects, unpacked here as the location-level
# and sensor-level metadata; both are passed to initialize_hdf5_file in Step 3.
location_metadata, sensor_metadata = prepare_metadata(time_series_data)
print("Metadata for locations and sensors generated.")

Metadata for locations and sensors generated.


In [4]:
# Step 3: Save Data to HDF5
# -------------------------
# Name of the HDF5 file that receives the demonstration data
hdf5_file_name = 'synthetic_2d_data.h5'

# Run the nested time series through transform_data_for_hdf5 before writing
transformed_data = transform_data_for_hdf5(time_series_data)

# Initialize the file with the transformed data plus both metadata sets
initialize_hdf5_file(hdf5_file_name, transformed_data, location_metadata, sensor_metadata)
print(f"Data and metadata saved to {hdf5_file_name}.")

Data and metadata saved to synthetic_2d_data.h5.


In [5]:
# Step 4: Load and Update Data
# ----------------------------
# Refresh the environment (simulate starting from scratch).
# The names are popped from the notebook namespace instead of using a bare
# `del`, so re-running this cell after the variables are already gone does
# not raise NameError. hdf5_file_name is intentionally kept for the next cells.
for _stale_name in ("time_series_data", "transformed_data", "location_metadata", "sensor_metadata"):
    globals().pop(_stale_name, None)
del _stale_name

In [8]:
# Load the existing data from the HDF5 file
loaded_data = load_hdf5_data(hdf5_file_name)
print("Loaded existing data from HDF5.")

# Single pass over the loaded data that (1) rebuilds a DataFrame for every
# sensor array and (2) assembles the `updates` payload consumed by update_hdf5.
#
# NOTE: despite its name, `sensor_metadata` maps location -> sensor -> DataFrame
# of measurements (columns Value1..ValueN), not metadata dictionaries. The name
# is kept because other cells may refer to it.
sensor_metadata = {}
updates = {}

# Computed once so every location carries the same 'Last Updated' stamp
last_updated = pd.Timestamp.now().strftime('%Y-%m-%d')

for location, sensors in loaded_data.items():
    sensor_metadata[location] = {}
    sensor_updates = {}
    metadata_updates = {}

    for sensor, data in sensors.items():
        # The 'date' entry is skipped; every other entry is treated as a sensor array
        if sensor == 'date':
            continue

        # Assumes `data` is a 2D array (rows x value columns); a 1D array
        # would fail on data.shape[1]
        column_names = [f'Value{i+1}' for i in range(data.shape[1])]
        df_data = pd.DataFrame(data, columns=column_names)
        sensor_metadata[location][sensor] = df_data

        # Decide randomly whether to update this sensor
        if np.random.rand() > 0.5:  # 50% chance to update
            # Perturb the existing values element-wise with uniform random
            # offsets in [0, 3); the result has the same shape as the original
            additional_measurements = np.random.rand(*df_data.shape) * 3
            updated_data = df_data.values + additional_measurements
            sensor_updates[sensor] = updated_data
            print(f"Updated measurement data for {location}/{sensor}")

        # Every sensor receives fresh metadata, whether or not its data changed
        metadata_updates[sensor] = {
            'Calibration': np.random.rand() * 10,  # Random calibration value
            'Sensor Status': 'Active',  # Sensor status
            'Last Maintenance': '2024-01-15'  # Maintenance date
        }

    # Per-location payload: changed sensor arrays, location-level metadata,
    # and per-sensor metadata
    updates[location] = {
        'sensor_data': sensor_updates,
        'metadata': {
            'Last Updated': last_updated,
            'Updated By': 'Demonstration Script'
        },
        'sensor_metadata': metadata_updates
    }

Loaded existing data from HDF5.
Updated measurement data for Location_001/Sensor_002
Updated measurement data for Location_001/Sensor_003
Updated measurement data for Location_002/Sensor_001
Updated measurement data for Location_003/Sensor_002
Updated measurement data for Location_003/Sensor_003


In [33]:
# Display the assembled update payload (echoes the updated arrays, so the output is long)
updates

{'Location_001': {'sensor_data': {'Sensor_002': array([[ 1.42230485,  3.73989421],
          [12.91010172,  7.49650416],
          [12.87623917,  3.33601997],
          [12.84769669,  3.14577556],
          [ 5.86614426,  4.10732898],
          [10.38801126,  3.11070347],
          [ 5.57234804,  4.96609095],
          [ 8.4965641 ,  5.96879334],
          [15.50416575,  4.24835524],
          [13.73018069,  6.3169183 ],
          [ 1.83784844,  6.10654117],
          [13.23188317,  3.58406323],
          [ 2.62357087,  2.63794669],
          [13.85521875,  3.66611007],
          [12.4625589 ,  6.13398768],
          [ 3.61700692,  4.93927299],
          [ 8.3661525 ,  3.08593194],
          [ 9.53249902,  2.42480498],
          [ 2.43424526,  5.52871692],
          [ 5.72031008,  2.71612027],
          [ 7.73824581,  3.18017977],
          [12.7253822 ,  4.77618793],
          [ 8.37869764,  4.34890541],
          [13.27806126,  2.72509263],
          [ 6.00497052,  4.91196666],
     

In [12]:
# Keep a copy of the HDF5 file before it is modified. The backup name is built
# from the path's stem and suffix, so it always differs from the source.
# (str.replace(".h5", ...) returns the unchanged name when ".h5" is absent,
# which makes copy2 fail, and it rewrites every ".h5" occurrence in a path.)
# NOTE(review): re-running this cell after the update overwrites the backup
# with the already-updated file; confirm that is acceptable.
hdf5_path = Path(hdf5_file_name)
backup_path = hdf5_path.with_name(f"{hdf5_path.stem}_secure{hdf5_path.suffix}")
shutil.copy2(src=hdf5_path, dst=backup_path)

# Update the HDF5 file with new data and metadata
update_hdf5(hdf5_file_name, updates)
print("HDF5 file updated with new measurement data and metadata.")

HDF5 file updated with new measurement data and metadata.
