In [5]:
import json

# Load the first experiment record into `data`.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [7]:
# Take the optional "metadata" section of the record, using an empty dict
# when the key is absent, then echo it as the cell output.
metadata = data["metadata"] if "metadata" in data else {}
metadata

{'doi': '10.1234/esrf.2025.001',
 'facility': 'ESRF',
 'PI': 'Dr. Dmitrii Ivanov'}

In [8]:
# Flatten the fields of interest into display-label -> value pairs.
# Top-level and "sample" fields are read with plain indexing (they must be
# present); DOI and PI come from `metadata` and default to "N/A".
experiment_info = dict([
    ("Experiment ID", data["experiment_id"]),
    ("Beamline", data["beamline"]),
    ("Date", data["date"]),
    ("Sample Name", data["sample"]["name"]),
    ("Sample Type", data["sample"]["type"]),
    ("DOI", metadata.get("doi", "N/A")),
    ("Principal Investigator", metadata.get("PI", "N/A")),
])

In [10]:
# Print the summary, one "label: value" line per entry, in insertion order.
for label in experiment_info:
    print(f"{label}: {experiment_info[label]}")

Experiment ID: ESRF-2025-001
Beamline: ID23
Date: 2025-03-24
Sample Name: Protein Crystal
Sample Type: Biomolecular
DOI: 10.1234/esrf.2025.001
Principal Investigator: Dr. Dmitrii Ivanov


In [14]:
# Load the second experiment record into `data2`.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
with open('data_2.json', 'r', encoding='utf-8') as file:
    data2 = json.load(file)

In [17]:
# Pretty-print the whole first record as JSON with a 4-space indent.
print(json.dumps(data, indent=4))

{
    "experiment_id": "ESRF-2025-001",
    "beamline": "ID23",
    "date": "2025-03-24",
    "sample": {
        "name": "Protein Crystal",
        "type": "Biomolecular",
        "temperature": -173,
        "xray_wavelength": 0.98
    },
    "metadata": {
        "doi": "10.1234/esrf.2025.001",
        "facility": "ESRF",
        "PI": "Dr. Dmitrii Ivanov"
    },
    "data": [
        {
            "scan_id": 1,
            "intensity": 12000,
            "wavelength": 0.98
        },
        {
            "scan_id": 2,
            "intensity": 13500,
            "wavelength": 1.0
        }
    ]
}


In [18]:
# Read three individual fields from the record; the sample name is nested
# one level down under "sample".
experiment_id, beamline = data["experiment_id"], data["beamline"]
sample_name = data["sample"]["name"]

# Report each field on its own line.
print(f"Experiment ID: {experiment_id}")
print(f"Beamline: {beamline}")
print(f"Sample Name: {sample_name}")

Experiment ID: ESRF-2025-001
Beamline: ID23
Sample Name: Protein Crystal


In [19]:
# Dump every top-level entry of the second record as "key: value".
for field in data2:
    print(f"{field}: {data2[field]}")

experiment_id: ESRF-2025-001
beamline: ID11
researchers: [{'name': 'Dr. John Smith', 'institution': 'ESRF', 'role': 'Principal Investigator'}, {'name': 'Dr. Alice Doe', 'institution': 'CNRS', 'role': 'Collaborator'}]
sample: {'name': 'Perovskite Thin Film', 'composition': 'CsPbBr3', 'preparation_date': '2025-03-15'}
measurements: [{'timestamp': '2025-03-16T14:05:00', 'temperature': 300, 'intensity': 1200}, {'timestamp': '2025-03-16T14:10:00', 'temperature': 310, 'intensity': 1250}, {'timestamp': '2025-03-16T14:15:00', 'temperature': 320, 'intensity': 1300}]


In [21]:
# One line per researcher: "name (institution) - role".
for person in data2["researchers"]:
    name, institution, role = person['name'], person['institution'], person['role']
    print(f"{name} ({institution}) - {role}")

Dr. John Smith (ESRF) - Principal Investigator
Dr. Alice Doe (CNRS) - Collaborator


In [23]:
import pandas as pd

# Build a table from the list of measurement records: one row per record,
# one column per field.
df = pd.DataFrame(data2['measurements'])

In [25]:
# Convert the timestamp strings into pandas datetime values.
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

In [26]:
# Display the measurements table.
df

Unnamed: 0,timestamp,temperature,intensity
0,2025-03-16 14:05:00,300,1200
1,2025-03-16 14:10:00,310,1250
2,2025-03-16 14:15:00,320,1300


In [28]:
# An extra record whose fields differ from the existing measurements: it
# carries "pressure" and has no "intensity".
new_measurement = {
    "timestamp": "2020-01-01T00:00:00",
    "temperature": 25,
    "pressure": 1000
}
# Append only when an equal record is not already in the list, so that
# re-running this cell does not add duplicate entries to data2.
if new_measurement not in data2['measurements']:
    data2['measurements'].append(new_measurement)

In [29]:
# Display the measurement list after the append.
data2['measurements']

[{'timestamp': '2025-03-16T14:05:00', 'temperature': 300, 'intensity': 1200},
 {'timestamp': '2025-03-16T14:10:00', 'temperature': 310, 'intensity': 1250},
 {'timestamp': '2025-03-16T14:15:00', 'temperature': 320, 'intensity': 1300},
 {'timestamp': '2020-01-01T00:00:00', 'temperature': 25, 'pressure': 1000}]

In [30]:
# Rebuild the table from the updated record list. Fields that a record does
# not have show up as missing values in that row. The timestamp column is
# not converted to datetime here, so it holds the raw strings.
df = pd.DataFrame(data2['measurements'])

In [31]:
# Display the rebuilt measurements table.
df

Unnamed: 0,timestamp,temperature,intensity,pressure
0,2025-03-16T14:05:00,300,1200.0,
1,2025-03-16T14:10:00,310,1250.0,
2,2025-03-16T14:15:00,320,1300.0,
3,2020-01-01T00:00:00,25,,1000.0


In [32]:
# Write data2 back to disk as JSON with a 4-space indent.
with open('updated_data.json', 'w') as out_file:
    out_file.write(json.dumps(data2, indent=4))

In [34]:
import h5py
import numpy as np

In [67]:
# Create data.h5 (overwriting any existing file) with three groups:
#   experiment_metadata - attributes only
#   sample              - attributes only
#   measurements        - three resizable 1-D datasets
with h5py.File('data.h5', 'w') as hdf:
    # Group holding the experiment-level attributes.
    meta_group = hdf.create_group("experiment_metadata")
    for attr_name, attr_value in (("experiment_id", "ESRF-2025-001"),
                                  ("beamline", "ID11")):
        meta_group.attrs[attr_name] = attr_value

    # Group holding the sample description, also stored as attributes.
    sample_group = hdf.create_group("sample")
    for attr_name, attr_value in (("name", "Perovskite Thin Film"),
                                  ("composition", "CsPbBr3"),
                                  ("preparation_date", "2025-03-15")):
        sample_group.attrs[attr_name] = attr_value

    # Group holding the measurement series. Timestamps are stored as byte
    # strings (dtype "S"); the numeric series are 32-bit floats.
    measurements_group = hdf.create_group("measurements")
    timestamps = np.array(["2025-03-16T14:05:00", "2025-03-16T14:10:00", "2025-03-16T14:15:00"], dtype="S")
    temperatures = np.array([300, 310, 320], dtype=np.float32)
    intensities = np.array([1200, 1250, 1300], dtype=np.float32)

    # maxshape=(None,) leaves the single axis unbounded so rows can be
    # appended later with resize(); that requires chunked storage.
    for dataset_name, values in (("timestamps", timestamps),
                                 ("temperatures", temperatures),
                                 ("intensities", intensities)):
        measurements_group.create_dataset(dataset_name, data=values, maxshape=(None,), chunks=True)

In [68]:
# List the names of the top-level members of the file.
with h5py.File('data.h5', 'r') as hdf:
    top_level_names = list(hdf.keys())
    print("Keys:", top_level_names)

Keys: ['experiment_metadata', 'measurements', 'sample']


In [69]:
# Read back the group attributes and list the datasets under "measurements".
with h5py.File('data.h5', 'r') as hdf:
    meta_attrs = hdf['experiment_metadata'].attrs
    print("Experiment ID:", meta_attrs['experiment_id'])
    print("Beamline:", meta_attrs['beamline'])

    sample_attrs = hdf['sample'].attrs
    print("Sample Name:", sample_attrs['name'])
    print("Composition:", sample_attrs['composition'])

    dataset_names = list(hdf['measurements'].keys())
    print('Datasets in measurements:', dataset_names)

Experiment ID: ESRF-2025-001
Beamline: ID11
Sample Name: Perovskite Thin Film
Composition: CsPbBr3
Datasets in measurements: ['intensities', 'temperatures', 'timestamps']


In [70]:
# Read the three measurement datasets from data.h5 and tabulate them under
# the column labels Timestamp / Temperature / Intensity.
with h5py.File('data.h5', 'r') as hdf:
    measurements_group = hdf['measurements']
    # Timestamps are stored as byte strings; decode them to str.
    timestamps = [t.decode('utf-8') for t in measurements_group['timestamps'][:]]
    # [:] reads each whole dataset into memory while the file is open.
    temperatures = measurements_group['temperatures'][:]
    intensities = measurements_group['intensities'][:]

    df = pd.DataFrame({
        'Timestamp': timestamps,
        'Temperature': temperatures,
        'Intensity': intensities
    })

In [71]:
# Display the table read back from data.h5.
df

Unnamed: 0,Timestamp,Temerature,Intensity
0,2025-03-16T14:05:00,300.0,1200.0
1,2025-03-16T14:10:00,310.0,1250.0
2,2025-03-16T14:15:00,320.0,1300.0


In [72]:
# Values for one additional measurement row (timestamp, temperature, intensity).
new_timestamp, new_temperature, new_intensity = "2025-03-16T14:20:00", 330, 1350

In [73]:
# Append one row to each measurement dataset in data.h5.
# Dataset name -> value to store in the newly added last slot.
appended_values = {
    "timestamps": new_timestamp,
    "temperatures": new_temperature,
    "intensities": new_intensity,
}

with h5py.File('data.h5', 'a') as hdf:
    measurements_group = hdf['measurements']

    # First grow every dataset by one element along axis 0 ...
    for dataset_name in appended_values:
        dataset = measurements_group[dataset_name]
        dataset.resize(dataset.shape[0] + 1, axis=0)

    # ... then fill the new last element of each.
    for dataset_name, value in appended_values.items():
        measurements_group[dataset_name][-1] = value

In [74]:
# Re-read the three measurement datasets from data.h5 and tabulate them
# under the column labels Timestamp / Temperature / Intensity.
with h5py.File('data.h5', 'r') as hdf:
    measurements_group = hdf['measurements']
    # Timestamps are stored as byte strings; decode them to str.
    timestamps = [t.decode('utf-8') for t in measurements_group['timestamps'][:]]
    # [:] reads each whole dataset into memory while the file is open.
    temperatures = measurements_group['temperatures'][:]
    intensities = measurements_group['intensities'][:]

    df = pd.DataFrame({
        'Timestamp': timestamps,
        'Temperature': temperatures,
        'Intensity': intensities
    })

In [75]:
# Display the table re-read from data.h5.
df

Unnamed: 0,Timestamp,Temerature,Intensity
0,2025-03-16T14:05:00,300.0,1200.0
1,2025-03-16T14:10:00,310.0,1250.0
2,2025-03-16T14:15:00,320.0,1300.0
3,2025-03-16T14:20:00,330.0,1350.0


In [121]:
# In-memory series for a second file. Strings are stored as byte strings
# (dtype "S"), numeric series as 32-bit floats.
timestamps = np.array(["2025-03-16T14:05:00", "2025-03-16T14:10:00", "2025-03-16T14:15:00"], dtype="S")
temperatures = np.array([295, 310, 320], dtype=np.float32)
intensities = np.array([1200, 1250, 1350], dtype=np.float32)
researchers = np.array(["Alice Smith", "Bob Johnson", "Charlie Brown"], dtype="S")

# Create beamline_experiment.h5 (overwriting any existing file).
with h5py.File('beamline_experiment.h5', 'w') as hdf:
    # Experiment-level attributes plus a resizable list of researcher names.
    experiment_metadata = hdf.create_group('experiment_metadata')
    for attr_name, attr_value in (('experiment_id', 'ESRF-2025-03'),
                                  ('beamline', 'ID31')):
        experiment_metadata.attrs[attr_name] = attr_value
    experiment_metadata.create_dataset('researchers', data=researchers, maxshape=(None,), chunks=True)

    # Sample description, stored as attributes.
    sample = hdf.create_group('sample')
    for attr_name, attr_value in (('name', 'perovskite thin film'),
                                  ('composition', 'CsPbBr3')):
        sample.attrs[attr_name] = attr_value

    # Measurement series; maxshape=(None,) keeps each dataset appendable.
    measurements = hdf.create_group('measurements')
    series_by_name = {
        'Timestamp': timestamps,
        'Temperature (°C)': temperatures,
        'Intensity (counts)': intensities,
    }
    for series_name, series_values in series_by_name.items():
        measurements.create_dataset(series_name, data=series_values, maxshape=(None,), chunks=True)

In [122]:
# Read the measurement series from beamline_experiment.h5 into a table
# whose column labels match the dataset names.
with h5py.File('beamline_experiment.h5', 'r') as hdf:
    measurements_group = hdf['measurements']

    # Stored timestamps are byte strings; decode each to str.
    raw_timestamps = measurements_group['Timestamp'][:]
    timestamps = [raw.decode('utf-8') for raw in raw_timestamps]
    temperatures = measurements_group['Temperature (°C)'][:]
    intensities = measurements_group['Intensity (counts)'][:]

    table_columns = {
        'Timestamp': timestamps,
        'Temperature (°C)': temperatures,
        'Intensity (counts)': intensities,
    }
    df = pd.DataFrame(table_columns)

In [123]:
# Display the table read from beamline_experiment.h5.
df

Unnamed: 0,Timestamp,Temperature (°C),Intensity (counts)
0,2025-03-16T14:05:00,295.0,1200.0
1,2025-03-16T14:10:00,310.0,1250.0
2,2025-03-16T14:15:00,320.0,1350.0


In [124]:
# Three further measurement rows to append to the stored datasets.
df_to_add = pd.DataFrame({
    'Timestamp': ["2025-03-16T14:20:00", "2025-03-16T14:25:00", "2025-03-16T14:30:00"],
    'Temperature (°C)': [330, 340, 350],
    'Intensity (counts)': [1400, 1450, 1500]
})

# Open in append mode so the existing contents are kept.
with h5py.File('beamline_experiment.h5', 'a') as hdf:
    measurements_group = hdf['measurements']

    # Values to write. Timestamps are encoded to bytes to match the
    # byte-string dataset they go into. The existing data is not read:
    # only the current length is needed to know where to write.
    new_timestamps = [ts.encode('utf-8') for ts in df_to_add['Timestamp'].values]
    new_temperatures = df_to_add['Temperature (°C)'].values
    new_intensities = df_to_add['Intensity (counts)'].values

    old_size = measurements_group['Timestamp'].shape[0]
    new_size = old_size + len(new_timestamps)

    # Grow all three datasets to the new length ...
    measurements_group['Timestamp'].resize(new_size, axis=0)
    measurements_group['Temperature (°C)'].resize(new_size, axis=0)
    measurements_group['Intensity (counts)'].resize(new_size, axis=0)

    # ... then fill the newly added tail of each.
    measurements_group['Timestamp'][old_size:new_size] = new_timestamps
    measurements_group['Temperature (°C)'][old_size:new_size] = new_temperatures
    measurements_group['Intensity (counts)'][old_size:new_size] = new_intensities

In [125]:
# Read everything back from beamline_experiment.h5, print the experiment
# header, and report the temperature range and the rows with intensity
# above 1300.
with h5py.File('beamline_experiment.h5', 'r') as hdf:
    experiment_metadata = hdf['experiment_metadata']
    experiment_id, beamline = (
        experiment_metadata.attrs[attr_name]
        for attr_name in ('experiment_id', 'beamline')
    )
    # Researcher names are stored as byte strings; decode each to str.
    researchers = [raw.decode('utf-8') for raw in experiment_metadata['researchers'][:]]

    measurements = hdf['measurements']
    timestamps = [raw.decode('utf-8') for raw in measurements['Timestamp'][:]]
    temperatures = measurements['Temperature (°C)'][:]
    intensities = measurements['Intensity (counts)'][:]

    print("Experiment ID:", experiment_id)
    print("Beamline:", beamline)
    print("Researchers:", researchers)

    table_columns = {
        'Timestamp': timestamps,
        'Temperature (°C)': temperatures,
        'Intensity (counts)': intensities,
    }
    df = pd.DataFrame(table_columns)

    print(f"Lowest T: {df['Temperature (°C)'].min()} and Highest T: {df['Temperature (°C)'].max()}")
    print(f"For Intensity > 1300:\n {df[df['Intensity (counts)'] > 1300]}")

Experiment ID: ESRF-2025-03
Beamline: ID31
Researchers: ['Alice Smith', 'Bob Johnson', 'Charlie Brown']
Lowest T: 295.0 and Highest T: 350.0
For Intensity > 1300:
              Timestamp  Temperature (°C)  Intensity (counts)
2  2025-03-16T14:15:00             320.0              1350.0
3  2025-03-16T14:20:00             330.0              1400.0
4  2025-03-16T14:25:00             340.0              1450.0
5  2025-03-16T14:30:00             350.0              1500.0


In [126]:
# Display the full table read from beamline_experiment.h5.
df

Unnamed: 0,Timestamp,Temperature (°C),Intensity (counts)
0,2025-03-16T14:05:00,295.0,1200.0
1,2025-03-16T14:10:00,310.0,1250.0
2,2025-03-16T14:15:00,320.0,1350.0
3,2025-03-16T14:20:00,330.0,1400.0
4,2025-03-16T14:25:00,340.0,1450.0
5,2025-03-16T14:30:00,350.0,1500.0
