diff --git a/.gitignore b/.gitignore index 62582c52..b16726d6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ development_scripts/*/*.png development_scripts/*/*.svg development_scripts/*/*.csv -docs/source/figures/big_src/* +docs/source/figures/big_src/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 5c05f812..e5612bb8 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -18,8 +18,7 @@ sphinx: # - pdf # Optionally set the version of Python and requirements required to build your docs -# python: -# version: 3.7 -# install: -# - requirements: docs/requirements.txt -# - requirements: docs/requirements.txt \ No newline at end of file +python: + version: 3.6 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/README.rst b/README.rst index 039c048b..b83842b0 100644 --- a/README.rst +++ b/README.rst @@ -1,18 +1,128 @@ +.. figure:: docs/source/figures/logo.svg + :width: 200 + ============================================= ``ixdat``: The In-situ Experimental Data Tool ============================================= -NOTE: We are currently working mainly on the `[user_ready] `_ branch. Both the `documentation `_ and the `pip installation `_ are presently compiled from that branch while we work through some important details in `a pull request `_. +With ``ixdat``, you can import, combine, and export complex experimental datasets +as simply as:: + + ec = Measurement.read_set("awesome_EC_data", reader="biologic") + ec.plot_measurement() + + ms = Measurement.read("2021-03-30 16_59_35 MS data.tsv", reader="zilien") + ms.plot_measurement() + + ecms = ec + ms + ecms.plot_measurement() + + ecms.export("my_combined_data.csv") + +Output: + +.. 
figure:: docs/source/figures/ixdat_example_figures.png + :width: 700 + + In-situ experimental data made easy + +Or rather than exporting, you can take advantage of ``ixdat``'s powerful analysis +tools and database backends to be a one-stop tool from messy raw data to public +repository accompanying your breakthrough publication and advancing our field. + +About +----- + +``ixdat`` provides a powerful **object-oriented** interface to experimental data, especially in-situ experimental data for which it is of interest to combine data obtained simultaneously from multiple techniques. + +Documentation is at https://ixdat.readthedocs.io + +In addition to a **pluggable** parser interface for importing your data format, ``ixdat`` also includes +pluggable exporters and plotters, as well as a database interface. A relational model of experimental data is +designed into every level. + +.. list-table:: Techniques and Readers + :widths: 20 15 50 + :header-rows: 1 + + + * - Measurement technique + - Status + - Readers + * - Electrochemistry + - Released + - - biologic: .mpt files from Biologic's EC-Lab software + - autolab: ascii files from AutoLab's NOVA software + - ivium: .txt files from Ivium's IviumSoft software + * - Mass Spectrometry + - Released + - - pfeiffer: .dat files from Pfeiffer Vacuum's PVMassSpec software + - cinfdata: text export from DTU Physics' cinfdata system + - zilien: .tsv files from Spectro Inlets' Zilien software + * - Electrochemistry - Mass Spectrometry + - Released + - - zilien: .tsv files from Spectro Inlets' Zilien software + - EC_MS: .pkl files from the legacy EC_MS python package + * - Spectroelectrochemistry + - Released + - - msrh_sec: .csv file sets from Imperial College London's SEC system + * - X-ray photoelectron spectroscopy (XPS) + - Future + - + * - X-ray diffraction (XRD) + - Future + - + * - Low-Energy Ion Scattering (LEIS) + - Future + - + +Tutorials are described at https://ixdat.readthedocs.io/en/latest/tutorials.html + 
+Installation +------------ + +To use ``ixdat``, you need to have python installed. We recommend +`Anaconda python `_. + +To install ``ixdat``, just type in your terminal or Anaconda prompt:: + + $ pip install ixdat + +And hit enter. + +``ixdat`` is under development, and to make use of the newest features, +you may need to upgrade to the latest version. This is also easy. Just type:: + + $ pip install --upgrade ixdat + + +Article repositories +-------------------- + +``ixdat`` is shown in practice in a growing number of open repositories of data and analysis +for academic publications: + +- Soren B. Scott, et al. **Tracking oxygen atoms in electrochemical CO oxidation –Part I: Oxygen exchange via CO2 hydration**. `Electrochimica Acta, 374, 137842 `_, **2021**. + + Repository: https://github.com/ScottSoren/pyCOox_public + +- Soren B. Scott, et al. **Tracking oxygen atoms in electrochemical CO oxidation –Part II: Lattice oxygen reactivity in oxides of Pt and Ir**. `Electrochimica Acta, 374, 137844 `_, **2021**. + + Repository: https://github.com/ScottSoren/pyCOox_public + +- Kevin Krempl, et al. **Dynamic Interfacial Reaction Rates from Electrochemistry - Mass Spectrometry**. `Journal of Analytical Chemistry. 93, 7022-7028 `_, **2021** + + Repository: https://github.com/kkrempl/Dynamic-Interfacial-Reaction-Rates -``ixdat`` will provide a powerful **object-oriented** interface to experimental data, especially in-situ experimental data for which it is of interest to combine data obtained simultaneously from multiple techniques. +- Junheng Huang, et al. **Online Electrochemistry−Mass Spectrometry Evaluation of the Acidic Oxygen Evolution Reaction at Supported Catalysts**. `ACS Catal. 11, 12745-12753 `_, **2021** -``ixdat`` will replace the existing electrochemistry - mass spectrometry data tool, `EC_MS `_, and will thus become a powerful stand-alone tool for analysis and visualization of data acquired by the equipment of `Spectro Inlets `_ and other EC-MS solutions. 
-It will also replace the existing electrochemistry - synchrotron GIXRD data tool, `EC_Xray `_ when needed. -Over time, it will acquire functionality for more and more techniques. + Repository: https://github.com/ScottSoren/Huang2021 -In addition to a **pluggable** parser interface for importing your data format, it will include pluggable exporters and plotters, as well as a database interface. -We will update this README as features are added. More importantly, we will document this project as we develop it here: https://ixdat.readthedocs.io/ +Join us +------- -``ixdat`` is free and open source software and we welcome input and new collaborators. +`ixdat`` is free and open source software and we welcome input and new collaborators. Please help us improve! +Contact us (sbscott@ic.ac.uk) or just +`get started developing `_. \ No newline at end of file diff --git a/TOOLS.rst b/TOOLS.rst index c0c9428f..f1ade5c6 100644 --- a/TOOLS.rst +++ b/TOOLS.rst @@ -12,7 +12,7 @@ development: * **sphinx** is used to build documentation The following is a list of "tool and commands runners": - + * **invoke** is used during development, to run tools and other pre-configured maintenance tasks inside the existing development environment @@ -42,6 +42,8 @@ summary here with our suggestions for usage. Tools ----- +black +..... **black** is an autoformatter, which fixes your white space usage etc. https://black.readthedocs.io/en/stable/ It's nice to run it from the same terminal @@ -50,6 +52,8 @@ before committing (and avoid a "fix formatting" commit later). To get black and the other tools available in git bash, you have to tell ~\.bashrc where it is (see Windows instructions below under git hooks). +flake8 +...... **flake8** is a linter, which checks the code for errors. https://flake8.pycqa.org/en/latest/ This includes @@ -64,9 +68,29 @@ Additional allowances may need to be added there. 
flake8 also enforces a maximum line length of 89, chosen to match the default setting of black (+/- 1 char). +pytest +...... **pytest** is a suite of stuff used to write and run software tests. https://docs.pytest.org/en/stable/ +sphinx +...... +**sphinx** is used to generate the beautiful documentation on +https://ixdat.readthedocs.io from ReStructuredText and ixdat source code. +To set it up just install sphinx, if you haven't already. In your terminal or Anaconda prompt, type:: + + $ pip install sphinx + +Then, to build the documentation, navigate to ``ixdat/docs``, and run in your terminal or Anaconda prompt:: + + $ ./make html + +Note, if you get an "access denied" error, you will just need to run the terminal as an administrator. + +Then you can see the built documentation in your browser by double-clicking +``ixdat/docs/build/html/index.html`` + + **sphinx** is a tool for building the documentation into html and other formats from restructured text (.rst). It also enables automatic documentation generation from the doc-strings in the code. 
Read The Docs uses sphinx to compile diff --git a/development_scripts/append_ec_files.py b/development_scripts/append_ec_files.py index b54c21e3..c947ea6b 100644 --- a/development_scripts/append_ec_files.py +++ b/development_scripts/append_ec_files.py @@ -74,7 +74,7 @@ cv_selection = cv[10:16] cv_selection.plot_measurement(j_name="cycle") -cv_selection.redefine_cycle(start_potential=0.4, redox=1) +cv_selection.redefine_cycle(start_potential=0.45, redox=1) cv_selection.plot_measurement(j_name="cycle") ax = cv_selection[1].plot(label="cycle 1") diff --git a/development_scripts/functional_test.py b/development_scripts/functional_test.py index ff407313..aa6fb1d5 100644 --- a/development_scripts/functional_test.py +++ b/development_scripts/functional_test.py @@ -28,7 +28,7 @@ assert np.isclose( ec_measurement.v[0] - ec_measurement["raw_potential"].data[0], - ec_measurement.RE_vs_RHE + ec_measurement.RE_vs_RHE, ) # To make it complex, we first select a couple cycles, this time by converting @@ -36,12 +36,10 @@ cv = ec_measurement.as_cv() cvs_1_plus_2 = cv[1] + cv[2] -# Check that the calibration survived all that: +# Check that the ms_calibration survived all that: assert cvs_1_plus_2.RE_vs_RHE == ec_measurement.RE_vs_RHE # Check that the main time variable, that of potential, wasn't corrupted: -assert len(cvs_1_plus_2.grab("potential")[0]) == len( - cvs_1_plus_2["time/s"].data -) +assert len(cvs_1_plus_2.grab("potential")[0]) == len(cvs_1_plus_2["time/s"].data) # Check that the selector is still available and works with the main time var: assert len(cvs_1_plus_2.selector.data) == len(cvs_1_plus_2.t) diff --git a/development_scripts/reader_testers/test_autolab_reader.py b/development_scripts/reader_testers/test_autolab_reader.py new file mode 100644 index 00000000..a6d21602 --- /dev/null +++ b/development_scripts/reader_testers/test_autolab_reader.py @@ -0,0 +1,14 @@ +"""For use in development of the autolab reader. 
Requires access to sample data.""" + +from pathlib import Path +from matplotlib import pyplot as plt + +from ixdat import Measurement + +path_to_file = Path.home() / ( + "Dropbox/ixdat_resources/test_data/autolab/autolab_test_file.txt" +) + +meas = Measurement.read(path_to_file, reader="autolab") + +meas.plot() diff --git a/development_scripts/reader_testers/test_cinfdata_reader.py b/development_scripts/reader_testers/test_cinfdata_reader.py new file mode 100644 index 00000000..ef796fcd --- /dev/null +++ b/development_scripts/reader_testers/test_cinfdata_reader.py @@ -0,0 +1,45 @@ +"""For use in development of the cinfdata reader. Requires access to sample data.""" + +from pathlib import Path +from matplotlib import pyplot as plt + +from ixdat import Measurement + +path_to_file = ( + Path.home() + / "Dropbox/ixdat_resources/test_data/cinfdata/Trimarco2018_fig3/QMS_1.txt" +) +ms_meas = Measurement.read(path_to_file, reader="cinfdata") +ms_meas.plot_measurement() + +path_to_ec_file_start = ( + Path.home() / "Dropbox/ixdat_resources/test_data/cinfdata/Trimarco2018_fig3/09_fig4" +) +ec_meas = Measurement.read_set(path_to_ec_file_start, reader="biologic") +ec_meas.calibrate(RE_vs_RHE=0.65, A_el=0.196) +ec_meas.plot_measurement() + +ecms_meas = ec_meas + ms_meas +axes = ecms_meas.plot_measurement( + mass_lists=[["M44", "M2"], ["M4", "M28"]], + tspan_bg=[30, 40], + legend=False, + unit="pA", +) + +axes[0].set_ylim([-7, 70]) +axes[2].set_ylim([-1.8e3, 18e3]) +fig = axes[0].get_figure() +fig.tight_layout() +# fig.savefig("../../docs/source/figures/ec_ms.svg") + +ecms_meas.set_bg(tspan_bg=[0, 10]) + +cv = ecms_meas.as_cv() +cv.redefine_cycle(start_potential=0.39, redox=False) + +axes_cv = cv[2].plot(mass_list=["M2", "M44"], logplot=False, ) +axes_cv = cv[1].plot( + mass_list=["M2", "M44"], linestyle="--", axes=axes_cv, logplot=False, +) +axes_cv[0].get_figure().savefig("Trimarco2018_ixdat.png") diff --git a/development_scripts/reader_testers/test_ivium_reader.py 
b/development_scripts/reader_testers/test_ivium_reader.py new file mode 100644 index 00000000..27047e65 --- /dev/null +++ b/development_scripts/reader_testers/test_ivium_reader.py @@ -0,0 +1,28 @@ +"""For use in development of the ivium reader. Requires access to sample data.""" + +from pathlib import Path +import pandas as pd + +from ixdat import Measurement +from ixdat.techniques import CyclicVoltammogram + +path_to_file = Path.home() / ( + "Dropbox/ixdat_resources/test_data/ivium/ivium_test_dataset" +) +path_to_single_file = path_to_file.parent / (path_to_file.name + "_1") +df = pd.read_csv(path_to_single_file, sep=r"\s+", header=1) + +meas = Measurement.read(path_to_file, reader="ivium") + +meas.save() + +meas.plot_measurement() + +meas_cv = CyclicVoltammogram.read(path_to_file, reader="ivium") + +meas_cv.save() + +meas_cv.plot_measurement() +meas_cv.redefine_cycle(start_potential=0.4, redox=False) +for i in range(4): + meas_cv[i].plot() diff --git a/development_scripts/reader_testers/test_ixdat_csv_reader.py b/development_scripts/reader_testers/test_ixdat_csv_reader.py new file mode 100644 index 00000000..c664854b --- /dev/null +++ b/development_scripts/reader_testers/test_ixdat_csv_reader.py @@ -0,0 +1,31 @@ +"""For use in development of the ixdat .csv reader (and exporter).""" + +from ixdat import Measurement + + +if False: # test the version that's online on the tutorials page + meas = Measurement.read_url( + "https://raw.githubusercontent.com/ixdat/tutorials/" + + "main/loading_appending_and_saving/co_strip.csv", + reader="ixdat", + ) + meas.plot_measurement() + +else: + meas = Measurement.read( + "../../test_data/biologic/Pt_poly_cv_CUT.mpt", reader="biologic" + ) + meas.calibrate_RE(0.715) + + meas.correct_ohmic_drop(R_Ohm=100) + + meas.normalize_current(0.196) + + cv = meas.as_cv() + + cv.export("test.csv") + + meas_loaded = Measurement.read("test.csv", reader="ixdat") + + meas_loaded.plot() + diff --git 
a/development_scripts/reader_testers/test_msrh_sec_decay_reader.py b/development_scripts/reader_testers/test_msrh_sec_decay_reader.py new file mode 100644 index 00000000..10203545 --- /dev/null +++ b/development_scripts/reader_testers/test_msrh_sec_decay_reader.py @@ -0,0 +1,64 @@ +"""For use in development of the MSRH SEC reader. Requires access to sample data. +MSRH = molecular science research hub, at Imperial College London. +""" + +from pathlib import Path +from ixdat import Measurement + +data_dir = Path.home() / "Dropbox/ixdat_resources/test_data/sec" + +sec_meas = Measurement.read( + # data_dir / "decay/PDtest-1.35-1OSP-SP.csv", + data_dir / "decay/PDtest-1.33-1OSP-SP.csv", + path_to_ref_spec_file=data_dir / "WL.csv", + # path_to_t_V_file=data_dir / "decay/PDtest-1.35-1OSP-E-t.csv", + # path_to_t_J_file=data_dir / "decay/PDtest-1.35-1OSP-J-t.csv", + path_to_t_V_file=data_dir / "decay/PDtest-1.33-1OSP-E-t.csv", + path_to_t_J_file=data_dir / "decay/PDtest-1.33-1OSP-J-t.csv", + tstamp=1, + reader="msrh_sec_decay", +) +# Suggestion: command-line switching for development scripts. +# https://github.com/ixdat/ixdat/pull/30/files#r810014299 + +sec_meas.calibrate_RE(RE_vs_RHE=0.26) + +sec_meas.set_reference_spectrum(t_ref=5) + +axes = sec_meas.plot_measurement( + # V_ref=0.66, # can't do a V_ref for this as can't interpolate on potential.. + # So OD will be calculated using the reference spectrum in WL.csv + # cmap_name="jet", + cmap_name="inferno", + make_colorbar=False, +) +# axes[0].get_figure().savefig("decay_vs_t.png") + +axes = sec_meas.plot_wavelengths(wavelengths=["w500", "w600", "w700", "w800"]) + +ax_w = sec_meas.plot_waterfall() + +# exit() +# ax_w.get_figure().savefig("decay_waterfall.png") + +ref_spec = sec_meas.reference_spectrum +resting_spec = sec_meas.get_spectrum(t=5) # 5 seconds in, i.e. before the pulse +working_spec = sec_meas.get_spectrum(t=20) # during the pulse. +decaying_spec = sec_meas.get_spectrum(t=40) # after the pulse. 
+ +ax = resting_spec.plot(color="k", label="resting") +working_spec.plot(color="r", label="working", ax=ax) +decaying_spec.plot(color="b", label="decaying", ax=ax) +ref_spec.plot(color="0.5", linestyle="--", label="reference", ax=ax) +ax.legend() +# ax.get_figure().savefig("select raw spectra.png") + +resting_OD_spec = sec_meas.get_dOD_spectrum(t=5) # 5 seconds in, i.e. before the pulse +working_OD_spec = sec_meas.get_dOD_spectrum(t=20) # during the pulse. +decaying_OD_spec = sec_meas.get_dOD_spectrum(t=40) # after the pulse. + +ax_OD = resting_OD_spec.plot(color="k", label="resting") +working_OD_spec.plot(color="r", label="working", ax=ax_OD) +decaying_OD_spec.plot(color="b", label="decaying", ax=ax_OD) +ax_OD.legend() +# ax_OD.get_figure().savefig("select OD spectra.png") diff --git a/development_scripts/reader_testers/test_msrh_sec_reader.py b/development_scripts/reader_testers/test_msrh_sec_reader.py new file mode 100644 index 00000000..60b5c57c --- /dev/null +++ b/development_scripts/reader_testers/test_msrh_sec_reader.py @@ -0,0 +1,64 @@ +"""For use in development of the MSRH SEC reader. Requires access to sample data. +MSRH = molecular science research hub, at Imperial College London. 
+""" + +from pathlib import Path +from ixdat import Measurement + +from matplotlib import pyplot as plt + +plt.close("all") + +data_dir = Path.home() / "Dropbox/ixdat_resources/test_data/sec" +sec_meas = Measurement.read( + data_dir / "test-7SEC.csv", + path_to_ref_spec_file=data_dir / "WL.csv", + path_to_V_J_file=data_dir / "test-7_JV.csv", + scan_rate=1, + tstamp=1, + reader="msrh_sec", +) + +sec_meas.calibrate_RE(RE_vs_RHE=0.26) # provide RE potential in [V] vs RHE +sec_meas.normalize_current(A_el=1) # provide electrode area in [cm^2] + +sec_meas.set_reference_spectrum(V_ref=0.66) +ax = sec_meas.get_dOD_spectrum(V=1.0, V_ref=0.66).plot(color="b", label="species 1") +sec_meas.get_dOD_spectrum(V=1.4, V_ref=1.0).plot(color="g", label="species 2", ax=ax) +sec_meas.get_dOD_spectrum(V=1.7, V_ref=1.4).plot(color="r", label="species 3", ax=ax) +ax.legend() + + +if True: # test export and reload + # Suggestion: command-line switching for development scripts. + # https://github.com/ixdat/ixdat/pull/30/files#r810014299 + export_name = "exported_sec.csv" + sec_meas.export(export_name) + sec_reloaded = Measurement.read(export_name, reader="ixdat") + sec_reloaded.set_reference_spectrum(V_ref=0.66) + sec_reloaded.plot_vs_potential(cmap_name="jet") + + +axes = sec_meas.plot_measurement( + V_ref=0.4, + cmap_name="jet", + make_colorbar=True, +) + +ax = sec_meas.plot_waterfall( + V_ref=0.4, + cmap_name="jet", + make_colorbar=True, +) +ax.get_figure().savefig("sec_waterfall.png") + +axes2 = sec_meas.plot_vs_potential(V_ref=0.66, cmap_name="jet", make_colorbar=False) +axes2 = sec_meas.plot_vs_potential( + V_ref=0.66, vspan=[1.4, 2], cmap_name="jet", make_colorbar=False +) + +ax = sec_meas.get_dOD_spectrum(V_ref=0.66, V=1.0).plot(color="b", label="species 1") +sec_meas.get_dOD_spectrum(V_ref=1.0, V=1.45).plot(color="g", ax=ax, label="species 2") +sec_meas.get_dOD_spectrum(V_ref=1.45, V=1.75).plot(color="r", ax=ax, label="species 3") + +ax.legend() diff --git 
a/development_scripts/reader_testers/test_pfeiffer_reader.py b/development_scripts/reader_testers/test_pfeiffer_reader.py new file mode 100644 index 00000000..76554e67 --- /dev/null +++ b/development_scripts/reader_testers/test_pfeiffer_reader.py @@ -0,0 +1,16 @@ +"""For use in development of the pfeiffer reader. Requires access to sample data.""" + +from pathlib import Path +from matplotlib import pyplot as plt + +from ixdat import Measurement + +path_to_file = ( + Path.home() + / ("Dropbox/ixdat_resources/test_data/pfeiffer") + / "MID_air, Position 1, RGA PrismaPro 200 44526001, 003-02-2021 17'41'12 - Bin.dat" +) + +meas = Measurement.read(path_to_file, reader="pfeiffer") + +meas.plot_measurement() diff --git a/development_scripts/reader_testers/test_zilien_reader.py b/development_scripts/reader_testers/test_zilien_reader.py new file mode 100644 index 00000000..dfc2aae5 --- /dev/null +++ b/development_scripts/reader_testers/test_zilien_reader.py @@ -0,0 +1,44 @@ +"""For use in development of the zilien reader(s). Requires access to sample data.""" + +from pathlib import Path + +from ixdat import Measurement +from ixdat.techniques import MSMeasurement, ECMeasurement + +data_dir = Path(r"~\Dropbox\ixdat_resources\test_data\zilien_with_ec").expanduser() + +path_to_file = data_dir / "2021-02-01 17_44_12.tsv" + +# This imports it with the EC data +ecms = Measurement.read(path_to_file, reader="zilien") +ecms.calibrate_RE(0) +ecms.plot_measurement() + +# This imports it as just an MS measurement. +ms = MSMeasurement.read(path_to_file, reader="zilien") +ms.plot_measurement() # nice. 
one panel, no MS :) + +# This adds in the EC data from Biologic: + +ec = Measurement.read_set( + data_dir / "2021-02-01 17_44_12", reader="biologic", suffix=".mpt" +) +ecms_2 = ec + ms +ecms_2.plot_measurement() + +# This imports it as just an EC measurement +ec_2 = ECMeasurement.read(path_to_file, reader="zilien") +ec_2.plot() + + +# This plots just the EC data from the first EC-MS measurement: + +ecms.ec_plotter.plot_measurement() +ecms.ec_plotter.plot_vs_potential() +ecms.ms_plotter.plot_measurement() + +# This plots it as a cyclic voltammagram + +ecms_cv = ecms.as_cv() +ecms_cv.ec_plotter.plot_vs_potential() +ecms_cv.plot() diff --git a/development_scripts/reader_testers/test_zilien_spectrum_reader.py b/development_scripts/reader_testers/test_zilien_spectrum_reader.py new file mode 100644 index 00000000..ff8e4094 --- /dev/null +++ b/development_scripts/reader_testers/test_zilien_spectrum_reader.py @@ -0,0 +1,21 @@ +"""For use in development of zilien spectrum reader. Requires access to sample data.""" + +from pathlib import Path +from ixdat import Spectrum + +path_to_file = ( + Path(r"C:\Users\scott\Dropbox\ixdat_resources\test_data\zilien_spectra") + / "mass scan started at measurement time 0001700.tsv" +) + +spec = Spectrum.read( + path_to_file, + reader="zilien_spec", +) + +spec.plot(color="k") + +s_id = spec.save() + +loaded = Spectrum.get(s_id) +loaded.plot(color="g") diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..638c60bc --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,7 @@ +# needed as per https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html + +sphinx +sphinx_rtd_theme +readthedocs-sphinx-search +ixdat +docutils<0.18 # see https://github.com/sphinx-doc/sphinx/issues/9727 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index ca99ed65..b44637a3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -45,8 +45,10 @@ "sphinx.ext.coverage", 
"sphinx.ext.napoleon", "sphinx_rtd_theme", + "sphinx.ext.viewcode", ] +source_suffix = {".rst": "restructuredtext", ".txt": "restructuredtext"} # Add any paths that contain templates here, relative to this directory. # templates_path = ['_templates'] @@ -62,6 +64,7 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" +html_logo = "figures/logo.svg" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/source/data-series.rst b/docs/source/data-series.rst new file mode 100644 index 00000000..c21eecb5 --- /dev/null +++ b/docs/source/data-series.rst @@ -0,0 +1,50 @@ +.. _`data_series`: + +The data series structure +========================= + +A **data series** is an object of the ``DataSeries`` class in the ``data_series`` module +or an inheriting class. It is basically a wrapper around a numpy array, which is its +``data`` attribute, together with a name and a unit. Most data series also contain some +additional metadata and/or references to other data series. The most important function +of these is to keep track of everything in time, as described below. + +data series and time +-------------------- + +(Copied from text in design workshop 2, in December 2020): + +Time is special! +In some deeper way, time is just another dimension… +but for hyphenated laboratory measurements, as well as multi-technique experimental projects in general (so long as samples and equipment are moving slow compared to the speed of light), time is special because it is the one measurable quantity that is always shared between all detectors. + +Absolute time (epoch timestamp) exists in two places: + +- ``Measurement.tstamp``: This timestamp is a bit decorative – it tells the measurement’s plotter and data selection methods what to use as t=0 +- ``TSeries.tstamp``: This timestamp is truth. It defines the t=0 for the primary time data of any measurement. 
+ +Data carriers: + +- ``DataSeries``: The ``TimeSeries`` is a special case of the ``DataSeries``. All data + carried by ixdat will be as a numpy array in a Series. All Series share a primary key + (id in table series in the db diagram on the left), and in addition to the data have a ``name`` (think “column header”) and a ``unit``. Series is a table in the ``ixdat`` database structure, with helper tables for special cases. +- ``TimeSeries``: The only additional row for ``TimeSeries`` (table tseries) is ``tstamp``, as described above. +- ``Field``: Some series consist of values spanning a space defined by other series. Such a series is called a ``Field``, and defined by a list of references to the series which define their space. In the database, this is represented in a field_axis table, of which n rows (with axis_number from 0 to n-1) will be dedicated to representing each n-D ``Field``. +- ``ValueSeries``: Finally, a very common series type is a scalar value over time. This is called a ``ValueSeries``, and must have a corresponding ``TimeSeries``. A ``ValueSeries`` is actually a special case of a ``Field``, spanning a 1-d space, and so doesn’t need a new table in the db. + +**Immutability!** All of the data carriers above will be immutable! This means that, +even though truth is preserved by adding ``dt`` to a ``tsteries.tstamp`` and subtracting +``dt`` from ``tseries.data``, we will never do this! This is a cheap calculation that +``ixdat`` can do on demand. Same with appending corresponding +series from consecutive measurements. Performing these operations on every series in +a measurement set is referred to as building a combined measurement, and is only done +when explicitly asked for (f.ex. to export or save the combined measurement). Building +makes new Series rather than mutating existing ones. A possible exception to immutability +may be appending data to use ``ixdat`` on an ongoing measurement. 
+ + +The ``data_series`` module +-------------------------- + +.. automodule:: ixdat.data_series + :members: \ No newline at end of file diff --git a/docs/source/dataset.rst b/docs/source/dataset.rst deleted file mode 100644 index cade89ef..00000000 --- a/docs/source/dataset.rst +++ /dev/null @@ -1,13 +0,0 @@ -The dataset structure -===================== - - -The data_series module -^^^^^^^^^^^^^^^^^^^^^^ -.. automodule:: ixdat.data_series - :members: - -The dataset module -^^^^^^^^^^^^^^^^^^ -.. automodule:: ixdat.dataset - :members: \ No newline at end of file diff --git a/docs/source/developing.rst b/docs/source/developing.rst new file mode 100644 index 00000000..61ba2bde --- /dev/null +++ b/docs/source/developing.rst @@ -0,0 +1,98 @@ +.. _developing: + +================ +Developing ixdat +================ + +If there's an experimental technique or analysis procedure or database that ixdat +should support and doesn't, it might be because **you** haven't coded it yet. + +Here are a few resources to help you get started developing ixdat. + +Git and Github +************** + +The source code for ixdat (and this documentation) lives at: +https://github.com/ixdat/ixdat + +Note, we are currently compiling from the +`[user_ready] `_ +branch, not the master branch. + +To develop ixdat, you will need to use git and github. This means + +- `install git `_. Git bash is strongly recommended for Windows +- Create an account at https://github.com +- Clone the repository. Navigate in the terminal which you will use for git (e.g. git bash) to + the location that you want the repository (e.g. ``/c/Users/your_user_name/git/``), and type:: + + git clone https://github.com/ixdat/ixdat + +- Install ixdat from the repository to use the ixdat code you're working on. You can do this in a virtual environment, + but it is simpler to just install it dynamically. In your terminal or Anaconda Prompt, navigate + to the folder which contains the ixdat project folder (e.g. 
``/c/Users/your_user_name/git/``) + and type:: + + pip uninstall ixdat + pip install -e ixdat + + If you want to go back to using the released version later, just re-install it from PyPi:: + + pip install --upgrade ixdat + +- Check out the branch you want to work from. Note, this is also how to *use* a feature that is under development.:: + + git checkout branch_to_work_from + +- Make a branch using:: + + git branch my_branch_name + git checkout my_branch_name + + +- Develop your feature, committing regularly and pushing regularly to your github account. + +- When it's ready (i.e., works like you want, and passes linting and testing), make a pull request! + A pull request (PR) is an awesome open review process that gives others a chance to comment and suggest + improvements in your code before it's merged into the main ixdat package. You can see + existing pull requests at https://github.com/ixdat/ixdat/pulls + +style +***** + +We do our best to follow the conventions at + +- code style guide: https://www.python.org/dev/peps/pep-0008/ +- docstring style guide: https://www.python.org/dev/peps/pep-0257/ + +Exceptions include + +- It's fine to capitalize names for a quantity that is conventionally capitalized in equations (`T` for temperature, for example). + +The tools **black** and **flake8** help us keep the style up to standards. + +tools +***** + +We use tools to make sure that our code is both functional and pretty. This makes it +easier to work together. See instructions for the tools in `tools.rst `_ + +Note on ongoing developments +**************************** + +If you develop now, pretend that **[user_ready]** is the master branch. We are working +though some issues in the guts of ixdat to make sure it'll be able to scale to large +projects with SQL backends. Hopefully this won't change the API much, so that the updated +guts won't require changes in your code. Here is what we're going through: + +.. 
figure:: figures/21F03_DWS4_frontend_and_planning.png + :width: 500 + + Git plan as of 21F03 (June 3, 2021) + + +Write to us +*********** +We'd love to know what you're working on and help with any issues developing, even +before you make a PR. +One great way to do so is through `github discussions `_ \ No newline at end of file diff --git a/docs/source/exporter_docs/index.rst b/docs/source/exporter_docs/index.rst new file mode 100644 index 00000000..4b39d151 --- /dev/null +++ b/docs/source/exporter_docs/index.rst @@ -0,0 +1,46 @@ +.. _exporters: + +Exporters: getting data out of ``ixdat`` +======================================== + + +Here is an example of an ixdat csv file (exported from a ``CyclicVoltammagram`` +measurement using its default ``ECExporter``.): +https://github.com/ixdat/tutorials/blob/main/loading_appending_and_saving/co_strip.csv + +The ``csv_exporter`` module +........................... +.. _csv-exporter: + +.. automodule:: ixdat.exporters.csv_exporter + :members: + +The ``ec_exporter`` module +.......................... +.. _`ec-exporter`: + +.. automodule:: ixdat.exporters.ec_exporter + :members: + +The ``ecms_exporter`` module +............................ + +.. automodule:: ixdat.exporters.ecms_exporter + :members: + +The ``spectrum_exporter`` module +................................ +.. _spectrum-exporter: + +.. automodule:: ixdat.exporters.spectrum_exporter + :members: + +The ``sec_exporter`` module +............................ +.. _sec-exporter: + +.. automodule:: ixdat.exporters.sec_exporter + :members: + + + diff --git a/docs/source/extended_concept.rst b/docs/source/extended-concept.rst similarity index 95% rename from docs/source/extended_concept.rst rename to docs/source/extended-concept.rst index c7bab708..7cfd1aa8 100644 --- a/docs/source/extended_concept.rst +++ b/docs/source/extended-concept.rst @@ -1,11 +1,17 @@ +.. _concept: ================ Extended concept ================ -*By Soren B. Scott, 20H03 (August 3, 2020)* +.. 
figure:: figures/ixdat_profile_pic.svg + :width: 200 + + The power of combining techniques (fig made with ``EC_Xray``, an ``ixdat`` precursor) + +*By Soren B. Scott, 20H03 (August 3, 2020)* -My idea is that ``ixdat`` will have two "faces": +My idea is that ``ixdat`` will have two "faces": 1. The first face is towards the raw data and the experimenter. Here, by "combining techniques", we mean making one dataset out of separately saved data files. Electrochemistry - Mass Spectrometry (EC-MS) is a perfect example, where, typically one has data files for the potentiostat and the mass spectrometer and the data tool has to line them up in time and make one dataset for the methods to be analyzed simultaneously (in contrast to some proprietary softwares like Spectro Inlets' Zilien which combine the datasets during acquisition, but inevitably make tradeoffs in the process). This will be the core of what ixdat does. On top of that, it will have a lot of auxiliary functionality for low-level analysis of typical combined datasets - for example automated calibration of the MS data based on electrochemistry (like using the electrode current during steady hydrogen evolution to calibrate the |H2| signal).  
diff --git a/docs/source/figures/21F03_DWS4_frontend_and_planning.png b/docs/source/figures/21F03_DWS4_frontend_and_planning.png new file mode 100644 index 00000000..f511ae1d Binary files /dev/null and b/docs/source/figures/21F03_DWS4_frontend_and_planning.png differ diff --git a/docs/source/figures/cv_diff.svg b/docs/source/figures/cv_diff.svg new file mode 100644 index 00000000..cdd1ddda --- /dev/null +++ b/docs/source/figures/cv_diff.svg @@ -0,0 +1,2689 @@ + + + + + + + + + 2021-03-12T10:09:46.321242 + image/svg+xml + + + Matplotlib v3.3.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/figures/ec_ms_annotated.svg b/docs/source/figures/ec_ms_annotated.svg new file mode 100644 index 00000000..a4e2b105 --- /dev/null +++ b/docs/source/figures/ec_ms_annotated.svg @@ -0,0 +1,2047 @@ + + + + + + + + 2021-03-13T22:48:52.540275 + image/svg+xml + + + Matplotlib v3.3.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + m/z=2 (H2) + m/z=4 +(He) + m/z=28 (CO) + m/z=44 +(CO2) + U + J + + + + + + + diff --git a/docs/source/figures/ec_subplots.svg b/docs/source/figures/ec_subplots.svg new file mode 100644 index 00000000..23588824 --- /dev/null +++ b/docs/source/figures/ec_subplots.svg @@ -0,0 +1,2228 @@ + + + + + + + + + 2021-03-12T10:24:24.895500 + image/svg+xml + + + Matplotlib v3.3.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/figures/inheritance.svg b/docs/source/figures/inheritance.svg new file mode 100644 index 00000000..04b763b8 --- /dev/null +++ b/docs/source/figures/inheritance.svg @@ -0,0 +1 @@ +Measurement base-Plugging with Backend, etc.-Relationships with DataSeries-Appending/hyphenating with the `+` operatorECMeasurementEverything in Measurementbase, AND:-Current and potential-Calibrating RE, normalizing, correcting ohmic drop-Selecting CV cycles-Integrating over potential rangeMSMeasurementEverything in Measurementbase, AND:-Quantifying MS signals-Storing MS settings-Relationship with mass spectraECMSMeasurementEverything in ECMeasurementAND MSMeasurement, AND:-Internal calibration-Mass transport modellingDataSeries-Raw data -Unit-Keeping track of timeEvery DataSeriesknows its timestamp. Sothey line up automatically. 
\ No newline at end of file diff --git a/docs/source/figures/ixdat_example_figures.png b/docs/source/figures/ixdat_example_figures.png new file mode 100644 index 00000000..eab8cf9b Binary files /dev/null and b/docs/source/figures/ixdat_example_figures.png differ diff --git a/docs/source/figures/ixdat_flow.png b/docs/source/figures/ixdat_flow.png new file mode 100644 index 00000000..5c3f2d69 Binary files /dev/null and b/docs/source/figures/ixdat_flow.png differ diff --git a/docs/source/figures/ixdat_flow.svg b/docs/source/figures/ixdat_flow.svg new file mode 100644 index 00000000..d50d67ce --- /dev/null +++ b/docs/source/figures/ixdat_flow.svg @@ -0,0 +1,1137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + relational database + + + + + data sharing & transparency + + + + + + } + } + ixdat + + + + A A + + B B + x act. /[s-1] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diss. 
/[s-1] + plotting tools + + + analysistools @1.6 VRHE + EC-MS SEC ICP-MS FE-EPR XRD XPSc LEIS SEMc + + + + + + + diff --git a/docs/source/figures/ixdat_profile_pic.svg b/docs/source/figures/ixdat_profile_pic.svg new file mode 100644 index 00000000..a50d3d9a --- /dev/null +++ b/docs/source/figures/ixdat_profile_pic.svg @@ -0,0 +1,2170 @@ + + + + + + + + + 2021-03-12T11:19:30.748666 + image/svg+xml + + + Matplotlib v3.3.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/figures/logo.svg b/docs/source/figures/logo.svg new file mode 100644 index 00000000..076e9b9f --- /dev/null +++ b/docs/source/figures/logo.svg @@ -0,0 +1,172 @@ + + + + + + + + + + image/svg+xml + + + + + + + ixdat + + + + + + + + + + + + + + + + + + diff --git a/docs/source/figures/pluggable.svg 
b/docs/source/figures/pluggable.svg new file mode 100644 index 00000000..8a56e34e --- /dev/null +++ b/docs/source/figures/pluggable.svg @@ -0,0 +1 @@ +Measurement class-Interface to the data. (e.g.ECMeasurementgives easy access to potential and current)-Methods for processing.(e.g.calibrating electrodes)Backend-Interface to the database: Controls what save() does.Could be:-folder/file or SQL-Local or remote-open or privateReader-Interface to the files made by the data acquisition software“biologic”, “gamry”, and “CH Instruments” could all be readers for ECMeasurementPlotter-Matplotlib or external softwareInteractive or static figure, or video-CustomizeableCo-plot or separate axes? What to include by default? EtcExporter-Bringing your data elsewhere.Could be open or private, local or remote, SQL or a folder/file. \ No newline at end of file diff --git a/docs/source/figures/sec_class.svg b/docs/source/figures/sec_class.svg new file mode 100644 index 00000000..89dab2db --- /dev/null +++ b/docs/source/figures/sec_class.svg @@ -0,0 +1,554 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + Measurement base-Plugging withBackend, etc.-Relationships withDataSeries-Appending/hyphenating withthe `+` operator + + ECMeasurementEverything inMeasurementbase, AND:-Current and potential-Calibrating RE, normalizing,correcting ohmic drop-Selecting CV cycles-Integrating over potential range + + SpectrumSeriesEverything inSpectrumbase, AND:-Evolution over time-Heat plots-Waterfall plots + + SpectroECMeasurementEverything inECMeasurement, AND:-SpectrumSeriesfor spec data-dODcalculation + + + + + DataSeries-Raw data-Unit-Keeping track of time + + + Spectrum base-Relationship with Backend,etc-xand yDataSeries-Plotting x vs y, selecting ranges in x-Simple peak fitting (gauss) + + + SpectroelectrochemistryRelations and inheritance + diff --git a/docs/source/index.rst b/docs/source/index.rst index 841f93c8..86bcc4d7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ 
-1,18 +1,63 @@ +.. figure:: figures/logo.svg + :width: 500 -Documentation for **ixdat** -=========================== + +Documentation for ``ixdat`` +########################### The in-situ experimental data tool ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We are just getting started with this project, please have patience. + +With ``ixdat``, you can import, combine, and export complex experimental datasets +as simply as:: + + ec = Measurement.read_set("awesome_EC_data", reader="biologic") + ec.plot_measurement() + + ms = Measurement.read("2021-03-30 16_59_35 MS data.tsv", reader="zilien") + ms.plot_measurement() + + ecms = ec + ms + ecms.plot_measurement() + + ecms.export("my_combined_data.csv") + +Output: + +.. figure:: figures/ixdat_example_figures.png + :width: 700 + + In-situ experimental data made easy + +Or rather than exporting, you can take advantage of ``ixdat``'s powerful analysis +tools and database backends to be a one-stop tool from messy raw data to public +repository accompanying your breakthrough publication and advancing our field. + + +The documentation +----------------- + +Welcome to the ``ixdat`` documentation. We hope that you can find what you are looking for here! + +The :ref:`Introduction` has a list of the techniques and file types supported so far. + +This documentation, like ``ixdat`` itself, is a work in progress and we appreciate any +feedback or requests `here `_. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 introduction - extended_concept - dataset + tutorials + extended-concept + developing + measurement + technique_docs/index + data-series + reader_docs/index + exporter_docs/index + plotter_docs/index license Indices and tables diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index 562adefb..514abdd9 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -1,16 +1,80 @@ +.. 
_Introduction: + ============ Introduction ============ -``ixdat`` will provide a powerful **object-oriented** interface to experimental data, especially in-situ experimental data for which it is of interest to combine data obtained simultaneously from multiple techniques. +``ixdat`` provides a powerful **object-oriented** interface to experimental data, +especially in-situ experimental data for which it is of interest to combine data obtained +simultaneously from multiple techniques. +In addition to a **pluggable** ``reader`` interface for importing your data format, it +includes pluggable exporters and plotters, as well as a database interface. + +For the philosophy behind ixdat, see :ref:`concept`. -``ixdat`` will replace the existing electrochemistry - mass spectrometry data tool, `EC_MS `_, and will thus become a powerful stand-alone tool for analysis and visualization of data acquired by the equipment of `Spectro Inlets `_ and other EC-MS solutions. -It will also replace the existing electrochemistry - synchrotron GIXRD data tool, `EC_Xray `_ when needed. -Over time, it will acquire functionality for more and more techniques. +``ixdat`` is free and open source software and we welcome input and new collaborators. +See :ref:`developing`. -In addition to a **pluggable** parser interface for importing your data format, it will include pluggable exporters and plotters, as well as a database interface. +Supported techniques +-------------------- -We will update this documentation as features are added. +.. list-table:: Techniques and Readers + :widths: 20 15 50 + :header-rows: 1 -``ixdat`` is free and open source software and we welcome input and new collaborators. 
-The source is here: https://github.com/ixdat \ No newline at end of file + * - Measurement technique + - Status + - Readers + * - :ref:`electrochemistry` + - Released + - - biologic: .mpt files from Biologic's EC-Lab software + - autolab: ascii files from AutoLab's NOVA software + - ivium: .txt files from Ivium's IviumSoft software + * - :ref:`mass-spec` + - Released + - - pfeiffer: .dat files from Pfeiffer Vacuum's PVMassSpec software + - cinfdata: text export from DTU Physics' cinfdata system + - zilien: .tsv files from Spectro Inlets' Zilien software + * - :ref:`ec-ms` + - Released + - - zilien: .tsv files from Spectro Inlets' Zilien software + - EC_MS: .pkl files from the legacy EC_MS python package + * - :ref:`sec` + - Released + - - msrh_sec: .csv file sets from Imperial College London's SEC system + * - X-ray photoelectron spectroscopy (XPS) + - Future + - + * - X-ray diffraction (XRD) + - Future + - + * - Low-Energy Ion Scattering (LEIS) + - Future + - + + +Installation +------------ + +To use ``ixdat``, you need to have python installed. We recommend +`Anaconda python `_. + +To install ``ixdat``, just type in your terminal or Anaconda prompt:: + + $ pip install ixdat + +And hit enter. + +``ixdat`` is under development, and to make use of the newest features, +you may need to upgrade to the latest version. This is also easy. Just type:: + + $ pip install --upgrade ixdat + + + +ixdat workflow +-------------- +.. figure:: figures/ixdat_flow.png + :width: 500 + + The power of combining techniques \ No newline at end of file diff --git a/docs/source/measurement.rst b/docs/source/measurement.rst new file mode 100644 index 00000000..3d46f546 --- /dev/null +++ b/docs/source/measurement.rst @@ -0,0 +1,91 @@ +.. _measurement: + +The measurement structure +========================= + +The **measurement** (``meas``) is the central object in the pluggable structure of ixdat, and the +main interface for user interaction. 
A measurement is an object of the generalized class +``Measurement``, defined in the ``measurements`` module, or an inheriting +***TechniqueMeasurement*** class defined in a module of the ``techniques`` folder +(see :ref:`techniques`). + +The general pluggable structure is defined by ``Measurement``, connecting every +measurement to a *reader* for importing from text, a *backend* for saving and loading in +``ixdat``, a *plotter* for visualization, and an *exporter* for saving outside of ``ixdat``. +Each TechniqueMeasurement class will likely have its own default reader, plotter, and +exporter, while an ``ixdat`` session will typically work with one backend handled by the +``db`` model. + +.. image:: figures/pluggable.svg + :width: 400 + :alt: Design: pluggability + +Classes for measurement techniques +---------------------------------- + +Inheritance in TechniqueMeasurement classes makes it so that related techniques +can share functionality. Here is an illustration of the role of inheritance, using +EC, MS, and EC-MS as an example: + +.. image:: figures/inheritance.svg + :width: 400 + :alt: Design: inheritance + +A full list of TechniqueMeasurements is in :ref:`techniques`. + +Initiating a measurement +------------------------ + +A typical workflow is to start by reading a file. For convenience, most readers are +accessible directly from ``Measurement``. So, for example, to read a .mpt file exported +by Biologic's EC-Lab, one can type: + +>>> from ixdat import Measurement +>>> ec_meas = Measurement.read("my_file.mpt", reader="biologic") + +See :ref:`readers ` for a description of the available readers. + +The biologic reader (``ixdat.readers.biologic.BiologicMPTReader``) ensures that the +object returned, ``ec_meas``, is of type ``ECMeasurement``. + +Another workflow starts with loading a measurement from the active ``ixdat`` backend. 
+This can also be done straight from ``Measurement``, as follows: + +>>> from ixdat import Measurement +>>> ec_meas = Measurement.get(3) + +Where the row with id=3 of the measurements table represents an electrochemistry +measurement. Here the column "technique" in the measurements table specifies which +TechniqueMeasurement class is returned. For row three of the measurements +table, the entry "technique" is "EC", ensuring ``ec_meas`` is an object of type +``ECMeasurement``. + +What's in a measurement +----------------------- +A measurement is basically a wrapper around a collection of ``data_series`` (see +:ref:`data_series`). + +There are several ways of interacting with a measurement's ``data_series``: + +- ``meas.grab()`` is the canonical way of getting numerical data out of a + measurement. Given the name of a ``ValueSeries``, it returns two numpy arrays, ``t`` and ``v`` + where ``t`` is the time (wrt ``meas.tstamp``) and ``v`` is the value as a function of that + time vector. ``grab`` takes a series name as its first argument and can also take a ``tspan`` + argument in which case it cuts the vectors to return data for the specific timespan of + the measurement. +- Indexing a measurement with the name of a data series returns that data series, with + any time values tstamp'd at ``meas.tstamp`` +- Most TechniqueMeasurements provide attribute-style access to essential DataSeries and + data. For example, ``ECMeasurement`` has properties for ``potential`` and ``current`` series, + as well as ``t``, ``v``, and ``j`` for data. +- The names of the series are available in ``meas.series_names``. +- The raw series are available in ``meas.series_list``. + + +The ``measurements`` module +--------------------------- +Here is the full in-line documentation of the ``measurements`` module containing the +``Measurement`` class. + +.. 
automodule:: ixdat.measurements + :members: diff --git a/docs/source/plotter_docs/index.rst b/docs/source/plotter_docs/index.rst new file mode 100644 index 00000000..4f3f10c1 --- /dev/null +++ b/docs/source/plotter_docs/index.rst @@ -0,0 +1,75 @@ +.. _plotters: + +Plotters: visualizing ``ixdat`` data +==================================== +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/plotters + +Basic +----- + +The ``base_mpl_plotter`` module +............................... + +.. automodule:: ixdat.plotters.base_mpl_plotter + :members: + +The ``value_plotter`` module +............................... + +.. automodule:: ixdat.plotters.value_plotter + :members: + +Electrochemistry +---------------- + +.. _`ec-plotter`: + +The ``ec_plotter`` module +............................... + +.. automodule:: ixdat.plotters.ec_plotter + :members: + +Mass Spectrometry +----------------- + +.. _`ms-plotter`: + +The ``ms_plotter`` module +............................... + +.. automodule:: ixdat.plotters.ms_plotter + :members: + +EC-MS +----- + +The ``ecms_plotter`` module +............................... + +.. _ecms-plotter: + +.. automodule:: ixdat.plotters.ecms_plotter + :members: + +Spectra +------- + +The ``spectrum_plotter`` module +............................... + +.. _spectrum-plotter: + +.. automodule:: ixdat.plotters.spectrum_plotter + :members: + +Spectroelectrochemistry +----------------------- + +.. _sec-plotter: + +The ``sec_plotter`` module +.......................... + +.. automodule:: ixdat.plotters.sec_plotter + :members: \ No newline at end of file diff --git a/docs/source/reader_docs/index.rst b/docs/source/reader_docs/index.rst new file mode 100644 index 00000000..1c8b3656 --- /dev/null +++ b/docs/source/reader_docs/index.rst @@ -0,0 +1,103 @@ +.. 
_readers: + +Readers: getting data into ``ixdat`` +==================================== +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/readers + +A full list of the readers thus accessible and their names can be viewed by typing: + +>>> from ixdat.readers import READER_CLASSES +>>> READER_CLASSES + +Reading .csv files exported by ixdat: The ``IxdatCSVReader`` +------------------------------------------------------------ + +``ixdat`` can export measurement data in a .csv format with necessary information in the +header. See :ref:`exporters`. It can naturally read the data that it exports itself. Exporting and reading, +however, may result in loss of raw data (unlike ``save()``). + +The ``ixdat_csv`` module +........................ + +.. automodule:: ixdat.readers.ixdat_csv + :members: + +Importing from other experimental data platforms +------------------------------------------------ + +**cinfdata** is a web-based database system for experimental data, developed and used at DTU SurfCat +(formerly CINF) in concert with The ``PyExpLabSys`` suite of experimental data acquisition tools. +Both are available at https://github.com/CINF. + +As of yet, ``ixdat`` only has a text-file reader for data exported from **cinfdata**, but +in the future it will also have a reader which downloads from the website given e.g. a +setup and date. + +The ``cinfdata`` module +....................... + +.. automodule:: ixdat.readers.cinfdata + :members: + +Electrochemistry and sub-techniques +------------------------------------ +These are readers which by default return an ``ECMeasurement``. +(See :ref:`electrochemistry`) + +The ``biologic`` module +........................ + +.. automodule:: ixdat.readers.biologic + :members: + +The ``autolab`` module +...................... + +.. automodule:: ixdat.readers.autolab + :members: + +The ``ivium`` module +.................... + +.. 
automodule:: ixdat.readers.ivium + :members: + +Mass Spectrometry and sub-techniques +------------------------------------ +These are readers which by default return an ``MSMeasurement``. +(See :ref:`mass-spec`) + +The ``pfeiffer`` module +........................ + +.. automodule:: ixdat.readers.pfeiffer + :members: + +EC-MS and sub-techniques +------------------------ +These are readers which by default return an ``ECMSMeasurement``. +(See :ref:`ec-ms`) + +The ``zilien`` module +..................... + +.. automodule:: ixdat.readers.zilien + :members: + +The ``ec_ms_pkl`` module +........................ + +.. automodule:: ixdat.readers.ec_ms_pkl + :members: + + +Spectro-electrochemistry and sub-techniques +------------------------------------------- +These are readers which by default return a ``SpectroECMeasurement``. +(See :ref:`sec`) + +The ``msrh_sec`` module +....................... + +.. automodule:: ixdat.readers.msrh_sec + :members: \ No newline at end of file diff --git a/docs/source/technique_docs/ec_ms.rst b/docs/source/technique_docs/ec_ms.rst new file mode 100644 index 00000000..f44dd38a --- /dev/null +++ b/docs/source/technique_docs/ec_ms.rst @@ -0,0 +1,71 @@ +.. _ec-ms: + +Electrochemistry - Mass Spectrometry (EC-MS) +============================================ + +The main class for EC-MS data is the ECMSMeasurement. + +It comes with the :ref:`EC-MS plotter ` which makes EC-MS plots like this one: + +.. figure:: ../figures/ec_ms_annotated.svg + :width: 600 + ``ECMSMeasurement.plot_measurement()``. Data from Trimarco, 2018. + +Other than that, it doesn't have much, but inherits from both ``ECMeasurement`` and ``MSMeasurement``. +An ``ECMSMeasurement`` can be created either by adding an ``ECMeasurement`` and an ``MSMeasurement`` +using the ``+`` operator, or by directly importing data using an EC-MS :ref:`reader ` +such as "zilien". 
+ +``ECMSCyclicVoltammogram`` adds to ``ECMSMeasurement`` the tools for selecting and analyzing data +based on an electrochemical cyclic voltammetry program that are implemented in ``CyclicVoltammogram`` +(see :ref:`cyclic_voltammetry`). + +.. Deconvolution, described in a `recent publication `_, +is implemented in the deconvolution module, in a class inheriting from ``ECMSMeasurement``. + +``ixdat`` has all the functionality and more for EC-MS data and analysis of the +legacy `EC_MS `_ package. This includes the tools +behind the EC-MS analysis and visualization in the publications: + +- Daniel B. Trimarco and Soren B. Scott, et al. **Enabling real-time detection of electrochemical desorption phenomena with sub-monolayer sensitivity**. `Electrochimica Acta, 268, 520-530 `_, **2018** + +- Claudie Roy, Bela Sebok, Soren B. Scott, et al. **Impact of nanoparticle size and lattice oxygen on water oxidation on NiFeOxHy**. `Nature Catalysis, 1(11), 820-829 `_, **2018** + +- Anna Winiwarter and Luca Silvioli, et al. **Towards an Atomistic Understanding of Electrocatalytic Partial Hydrocarbon Oxidation: Propene on Palladium**. `Energy and Environmental Science, 12, 1055-1067 `_, **2019** + +- Soren B. Scott and Albert Engstfeld, et al. **Anodic molecular hydrogen formation on Ru and Cu electrodes**. `Catalysis Science and Technology, 10, 6870-6878 `_, **2020** + +- Anna Winiwarter, et al. **CO as a Probe Molecule to Study Surface Adsorbates during Electrochemical Oxidation of Propene**. `ChemElectroChem, 8, 250-256 `_, **2021** + +``ixdat`` is used for the following EC-MS articles: + +- Soren B. Scott, et al. **Tracking oxygen atoms in electrochemical CO oxidation –Part I: Oxygen exchange via CO2 hydration**. `Electrochimica Acta, 374, 137842 `_, **2021**. + + Repository: https://github.com/ScottSoren/pyCOox_public + +- Soren B. Scott, et al. **Tracking oxygen atoms in electrochemical CO oxidation –Part II: Lattice oxygen reactivity in oxides of Pt and Ir**. 
`Electrochimica Acta, 374, 137844 `_, **2021**. + + Repository: https://github.com/ScottSoren/pyCOox_public + +- Kevin Krempl, et al. **Dynamic Interfacial Reaction Rates from Electrochemistry - Mass Spectrometry**. `Journal of Analytical Chemistry. 93, 7022-7028 `_, **2021** + + Repository: https://github.com/kkrempl/Dynamic-Interfacial-Reaction-Rates + +- Junheng Huang, et al. **Online Electrochemistry−Mass Spectrometry Evaluation of the Acidic Oxygen Evolution Reaction at Supported Catalysts**. `ACS Catal. 11, 12745-12753 `_, **2021** + + Repository: https://github.com/ScottSoren/Huang2021 + + +The ``ec_ms`` module +-------------------- +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques/ec_ms.py + +.. automodule:: ixdat.techniques.ec_ms + :members: + +The ``deconvolution`` module +---------------------------- +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques/deconvolution.py + +.. automodule:: ixdat.techniques.deconvolution + :members: \ No newline at end of file diff --git a/docs/source/technique_docs/electrochemistry.rst b/docs/source/technique_docs/electrochemistry.rst new file mode 100644 index 00000000..11a72320 --- /dev/null +++ b/docs/source/technique_docs/electrochemistry.rst @@ -0,0 +1,58 @@ +.. _electrochemistry: + +Electrochemistry +================ + +The main TechniqueMeasurement class for electrochemistry is the ``ECMeasurement``. +Subclasses of ``ECMeasurement`` include ``CyclicVoltammagram`` and ``CyclicVoltammagramDiff``. + +Direct-current electrochemistry measurements (``ixdat`` does not yet offer specific +functionality for impedance data) are characterized by the essential quantities being +working-electrode current (in loop with the counter electrode) and potential (vs the +reference electrode) as a function of time. 
Either current or potential can be controlled +as the input variable, so the other acts as the response, and it is common to plot +current vs potential, but in all cases both are tracked or controlled as a function of +time. This results in the essential variables ``t`` (time), ``v`` (potential), and ``j`` +(current). The main job of ``ECMeasurement`` and subclasses is to give standardized, +convenient, and powerful access to these three variables for data selection, analysis, +and visualization, regardless of which hardware the data was acquired with. + +The default plotter, :ref:`ECPlotter `, plots these variables. +The default exporter, :ref:`ECExporter `, exports these variables as well as an incrementer for +selecting data, ``cycle``. + +Electrochemistry is the most thoroughly developed technique in ``ixdat``. For in-depth +examples of the functionality in the ``ECMeasurement`` class and its subclasses, see +the following Tutorials: + +- `Loading appending and saving `_ + +- `Analyzing cyclic voltammagrams `_ + +The ``ec`` module +----------------- +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques/ec.py + +.. figure:: ../figures/ec_subplots.svg + :width: 600 + :alt: Example plots. left: ``ECMeasurement.plot_vs_potential()`` right: ``ECMeasurement.plot_measurement()`` + + left: ``ECMeasurement.plot_vs_potential()`` right: ``ECMeasurement.plot_measurement()``. `See tutorial `_ + +.. automodule:: ixdat.techniques.ec + :members: + +.. _`cyclic_voltammetry`: + +The ``cv`` module +----------------- +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques/cv.py + +.. figure:: ../figures/cv_diff.svg + :width: 300 + :alt: Example ``CyclicVoltammagramDiff`` plot + + output of ``CyclicVoltammagramDiff.plot()``. `Tutorial `_. + +.. 
automodule:: ixdat.techniques.cv + :members: \ No newline at end of file diff --git a/docs/source/technique_docs/index.rst b/docs/source/technique_docs/index.rst new file mode 100644 index 00000000..2fd5143a --- /dev/null +++ b/docs/source/technique_docs/index.rst @@ -0,0 +1,30 @@ +.. _techniques: + +Techniques: ``ixdat``'s measurement subclasses +============================================== +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques + +TechniqueMeasurement classes (interchangeable with Techniques or Measurement subclasses) +inherit from the ``Measurement`` class (see :ref:`measurement`) + +A full list of the techniques and there names is in the ``TECHNIQUE_CLASSES`` dictionary:: + + >>> from ixdat.techniques import TECHNIQUE_CLASSES + >>> TECHNIQUE_CLASSES # note, more techniques may have been added since! + { + 'simple': , + 'EC': , + 'CV': , + 'MS': , + 'EC-MS': + 'S-EC': + } + +.. toctree:: + :maxdepth: 2 + + electrochemistry + mass_spec + ec_ms + sec + spectra diff --git a/docs/source/technique_docs/mass_spec.rst b/docs/source/technique_docs/mass_spec.rst new file mode 100644 index 00000000..00620530 --- /dev/null +++ b/docs/source/technique_docs/mass_spec.rst @@ -0,0 +1,20 @@ +.. _mass-spec: + +Mass Spectrometry +================= +Source: https://github.com/ixdat/ixdat/tree/user_ready/src/ixdat/techniques/ms + +Mass spectrometry is commonly used in catalysis and electrocatalysis for two different +types of data - spectra, where intensity is taken while scanning over m/z, and +multiple ion detection (MID) where the intensity of a small set of m/z values are +tracked in time. + +The main TechniqueMeasurement class for MID data is the ``MSMeasurement``. + +Classes dealing with spectra are under development. + +The ``ms`` module +................. + +.. 
automodule:: ixdat.techniques.ms + :members: \ No newline at end of file diff --git a/docs/source/technique_docs/sec.rst b/docs/source/technique_docs/sec.rst new file mode 100644 index 00000000..bbb03c6b --- /dev/null +++ b/docs/source/technique_docs/sec.rst @@ -0,0 +1,44 @@ +.. _sec: + +Spectro-Electrochemistry +======================== + +Spectro-Electrochemsitry (S-EC) can refer to (i) a broad range of in-situ techniques +hyphenating electrochemistry to some kind of spectrometry (see e.g. +`Lozeman et al, 2020 `_ +) or (ii) more specifically, +the combination of electrochemistry and visible-light spectroscopy. In ``ixdat``, we +use the latter meaning. + +S-EC data is organized in a ``SpectroECMeasurement``, which inherits from ``ECMeasurement`` +(see :ref:`electrochemistry`) and uses a ``SpectrumSeries`` (see :ref:`Spectra `) +for managing the 2-D data array formed by the sequential spectra (see Figure). +To this, the class adds delta optical density (``dOD``) calculations. Methods +such as ``calc_dOD``, ``get_dOD_spectrum``, and ``track_wavelength`` take as +an optional argument a specification of the time/potential/spectrum index to +use as the reference for optical density calculation. If not provided, the +object's ``reference_spectrum`` is used, which itself can be set by the +``set_reference_spectrum`` method. + +.. figure:: ../figures/sec_class.svg + :width: 600 + +The data structure is the same whether the experiment is done as a slow potential scan with +adsorption vs potential in mind, or as a potential jump or release with time-resolved +behavior in mind. + +Plots of S-EC are made by the :ref:`SECPlotter `. These are either heat plots +(``plot_measurement`` and ``plot_vs_potential``) or coplotted cross-sections (``plot_waterfall`` +and ``plot_wavelengths``). :ref:`Exporting SEC data ` results in a master file with +the EC data and any tracked wavelengths and two auxiliary files with (i) the +spectrum series and (ii) the reference spectrum. 
+ +A :ref:`jupyter notebook tutorial ` for S-EC is available. + +Fitting of spectroelectrochemistry data is not yet supported in ``ixdat``. + +The ``spectroelectrochemistry`` module +...................................... + +.. automodule:: ixdat.techniques.spectroelectrochemistry + :members: \ No newline at end of file diff --git a/docs/source/technique_docs/spectra.rst b/docs/source/technique_docs/spectra.rst new file mode 100644 index 00000000..4c789234 --- /dev/null +++ b/docs/source/technique_docs/spectra.rst @@ -0,0 +1,28 @@ +.. _spectra: + +Spectrum +======== + +The position of spectra is not yet completely set in ixdat. + +A spectrum is in essence just a 1-D field where a response variable (e.g. counts, +detector current, adsorption) lives on a space defined by a scanning variable (e.g. wavelength, +mass-to-charge ratio, two-theta). +As such it could be a DataSeries on par with ValueSeries (a 1-D field with a value living in +a space defined by a TimeSeries). +A Spectrum is however a stand-alone output of an experimental technique. It could also be a type of Measurement. + +As it is now, ``Spectrum`` is its own base class on par with ``Measurement``, with its own table (i.e. the ``Spectrum`` class +inherits directly from ``Saveable``). It has properties which give quick access to the scanning +variable and response as ``x`` and ``y``, respectively. It also has its +own :ref:`plotter ` and :ref:`exporter `. + +Similar questions can be raised about a sequence of spectra - whether it is a Measurement or a 2-D field. +As it is now, sequences of spectra are represented by ``SpectrumSeries``, which inherits from +``Spectrum``. + +The ``spectra`` module +...................... + +.. automodule:: ixdat.spectra + :members: \ No newline at end of file diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst new file mode 100644 index 00000000..427ec52b --- /dev/null +++ b/docs/source/tutorials.rst @@ -0,0 +1,81 @@ +.. 
_tutorials:
+
+=========
+Tutorials
+=========
+
+``ixdat`` has a growing number of tutorials and examples available.
+
+Jupyter notebook tutorials
+--------------------------
+Jupyter notebooks are available in the ixdat Tutorials repository:
+https://github.com/ixdat/tutorials/
+
+This repository is a bit of a mess at the moment, apologies, but the tutorials themselves are
+not bad, if we may say so ourselves. More are needed. Right now there are two,
+both based on electrochemistry data:
+
+Loading, selecting, calibrating, and exporting data
+***************************************************
+Location: `loading_appending_and_saving/export_demo_data_as_csv.ipynb `_
+
+This tutorial shows with electrochemistry data how to load, append, and export data.
+It shows, among other things, the **appending + operator** and how to use the **backend** (save() and get()).
+
+It requires the data files `here `_.
+
+
+Comparing cycles of a cyclic voltammagram
+*****************************************
+
+Location: `simple_ec_analysis/difference_between_two_cvs.ipynb `_
+
+This tutorial, together with the previous one, shows ``ixdat``'s API for electrochemistry data.
+It demonstrates, with CO stripping as an example, the following features:
+
+- Selecting cyclic voltammetry cycles
+
+- Integrating current to get charge passed
+
+- Lining separate cycles up with respect to potential
+
+It reads ixdat-exported data directly from github.
+
+
+Spectroelectrochemistry
+***********************
+
+.. _sec-tutorial:
+
+Location: `spectroelectrochemistry/ `_
+
+This tutorial demonstrates importing, plotting, and exporting spectroelectrochemistry (S-EC) data.
+It shows delta optical density calculation and both calculation and plotting of the full 2-D data field and
+cross sections (i.e. spectra and wavelength-vs-time).
+
+The example data is not yet publicly available. 
+
+Article repositories
+--------------------
+
+Calibrating EC-MS data
+**********************
+See these two examples, respectively, for making and using an ixdat EC-MS calibration (here with isotope-labeled data):
+
+- https://github.com/ScottSoren/pyCOox_public/blob/main/paper_I_fig_S1/paper_I_fig_S1.py
+
+- https://github.com/ScottSoren/pyCOox_public/blob/main/paper_I_fig_2/paper_I_fig_2.py
+
+EC-MS data analysis
+*******************
+
+This article has examples of analyzing and manually plotting data imported by ixdat.
+
+https://github.com/ScottSoren/Huang2021
+
+
+Development scripts
+-------------------
+The basics of importing and plotting from each reader are demonstrated in
+the **development_scripts/reader_testers** folder of the repository:
+https://github.com/ixdat/ixdat/tree/user_ready/development_scripts/reader_testers
\ No newline at end of file
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 1f0a43af..a401181a 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,6 +5,7 @@ sphinx-rtd-theme
 sphinx_automodapi
 EC_MS
 flake8
+twine
+setuptools
 tox
-pytest
 invoke
diff --git a/requirements.txt b/requirements.txt
index e6d06cae..223ec5f0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,6 @@
 numpy>=1.16
-matplotlib>=3.2
\ No newline at end of file
+matplotlib>=3.2
+EC_MS>=0.7.4 # Temporary! For Zilien reading.
+scipy>=1.5 # for deconvolution (should be plugin?)
+mpmath>=1 # for deconvolution (should be plugin?)
+pandas>=1 # for some readers and an exporter.
diff --git a/setup.py b/setup.py
index d80d4738..6adca55e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,4 @@
 """Initial setup.py
-
-TODO: This file is rudimentary and setup mainly to enable tox to
-run. 
The main points missing are: - -* Proper and correct trove classifiers (https://pypi.org/classifiers/) -* A read through of metadata in ``__init__.py`` -* Handling of data files (necessary for the package to run, not for - development) when we start to get those - """ import os @@ -32,7 +23,8 @@ def read(*parts): Assume UTF-8 encoding. """ - with codecs.open(os.path.join(HERE, *parts), "rb", "utf-8") as f: + path_to_file = os.path.join(HERE, *parts) + with open(path_to_file, "r") as f: return f.read() @@ -54,6 +46,7 @@ def find_meta(meta): r"^__{meta}__ = ['\"]([^'\"]*)['\"]".format(meta=meta), META_FILE, re.M ) if meta_match: + print(f"found {meta}: '{meta_match.group(1)}'") # debugging return meta_match.group(1) raise RuntimeError("Unable to find __{meta}__ string.".format(meta=meta)) @@ -75,5 +68,6 @@ def find_meta(meta): "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], + install_requires=read("requirements.txt").split("\n"), python_requires=">=3.6", ) diff --git a/src/ixdat/__init__.py b/src/ixdat/__init__.py index afbb2968..35d8b55a 100644 --- a/src/ixdat/__init__.py +++ b/src/ixdat/__init__.py @@ -1,15 +1,15 @@ """initialize ixdat, giving top-level access to a few of the important structures """ -__version__ = "0.0.2dev" +__version__ = "0.2.0dev" __title__ = "ixdat" __description__ = "The in-situ experimental data tool" -__url__ = "https://github.com/ixdat/ixdat" -__author__ = "Soren B. Scott, Kevin Krempl, Kenneth Nielsen" -__email__ = "scott.soren@gmail.com" # maybe we should get an orgianization email? -# __copyright__ = "Copyright (c) 2020 ixdat" +__url__ = "https://ixdat.readthedocs.io" +__author__ = "Soren B. Scott, Kenneth Nielsen, et al" +__email__ = "sbscott@ic.ac.uk" # maybe we should get an organization email? __license__ = "MIT" from .measurements import Measurement +from .spectra import Spectrum from . import db from . import techniques from . 
import plotters diff --git a/src/ixdat/backends/directory_backend.py b/src/ixdat/backends/directory_backend.py index f9fbbccd..8c9df83a 100644 --- a/src/ixdat/backends/directory_backend.py +++ b/src/ixdat/backends/directory_backend.py @@ -141,7 +141,7 @@ def save_data(self, data, table_name, i, fixed_name=None): np.save(folder / data_file_name, data) def get(self, cls, i): - """Open a Savable object represented as row i of table cls.table_name""" + """Open a Saveable object represented as row i of table cls.table_name""" table_name = cls.table_name obj_as_dict = self.get_row_as_dict(table_name, i) i = obj_as_dict.pop("id", i) diff --git a/src/ixdat/config.py b/src/ixdat/config.py index c63beb18..fa560242 100644 --- a/src/ixdat/config.py +++ b/src/ixdat/config.py @@ -20,6 +20,13 @@ def __init__(self): self.standard_data_directory = Path.home() / "ixdat" self.default_project_name = "test" + @property + def ixdat_temp_dir(self): + temp_dir = self.standard_data_directory / "temp" + if not temp_dir.exists(): + temp_dir.mkdir(parents=True) + return temp_dir + CFG = Config() diff --git a/src/ixdat/constants.py b/src/ixdat/constants.py new file mode 100644 index 00000000..eeb2cf7c --- /dev/null +++ b/src/ixdat/constants.py @@ -0,0 +1,52 @@ +from scipy import constants as scipy_constants + +# short-form aliases for a few scipy constants +c = scipy_constants.c # speed of light / (m/s) +qe = scipy_constants.e # fundamental charge / (C) +h = scipy_constants.h # planck's constant / (J*s) +hbar = scipy_constants.hbar # reduced planck's constant / (J*s) +NA = scipy_constants.N_A # Avogadro's number /(mol) or dimensionless +me = scipy_constants.m_e # mass of electron / (kg) +kB = scipy_constants.k # Boltzman constant / (J/K) +u0 = scipy_constants.mu_0 # permeability of free space / (J*s^2/(m*C^2)) +e0 = scipy_constants.epsilon_0 # permittivity of free space / (C^2/(J*m)) +R = scipy_constants.R # gas constant / (J/(mol*K)) + +# a few extra derived constants +amu = 1e-3 / NA # 
atomic mass unit / (kg) # amu=(1g/mol)/NA +Far = NA * qe # Faraday's constant, C/mol + +# long-form aliases +FARADAY_CONSTANT = Far +AVOGADROS_CONSTANT = NA +BOLTZMAN_CONSTANT = kB + +# standard conditions +STANDARD_TEMPERATURE = 298.15 # Standard temperature of 25 C in [K] +STANDARD_PRESSURE = 1e5 # Standard pressure of 1 bar in [Pa] + +# molecule properties (should probably come from elsewhere). +DYNAMIC_VISCOSITIES = { + "O2": 2.07e-05, + "N2": 1.79e-05, + "Ar": 2.27e-05, + "He": 1.99e-05, + "CO": 1.78e-05, + "H2": 8.90e-06, +} # in [Pa*s] +MOLECULAR_DIAMETERS = { + "O2": 3.55e-10, + "N2": 3.7e-10, + "Ar": 3.58e-10, + "He": 2.15e-10, + "CO": 3.76e-10, + "H2": 2.71e-10, +} # in [m] +MOLAR_MASSES = { + "O2": 31.998, + "N2": 28.014, + "Ar": 39.948, + "He": 4.002, + "CO": 28.010, + "H2": 2.016, +} # in [g/mol] diff --git a/src/ixdat/data_series.py b/src/ixdat/data_series.py index 36f4aa73..519f49b5 100644 --- a/src/ixdat/data_series.py +++ b/src/ixdat/data_series.py @@ -178,6 +178,16 @@ def data(self): ) return self._data + @property + def tstamp(self): + """The unix time corresponding to t=0 for the time-resolved axis of the Field + + The timestamp of a Field is the timestamp of its TimeSeries or ValueSeries + """ + for s in self.axes_series: + if isinstance(s, (ValueSeries, TimeSeries)): + return s.tstamp + class ValueSeries(Field): """Class to store scalar values that are measured over time. diff --git a/src/ixdat/db.py b/src/ixdat/db.py index 938b6d7c..6d5e41b4 100644 --- a/src/ixdat/db.py +++ b/src/ixdat/db.py @@ -16,7 +16,7 @@ managed attribute, from an object in memory. `load` and `get` convention holds vertically - i.e. the Backend, the DataBase, - up through the Savable parent class for all ixdat classes corresponding to + up through the Saveable parent class for all ixdat classes corresponding to database tables have `load` and `get` methods which call downwards. TODO. 
see: https://github.com/ixdat/ixdat/pull/1#discussion_r546400793 """ @@ -27,7 +27,7 @@ class DataBase: - """This class is a kind of middle-man between a Backend and a Savealbe class + """This class is a kind of middle-man between a Backend and a Saveable class The reason for a middle man here is that it enables different databases (backends) to be switched between and kept track of in a single ixdat session. @@ -42,27 +42,31 @@ def __init__(self, backend=None): self.new_object_backend = "none" def save(self, obj): - """Save a Savable object with the backend""" + """Save a Saveable object with the backend""" return self.backend.save(obj) def get(self, cls, i, backend=None): - """Select and return object of Savable class cls with id=i from the backend""" + """Select and return object of Saveable class cls with id=i from the backend""" backend = backend or self.backend obj = backend.get(cls, i) # obj will already have obj.id = i and obj.backend = self.backend from backend return obj def load(self, cls, name): - """Select and return object of Savable class cls with name=name from backend""" + """Select and return object of Saveable class cls with name=name from backend""" def load_obj_data(self, obj): - """Load and return the numerical data (obj.data) for a Savable object""" + """Load and return the numerical data (obj.data) for a Saveable object""" return self.backend.load_obj_data(obj) def set_backend(self, backend_name, **db_kwargs): """Change backend to the class given by backend_name initiated with db_kwargs""" - if backend_name in BACKEND_CLASSES: + if not isinstance(backend_name, str): + # Then we assume that it is the backend itself, not the backend name + self.backend = backend_name + elif backend_name in BACKEND_CLASSES: BackendClass = BACKEND_CLASSES[backend_name] + self.backend = BackendClass(**db_kwargs) else: raise NotImplementedError( f"ixdat doesn't recognize db_name = '{backend_name}'. 
If this is a new" @@ -70,7 +74,7 @@ def set_backend(self, backend_name, **db_kwargs): "constant in ixdat.backends." "Or manually set it directly with DB.backend = " ) - self.backend = BackendClass(**db_kwargs) + return self.backend DB = DataBase() # initate the database. It functions as a global "constant" @@ -150,7 +154,7 @@ class Saveable: child_attrs = None # THIS SHOULD BE OVERWRITTEN IN CLASSES WITH DATA REFERENCES def __init__(self, backend=None, **self_as_dict): - """Initialize a Savable object from its dictionary serialization + """Initialize a Saveable object from its dictionary serialization This is the default behavior, and should be overwritten using an argument-free call to super().__init__() in inheriting classes. @@ -192,7 +196,7 @@ def short_identity(self): FIXME: The overloaded return here is annoying and dangerous, but necessary for `Measurement.from_dict(m.as_dict())` to work as a copy, since the call to `fill_object_list` has to specify where the objects represented by - PlaceHolderObjects live. Note that calling save() on a Savable object will + PlaceHolderObjects live. Note that calling save() on a Saveable object will turn the backends into DB.backend, so this will only give id's when saving. 
This is (usually) sufficient to tell if two objects refer to the same thing, when used together with the class attribute table_name @@ -211,7 +215,7 @@ def full_identity(self): @property def backend(self): - """The backend the Savable object was loaded from or last saved to.""" + """The backend the Saveable object was loaded from or last saved to.""" if not self._backend: self._backend = database_backends["none"] return self._backend @@ -239,11 +243,11 @@ def backend_type(self): return self.backend.backend_type def set_id(self, i): - """Backends set obj.id here after loading/saving a Savable obj""" + """Backends set obj.id here after loading/saving a Saveable obj""" self._id = i def set_backend(self, backend): - """Backends set obj.backend here after loading/saving a Savable obj""" + """Backends set obj.backend here after loading/saving a Saveable obj""" self.backend = backend def get_main_dict(self, exclude=None): @@ -331,8 +335,11 @@ def __eq__(self, other): return False if self.extra_linkers: linker_id_names = [ - id_name for (linker_table_name, (linked_table_name, id_name)) - in self.extra_linkers.items() + id_name + for ( + linker_table_name, + (linked_table_name, id_name), + ) in self.extra_linkers.items() ] # FIXME: This will be made much simpler with coming metaprogramming else: linker_id_names = [] @@ -376,6 +383,18 @@ def save(self, db=None): db = db or self.db return db.save(self) + @classmethod + def get_all_column_attrs(cls): + """List all attributes of objects of cls that correspond to table columns""" + all_attrs = cls.column_attrs + if cls.extra_column_attrs: + for table, attrs in cls.extra_column_attrs.items(): + all_attrs = all_attrs.union(attrs) + if cls.extra_linkers: + for table, (ref_table, attr) in cls.extra_linkers.items(): + all_attrs.add(attr) + return all_attrs + @classmethod def from_dict(cls, obj_as_dict): """Return an object built from its serialization.""" @@ -384,8 +403,11 @@ def from_dict(cls, obj_as_dict): @classmethod def get(cls, 
i, backend=None): """Open an object of cls given its id (the table is cls.table_name)""" - backend = backend or DB.backend - return backend.get(cls, i) + old_backend = DB.backend + DB.set_backend(backend or old_backend) + obj = DB.get(cls, i) # gets it from the requested backend. + DB.set_backend(old_backend) + return obj def load_data(self, db=None): """Load the data of the object, if ixdat in its laziness hasn't done so yet""" @@ -394,17 +416,21 @@ def load_data(self, db=None): class PlaceHolderObject: - """A tool for ixdat's laziness, instances sit in for Savable objects.""" + """A tool for ixdat's laziness, instances sit in for Saveable objects.""" - def __init__(self, i, cls, backend): + def __init__(self, i, cls, backend=None): """Initiate a PlaceHolderObject with info for loading the real obj when needed Args: i (int): The id (principle key) of the object represented - cls (class): Class inheriting from Savable and thus specifiying the table + cls (class): Class inheriting from Saveable and thus specifiying the table + backend (Backend, optional): by default, placeholders objects must live in + the active backend. This is the case if loaded with get(). """ self.id = i self.cls = cls + if not backend: # + backend = DB.backend if not backend or backend == "none" or backend is database_backends["none"]: raise DataBaseError( f"Can't make a PlaceHolderObject with backend={backend}" @@ -433,7 +459,7 @@ def fill_object_list(object_list, obj_ids, cls=None): obj_ids (list of ints or None): The id's of objects to ensure are in the list. Any id in obj_ids not already represented in object_list is added to the list as a PlaceHolderObject - cls (Savable class): the class remembered by any PlaceHolderObjects + cls (Saveable class): the class remembered by any PlaceHolderObjects added to the object_list, so that eventually the right object will be loaded. Must be specified if object_list is empty. 
""" @@ -454,7 +480,7 @@ def fill_object_list(object_list, obj_ids, cls=None): def with_memory(function): - """Decorator for saving all new Savable objects initiated in the memory backend""" + """Decorator for saving all new Saveable objects initiated in the memory backend""" def function_with_memory(*args, **kwargs): DB.new_object_backend = "memory" diff --git a/src/ixdat/exceptions.py b/src/ixdat/exceptions.py index 716a66db..e12b6277 100644 --- a/src/ixdat/exceptions.py +++ b/src/ixdat/exceptions.py @@ -29,5 +29,5 @@ class TechniqueError(Exception): """ixdat errors having to do with techniques and their limitations""" -# TODO: Make a depreciation warning. -# See https://github.com/ixdat/ixdat/pull/11#discussion_r747816670 +class QuantificationError(Exception): + """ixdat errors having to do with techniques and their limitations""" diff --git a/src/ixdat/exporters/__init__.py b/src/ixdat/exporters/__init__.py index 02ca5a19..dcb14834 100644 --- a/src/ixdat/exporters/__init__.py +++ b/src/ixdat/exporters/__init__.py @@ -1,2 +1,3 @@ from .csv_exporter import CSVExporter from .ec_exporter import ECExporter +from .ecms_exporter import ECMSExporter diff --git a/src/ixdat/exporters/csv_exporter.py b/src/ixdat/exporters/csv_exporter.py index d216278c..e70bdf5d 100644 --- a/src/ixdat/exporters/csv_exporter.py +++ b/src/ixdat/exporters/csv_exporter.py @@ -1,74 +1,137 @@ """Classes for exporting measurement data""" from pathlib import Path +import json class CSVExporter: """The default exporter, which writes delimited measurement data row-wise to file""" - def __init__(self, measurement=None, delimiter=",\t", default_v_list=None): + default_export_columns = None # Typically overwritten by inheriting Exporters + """The names of the value series to export by default.""" + aliases = None # This will typically be overwritten by inheriting Exporters + """The aliases, needed for techniques with essential series that get renamed.""" + + def __init__(self, measurement=None, 
delim=",\t"): """Initiate the exported with a measurement (Measurement) and delimiter (str)""" self.measurement = measurement - self.delimiter = delimiter - self._default_v_list = default_v_list - - @property - def default_v_list(self): - """This will typically be overwritten by inheriting Exporters""" - return self._default_v_list + self.delim = delim + self.header_lines = None + self.s_list = None + self.columns_data = None + self.path_to_file = None - def export(self, *args, **kwargs): - """Export the exporter's measurement via exporter.export_measurement()""" - return self.export_measurement(self.measurement, *args, **kwargs) - - def export_measurement(self, measurement, path_to_file, v_list=None, tspan=None): + def export(self, path_to_file=None, measurement=None, columns=None, tspan=None): """Export a given measurement to a specified file. + To improve flexibility with inheritance, this method allocates its work to: + - CSVExporter.prepare_header_and_data() + - CSVExporter.write_header() + - CSVExporter.write_data() + Args: - measurement (Measurement): The measurement to export - path_to_file (Path): The path to the file to measure. If it has no suffix, - a .csv suffix is appended. - v_list (list of str): The names of the data series to include. Defaults in + measurement (Measurement): The measurement to export. + Defaults to self.measurement. + TODO: remove this kwarg. See conversation here: + https://github.com/ixdat/ixdat/pull/30/files#r810926968 + path_to_file (Path): The path to the file to write. If it has no suffix, + a .csv suffix is appended. Defaults to f"{measurement.name}.csv" + columns (list of str): The names of the data series to include. Defaults in CSVExporter to all VSeries and TSeries in the measurement. This default may be overwritten in inheriting exporters. 
tspan (timespan): The timespan to include in the file, defaults to all of it """ - columns_data = {} - s_list = [] - v_list = v_list or self.default_v_list or list(measurement.data_cols) + measurement = measurement or self.measurement + if not path_to_file: + path_to_file = f"{measurement.name}.csv" if isinstance(path_to_file, str): path_to_file = Path(path_to_file) if not path_to_file.suffix: path_to_file = path_to_file.with_suffix(".csv") + self.path_to_file = path_to_file + self.prepare_header_and_data(measurement, columns, tspan) + self.prepare_column_header() + self.write_header() + self.write_data() + + def prepare_header_and_data(self, measurement, v_list, tspan): + """Prepare self.header_lines to include metadata and value-time pairs + + Args: + measurement (Measurement): The measurement being exported + v_list (list of str): The names of the ValueSeries to include + tspan (timespan): The timespan of the data to include in the export + """ + columns_data = {} + # list of the value names to export: + v_list = v_list or self.default_export_columns or list(measurement.value_names) + s_list = [] # list of the series names to export. + # s_list will also include names of TimeSeries. + + timecols = {} # Will be {time_name: value_names}, for the header. for v_name in v_list: + # Collect data and names for each ValueSeries and TimeSeries t_name = measurement[v_name].tseries.name t, v = measurement.grab(v_name, tspan=tspan) - if t_name not in columns_data: + if t_name in timecols: + # We've already collected the data for this time column + timecols[t_name].append(v_name) + else: + # New time column. Collect its data and add it to the timecols. 
columns_data[t_name] = t s_list.append(t_name) + timecols[t_name] = [v_name] columns_data[v_name] = v s_list.append(v_name) - header_line = ( - "".join([s_name + self.delimiter for s_name in s_list])[ - : -len(self.delimiter) - ] - + "\n" - ) + header_lines = [] + for attr in ["name", "technique", "tstamp", "backend_name", "id"]: + line = f"{attr} = {getattr(measurement, attr)}\n" + header_lines.append(line) + # TODO: This should be more automated... the exporter should put all + # the appropriate metadata attributes of the object, read from its + # table definition, in the header. + for t_name, v_names in timecols.items(): + # Header includes a line for each time column stating which values use it: + line = ( + f"timecol '{t_name}' for: " + + " and ".join([f"'{v_name}'" for v_name in v_names]) + + "\n" + ) + header_lines.append(line) + if self.aliases: + # For now, aliases is nice after the timecol lines. But see the to-do above. + aliases_line = f"aliases = {json.dumps(self.aliases)}\n" + header_lines.append(aliases_line) + self.header_lines = header_lines + self.s_list = s_list + self.columns_data = columns_data + + def prepare_column_header(self): + """Prepare the column header line and finish the header_lines""" + N_header_lines = len(self.header_lines) + 3 + self.header_lines.append(f"N_header_lines = {N_header_lines}\n") + self.header_lines.append("\n") - lines = [header_line] - # All columns must have the length of the longest vector to keep data aligned: - max_length = max([len(data) for data in columns_data.values()]) + col_header_line = self.delim.join(self.s_list) + "\n" + self.header_lines.append(col_header_line) + + def write_header(self): + """Create the file and write the header lines.""" + with open(self.path_to_file, "w") as f: + f.writelines(self.header_lines) + + def write_data(self): + """Write data to the file one line at a time.""" + max_length = max([len(data) for data in self.columns_data.values()]) for n in range(max_length): - line = "" - 
for s_name in s_list: - if len(columns_data[s_name]) > n: - # Then put the data there: - line = line + str(columns_data[s_name][n]) + self.delimiter + data_strings = [] + for s_name in self.s_list: + if len(self.columns_data[s_name]) > n: + # Then there's more data to write for this series + data_strings.append(str(self.columns_data[s_name][n])) else: - # No more data in this column. Just hold alignment: - line += self.delimiter - line += "\n" - lines.append(line) - - with open(path_to_file, "w") as f: - f.writelines(lines) + # Then all this series is written. Just leave space. + data_strings.append("") + line = self.delim.join(data_strings) + "\n" + with open(self.path_to_file, "a") as f: + f.write(line) diff --git a/src/ixdat/exporters/ec_exporter.py b/src/ixdat/exporters/ec_exporter.py index f07c87e5..d6f14cdf 100644 --- a/src/ixdat/exporters/ec_exporter.py +++ b/src/ixdat/exporters/ec_exporter.py @@ -5,7 +5,7 @@ class ECExporter(CSVExporter): """A CSVExporter that by default exports potential, current, and selector""" @property - def default_v_list(self): + def default_export_columns(self): """The default v_list for ECExporter is V_str, J_str, and sel_str""" return [ # self.measurement.t_name, @@ -13,3 +13,12 @@ def default_v_list(self): self.measurement.j_name, self.measurement.selector_name, ] + + @property + def aliases(self): + return { + "t": (self.measurement.t_name,), + "raw_potential": (self.measurement.v_name,), + "raw_current": (self.measurement.j_name,), + "selector": (self.measurement.selector_name,), + } diff --git a/src/ixdat/exporters/ecms_exporter.py b/src/ixdat/exporters/ecms_exporter.py new file mode 100644 index 00000000..c8d33897 --- /dev/null +++ b/src/ixdat/exporters/ecms_exporter.py @@ -0,0 +1,36 @@ +from .csv_exporter import CSVExporter +from .ec_exporter import ECExporter + + +class ECMSExporter(CSVExporter): + """A CSVExporter that by default exports potential, current, selector, and all MID""" + + @property + def 
default_export_columns(self): + """The default v_list for ECExporter is V_str, J_str, and sel_str""" + v_list = ( + ECExporter(measurement=self.measurement).default_v_list + + self.measurement.mass_list + ) + + return v_list + + def export( + self, + path_to_file=None, + measurement=None, + v_list=None, + tspan=None, + mass_list=None, + mol_list=None, + ): + if not v_list: + if mass_list: + v_list = ECExporter(measurement=self.measurement).default_v_list + else: + v_list = self.default_v_list + if mass_list: + v_list += mass_list + if mol_list: + v_list += [f"n_dot_{mol}" for mol in mol_list] + return super().export(path_to_file, measurement, v_list, tspan) diff --git a/src/ixdat/exporters/sec_exporter.py b/src/ixdat/exporters/sec_exporter.py new file mode 100644 index 00000000..689227d3 --- /dev/null +++ b/src/ixdat/exporters/sec_exporter.py @@ -0,0 +1,71 @@ +from .csv_exporter import CSVExporter +from .ec_exporter import ECExporter +from .spectrum_exporter import SpectrumExporter, SpectrumSeriesExporter + + +class SECExporter(CSVExporter): + """Adds to CSVExporter the export of the Field with the SEC spectra""" + + def __init__(self, measurement, delim=",\t"): + super().__init__(measurement, delim=delim) + # FIXME: The lines below don't work because this __init__ gets called before + # the measurement's __init__ is finished. 
+ # self.reference_exporter = SpectrumExporter(measurement.reference_spectrum) + # self.spectra_exporter = SpectrumSeriesExporter(measurement.spectrum_series) + self._reference_exporter = None + self._spectra_exporter = None + + @property + def reference_exporter(self): + if not self._reference_exporter: + self._reference_exporter = SpectrumExporter( + self.measurement.reference_spectrum + ) + return self._reference_exporter + + @property + def spectra_exporter(self): + if not self._spectra_exporter: + self._spectra_exporter = SpectrumSeriesExporter( + self.measurement.spectrum_series + ) + return self._spectra_exporter + + @property + def default_export_columns(self): + """The default v_list for SECExporter is that from EC and tracked wavelengths""" + v_list = ( + ECExporter(measurement=self.measurement).default_v_list + + self.measurement.tracked_wavelengths + ) + return v_list + + aliases = ECExporter.aliases + + def prepare_header_and_data(self, measurement, v_list, tspan): + """Do the standard ixdat csv export header preparation, plus SEC stuff. + + The SEC stuff is: + - export the spectroelectrochemistry spectra + - export the actual reference spectrum + - add lines to the main file header pointing to the files with the + above two exports. 
+ """ + super().prepare_header_and_data(measurement, v_list, tspan) + path_to_spectra_file = self.path_to_file.parent / ( + self.path_to_file.stem + "_spectra.csv" + ) + measurement = measurement or self.measurement + self.header_lines.append(f"'spectra' in file: '{path_to_spectra_file.name}'\n") + self.spectra_exporter.export(measurement.spectrum_series, path_to_spectra_file) + path_to_reference_spectrum_file = self.path_to_file.parent / ( + self.path_to_file.stem + "_reference.csv" + ) + self.header_lines.append( + f"'reference' in file: '{path_to_reference_spectrum_file.name}'\n" + ) + self.reference_exporter.export( + measurement.reference_spectrum, path_to_reference_spectrum_file + ) + + print(f"writing {self.path_to_file}!") diff --git a/src/ixdat/exporters/spectrum_exporter.py b/src/ixdat/exporters/spectrum_exporter.py new file mode 100644 index 00000000..427e7ed1 --- /dev/null +++ b/src/ixdat/exporters/spectrum_exporter.py @@ -0,0 +1,132 @@ +import pandas as pd +from collections import OrderedDict + + +class SpectrumExporter: + """An ixdat CSV exporter for spectra. Uses pandas.""" + + def __init__(self, spectrum, delim=","): + """Initiate the SpectrumExporter. + + Args: + spectrum (Spectrum): The spectrum to export by default + delim (char): The separator for the .csv file. Note that this cannot be + the ",\t" used by ixdat's main exporter since pandas only accepts single + character delimiters. + """ + self.spectrum = spectrum + self.delim = delim + + def export(self, spectrum, path_to_file): + """Export spectrum to path_to_file. + + Args: + spectrum (Spectrum): The spectrum to export if different from self.spectrum + TODO: remove this kwarg. See conversation here: + https://github.com/ixdat/ixdat/pull/30/files#r810926968 + path_to_file (str or Path): The path of the file to export to. Note that if a + file already exists with this path, it will be overwritten. 
+ """ + spectrum = spectrum or self.spectrum + df = pd.DataFrame({spectrum.x_name: spectrum.x, spectrum.y_name: spectrum.y}) + + header_lines = [] + for attr in ["name", "technique", "tstamp", "backend_name", "id"]: + line = f"{attr} = {getattr(spectrum, attr)}\n" + header_lines.append(line) + + N_header_lines = len(header_lines) + 3 + header_lines.append(f"N_header_lines = {N_header_lines}\n") + header_lines.append("\n") + + with open(path_to_file, "w") as f: + f.writelines(header_lines) + with open(path_to_file, "a") as f: + df.to_csv(f, index=False, sep=self.delim, line_terminator="\n") + + print(f"wrote {path_to_file}!") + + +class SpectrumSeriesExporter: + """An exporter for ixdat spectrum series.""" + + def __init__(self, spectrum_series, delim=","): + """Initiate the SpectrumSeriesExporter. + + Args: + spectrum_series (SpectrumSeries): The spectrum to export by default + delim (char): The separator for the .csv file. Note that this cannot be + the ",\t" used by ixdat's main exporter since pandas only accepts single + character delimiters. + """ + self.spectrum_series = spectrum_series + self.delim = delim + + def export(self, spectrum_series=None, path_to_file=None, spectra_as_rows=True): + """Export spectrum series to path_to_file. + + Args: + spectrum_series (Spectrum): The spectrum_series to export if different from + self.spectrum_series + TODO: remove this kwarg. See conversation here: + https://github.com/ixdat/ixdat/pull/30/files#r810926968 + path_to_file (str or Path): The path of the file to export to. Note that if a + file already exists with this path, it will be overwritten. + spectra_as_rows (bool): This specifies the orientation of the data exported. + If True, the scanning variabe (e.g. wavelength) increases to the right + and the time variable increases downward. If False, the scanning + variable increases downwards and the time variable increases to the + right. Either way it is clarified in the file header. Defaults to True. 
+ """ + + spectrum_series = spectrum_series or self.spectrum_series + + field = spectrum_series.field + data = field.data + tseries, xseries = spectrum_series.field.axes_series + t = tseries.t + tseries.tstamp - spectrum_series.tstamp + x = xseries.data + + header_lines = [] + for attr in ["name", "technique", "tstamp", "backend_name", "id"]: + line = f"{attr} = {getattr(spectrum_series, attr)}\n" + header_lines.append(line) + + N_header_lines = len(header_lines) + 3 + header_lines.append(f"N_header_lines = {N_header_lines}\n") + + header_lines.append( + f"values are y='{field.name}' with units [{field.unit_name}]\n" + ) + + if spectra_as_rows: # columns are ValueSeries + data_as_list_of_tuples = [(spectrum_series.t_name, t)] + [ + (x_i, data[:, i]) for i, x_i in enumerate(x) + ] + df = pd.DataFrame(OrderedDict(data_as_list_of_tuples)) + header_lines.append( + f"first row is x='{xseries.name}' with units [{xseries.unit_name}]\n" + ) + header_lines.append( + f"first column is t='{tseries.name}' with units [{tseries.unit_name}]\n" + ) + else: # spectra as columns. 
rows are ValueSeries + data_as_list_of_tuples = [(spectrum_series.x_name, x)] + [ + (t_i, data[i, :]) for i, t_i in enumerate(t) + ] + df = pd.DataFrame(OrderedDict(data_as_list_of_tuples)) + header_lines.append( + f"first row is t='{tseries.name}' with units [{tseries.unit_name}]\n" + ) + header_lines.append( + f"first column is x='{xseries.name}' with units [{xseries.unit_name}]\n" + ) + + header_lines.append("\n") + + with open(path_to_file, "w") as f: + f.writelines(header_lines) + with open(path_to_file, "a") as f: + df.to_csv(f, index=False, sep=self.delim, line_terminator="\n") + + print(f"wrote {path_to_file}!") diff --git a/src/ixdat/measurements.py b/src/ixdat/measurements.py index aa2365e1..d8766bbd 100644 --- a/src/ixdat/measurements.py +++ b/src/ixdat/measurements.py @@ -1,6 +1,6 @@ """This module defines the Measurement class, the central data structure of ixdat -An ixdat Measurement is a collection of references to DataSeries with the metadata needed +An ixdat Measurement is a collection of references to DataSeries and the metadata needed to combine them, i.e. "build" the combined dataset. It has a number of general methods to visualize and analyze the combined dataset. Measurement is also the base class for a number of technique-specific Measurement-derived classes. 
@@ -9,6 +9,7 @@ also defines the base class for Calibration, while technique-specific Calibration classes will be defined in the corresponding module in ./techniques/ """ +from pathlib import Path import json import numpy as np from .db import Saveable, PlaceHolderObject, fill_object_list @@ -21,8 +22,8 @@ time_shifted, get_tspans_from_mask, ) -from .samples import Sample -from .lablogs import LabLog +from .projects.samples import Sample +from .projects.lablogs import LabLog from .exporters.csv_exporter import CSVExporter from .plotters.value_plotter import ValuePlotter from .exceptions import BuildError, SeriesNotFoundError @@ -43,7 +44,7 @@ class Measurement(Saveable): } extra_linkers = { "component_measurements": ("measurements", "m_ids"), - "measurement_calibrations": ("calibration", "c_ids"), + "measurement_calibrations": ("calibrations", "c_ids"), "measurement_series": ("data_series", "s_ids"), } child_attrs = ["component_measurements", "calibration_list", "series_list"] @@ -144,6 +145,9 @@ def __init__( self.plot_measurement = self.plotter.plot_measurement self.plot = self.plotter.plot_measurement self.export = self.exporter.export + # TODO: ... but we need to think a bit more about how to most elegantly and + # dynamically choose plotters (Nice idea from Anna: + # https://github.com/ixdat/ixdat/issues/32) @classmethod def from_dict(cls, obj_as_dict): @@ -168,7 +172,7 @@ def from_dict(cls, obj_as_dict): # obj_as_dict["sample_name"] needs to be renamed obj_as_dict["sample"] before # obj_as_dict can be passed to __init__. # TODO: This is a rather general problem (see, e.g. DataSeries.unit vs - # DataSeries.unit_name) and as such should be moved to db.Savable + # DataSeries.unit_name) and as such should be moved to db.Saveable # see: https://github.com/ixdat/ixdat/pull/5#discussion_r565090372. # Will be fixed with the table definition PR. 
objects_saved_as_their_name = [ @@ -181,22 +185,36 @@ def from_dict(cls, obj_as_dict): del obj_as_dict[object_name_str] if obj_as_dict["technique"] in TECHNIQUE_CLASSES: + # This makes it so that from_dict() can be used to initiate for any more + # derived technique, so long as obj_as_dict specifies the technique name! technique_class = TECHNIQUE_CLASSES[obj_as_dict["technique"]] + if not issubclass(technique_class, cls): + # But we never want obj_as_dict["technique"] to take us to a *less* + # specific technique, if the user has been intentional about which + # class they call `as_dict` from (e.g. via a Reader)! + technique_class = cls else: + # Normally, we're going to want to make sure that we're in technique_class = cls measurement = technique_class(**obj_as_dict) return measurement @classmethod def read(cls, path_to_file, reader, **kwargs): - """Return a Measurement object from parsing a file with the specified reader""" + """Return a Measurement object from parsing a file with the specified reader + + Args: + path_to_file (Path or str): The path to the file to read + reader (str or Reader class): The (name of the) reader to read the file with. + kwargs: key-word arguments are passed on to the reader's read() method. + """ if isinstance(reader, str): # TODO: see if there isn't a way to put the import at the top of the module. 
# see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437471 from .readers import READER_CLASSES reader = READER_CLASSES[reader]() - obj = reader.read(path_to_file, **kwargs) # TODO: take cls as kwarg + obj = reader.read(path_to_file, cls=cls, **kwargs) if obj.__class__.essential_series_names: for series_name in obj.__class__.essential_series_names: @@ -209,6 +227,150 @@ def read(cls, path_to_file, reader, **kwargs): ) return obj + @classmethod + def read_url(cls, url, reader, **kwargs): + """Read a url (via a temporary file) using the specified reader""" + from .readers.reading_tools import url_to_file + + path_to_temp_file = url_to_file(url) + measurement = cls.read(path_to_temp_file, reader=reader, **kwargs) + path_to_temp_file.unlink() + return measurement + + @classmethod + def read_set( + cls, path_to_file_start, reader, suffix=None, file_list=None, **kwargs + ): + """Read and append a set of files. + + Args: + path_to_file_start (Path or str): The path to the files to read including + the shared start of the file name: `Path(path_to_file).parent` is + interpreted as the folder where the file are. + `Path(path_to_file).name` is interpreted as the shared start of the files + to be appended. 
+ reader (str or Reader class): The (name of the) reader to read the files with + file_list (list of Path): As an alternative to path_to_file_start, the + exact files to append can be specified in a list + suffix (str): If a suffix is given, only files with the specified ending are + added to the file list + kwargs: Key-word arguments are passed via cls.read() to the reader's read() + method, AND to cls.from_component_measurements() + """ + base_name = None + if not file_list: + folder = Path(path_to_file_start).parent + base_name = Path(path_to_file_start).name + file_list = [f for f in folder.iterdir() if f.name.startswith(base_name)] + if suffix: + file_list = [f for f in file_list if f.suffix == suffix] + + component_measurements = [ + cls.read(f, reader=reader, **kwargs) for f in file_list + ] + + measurement = None + for meas in component_measurements: + measurement = measurement + meas if measurement else meas + return measurement + + @classmethod + def from_component_measurements( + cls, component_measurements, keep_originals=True, sort=True, **kwargs + ): + """Return a measurement with the data contained in the component measurements + + TODO: This function "builds" the resulting measurement, i.e. it appends series + of the same name rather than keeping all the original copies. This should be + made more explicit, and a `build()` method should take over some of the work. + + Args: + component_measurements (list of Measurement) + keep_originals: Whether to keep a list of component_measurements referenced. + This may result in redundant numpy arrays in RAM. + sort (bool): Whether to sort the series according to time + kwargs: key-word arguments are added to the dictionary for cls.from_dict() + + Returns cls: the combined measurement. 
+ """ + + # First prepare everything but the series_list in the object dictionary + obj_as_dict = component_measurements[0].as_dict() + obj_as_dict.update(kwargs) + del obj_as_dict["m_ids"], obj_as_dict["s_ids"] + if keep_originals: + obj_as_dict["component_measurements"] = component_measurements + + # Now, prepare the built series. First, we loop through the component + # measurements and get all the data and metadata organized in a dictionary: + series_as_dicts = {} + tstamp = component_measurements[0].tstamp + for meas in component_measurements: + tstamp_i = meas.tstamp # save this for later. + meas.tstamp = tstamp # so that the time vectors share a t=0 + for s_name in meas.series_names: + series = meas[s_name] + if s_name in series_as_dicts: + series_as_dicts[s_name]["data"] = np.append( + series_as_dicts[s_name]["data"], series.data + ) + else: + series_as_dicts[s_name] = series.as_dict() + series_as_dicts[s_name]["data"] = series.data + if isinstance(series, ValueSeries): + # This will serve to match it to a TimeSeries later: + series_as_dicts[s_name]["t_name"] = series.tseries.name + meas.tstamp = tstamp_i # so it's not changed in the outer scope + + # Now we make DataSeries, starting with all the TimeSeries + tseries_dict = {} + sort_indeces = {} + for name, s_as_dict in series_as_dicts.items(): + if "tstamp" in s_as_dict: + if sort: + sort_indeces[name] = np.argsort(s_as_dict["data"]) + s_as_dict["data"] = s_as_dict["data"][sort_indeces[name]] + tseries_dict[name] = TimeSeries.from_dict(s_as_dict) + # And then ValueSeries, and put both in with the TimeSeries + series_list = [] + for name, s_as_dict in series_as_dicts.items(): + if name in tseries_dict: + series_list.append(tseries_dict[name]) + elif "t_name" in s_as_dict: + tseries = tseries_dict[s_as_dict["t_name"]] + if s_as_dict["data"].shape == tseries.shape: + # Then we assume that the time and value data have lined up + # successfully! 
:D + if sort: + s_as_dict["data"] = s_as_dict["data"][ + sort_indeces[tseries.name] + ] + vseries = ValueSeries( + name=name, + data=s_as_dict["data"], + unit_name=s_as_dict["unit_name"], + tseries=tseries, + ) + else: + # this will be the case if vseries sharing the same tseries + # are not present in the same subset of component_measurements. + # In that case just append the vseries even though some tdata gets + # duplicated. + vseries = append_series( + [ + s + for m in component_measurements + for s in m.series_list + if s.name == name + ], + sort=sort, + ) + series_list.append(vseries) + + # Finally, add this series to the dictionary representation and return the object + obj_as_dict["series_list"] = series_list + return cls.from_dict(obj_as_dict) + @property def metadata_json_string(self): """Measurement metadata as a JSON-formatted string""" @@ -304,7 +466,7 @@ def time_names(self): @property def value_series(self): - """Set of the VSeries in the measurement's DataSeries""" + """List of the VSeries in the measurement's DataSeries""" return [ series for series in self.series_list if isinstance(series, ValueSeries) ] @@ -322,6 +484,18 @@ def aliases(self): """ return self._aliases.copy() + @property + def reverse_aliases(self): + """{series_name: standard_names} indicating how raw data can be accessed""" + rev_aliases = {} + for name, other_names in self.aliases.items(): + for other_name in other_names: + if other_name in rev_aliases: + rev_aliases[other_name].append(name) + else: + rev_aliases[other_name] = [name] + return rev_aliases + def get_series_names(self, key): """Return list: series names for key found by (recursive) lookup in aliases""" keys = [key] if key in self.series_names else [] @@ -363,7 +537,7 @@ def __getitem__(self, key): >>> ec_meas["raw_potential"] # first lookup, explained below ValueSeries("Ewe/V", ...) 
>>> ec_meas.calibrate_RE(RE_vs_RHE=0.7) - >>> ec_meas["potential"] # second lookup, explained below + >>> ec_meas["potential"] # second lookup, explained below ValueSeries("U_{RHE} / [V]", ...) - The first lookup, with `key="raw_potential"`, (1) checks for @@ -379,14 +553,15 @@ def __getitem__(self, key): Now we're back in the original lookup, from which __getitem__ (3) caches the data series (which still has the name "Ewe/V") as "raw_potential" and returns it. - - The second lookup, with `key="potential"`, (1) checks for "potential" in the - cache, doesn't find it; then (2A) checks in `series_constructors`, doesn't find - it; and then (2B) asks the calibration for "potential". The calibration knows - that when asked for "potential" it should look for "raw_potential" and add - `RE_vs_RHE`. So it does a lookup with `key="raw_potential"` and (1) finds it - in the cache. The calibration does the math and returns a new data series for - the calibrated potential, bringing us back to the original lookup. The data - series returned by the calibration is then (3) cached and returned to the user. + - The second lookup, with `key="potential"`, (1) checks for "potential" in + the cache, doesn't find it; then (2A) checks in `series_constructors`, + doesn't find it; and then (2B) asks the calibration for "potential". The + calibration knows that when asked for "potential" it should look for + "raw_potential" and add `RE_vs_RHE`. So it does a lookup with + `key="raw_potential"` and (1) finds it in the cache. The calibration does + the math and returns a new data series for the calibrated potential, bringing + us back to the original lookup. The data series returned by the + calibration is then (3) cached and returned to the user. 
Note that, if the user had not looked up "raw_potential" before looking up "potential", "raw_potential" would not have been in the cache and the first @@ -417,7 +592,7 @@ def get_series(self, key): See more detailed documentation under `__getitem__`, for which this is a helper method. This method (A) looks for a method for `key` in the measurement's `series_constructors`; (B) requests its `calibration` for `key`; and if those - fails appends the data series that either (Ci) are returned by looking up the + fail appends the data series that either (Ci) are returned by looking up the key's `aliases` or (Cii) have `key` as their name; and finally (D) check if the user was using a key with a suffix. @@ -440,16 +615,23 @@ def get_series(self, key): return series # C series_to_append = [] - if key in self.aliases: # i + if key in self.series_names: # ii + # Then we'll append any series matching the desired name + series_to_append += [s for s in self.series_list if s.name == key] + elif key in self.aliases: # i # Then we'll look up the aliases instead and append them for k in self.aliases[key]: + if k == key: # this would result in infinite recursion. + print( # TODO: Real warnings. + "WARNING!!!\n" + f"\t{self} has {key} in its aliases for {key}:\n" + f"\tself.aliases['{key}'] = {self.aliases[key]}" + ) + continue try: series_to_append.append(self[k]) except SeriesNotFoundError: continue - elif key in self.series_names: # ii - # Then we'll append any series matching the desired name - series_to_append += [s for s in self.series_list if s.name == key] # If the key is something in the data, by now we have series to append. 
if series_to_append: # the following if's are to do as little extra manipulation as possible: @@ -498,24 +680,101 @@ def clear_cache(self): """Clear the cache so derived series are constructed again with updated info""" self._cached_series = {} - def grab(self, item, tspan=None): - """Return the time and value vectors for a given VSeries name cut by tspan""" - series = self[item] - v = series.v - t = series.t - if tspan: - mask = np.logical_and(tspan[0] < t, t < tspan[-1]) + def correct_data(self, value_name, new_data): + """Replace the old data for ´value_name´ (str) with ´new_data` (np array)""" + old_vseries = self[value_name] + new_vseries = ValueSeries( + name=value_name, + unit_name=old_vseries.unit_name, + data=new_data, + tseries=old_vseries.tseries, + ) + self.replace_series(value_name, new_vseries) + + def grab(self, item, tspan=None, include_endpoints=False, tspan_bg=None): + """Return a value vector with the corresponding time vector + + Grab is the *canonical* way to retrieve numerical time-dependent data from a + measurement in ixdat. The first argument is always the name of the value to get + time-resolved data for (the name of a ValueSeries). The second, optional, + argument is a timespan to select the data for. + Two vectors are returned: first time (t), then value (v). They are of the same + length so that `v` can be plotted against `t`, integrated over `t`, interpolated + via `t`, etc. `t` and `v` are returned in the units of their DataSeries. + TODO: option to specifiy desired units + + Typical usage:: + t, v = measurement.grab("potential", tspan=[0, 100]) + + Args: + item (str): The name of the DataSeries to grab data for + TODO: Should this be called "name" or "key" instead? And/or, should + the argument to __getitem__ be called "item" instead of "key"? + tspan (iter of float): Defines the timespan with its first and last values. + Optional. By default the entire time of the measurement is included. 
+ include_endpoints (bool): Whether to add a points at t = tspan[0] and + t = tspan[-1] to the data returned. This makes trapezoidal integration + less dependent on the time resolution. Default is False. + tspan_bg (iterable): Optional. A timespan defining when `item` is at its + baseline level. The average value of `item` in this interval will be + subtracted from the values returned. + """ + vseries = self[item] + tseries = vseries.tseries + v = vseries.data + t = tseries.data + tseries.tstamp - self.tstamp + if tspan is not None: # np arrays don't boolean well :( + if include_endpoints: + if t[0] < tspan[0]: # then add a point to include tspan[0] + v_0 = np.interp(tspan[0], t, v) + t = np.append(tspan[0], t) + v = np.append(v_0, v) + if tspan[-1] < t[-1]: # then add a point to include tspan[-1] + v_end = np.interp(tspan[-1], t, v) + t = np.append(t, tspan[-1]) + v = np.append(v, v_end) + mask = np.logical_and(tspan[0] <= t, t <= tspan[-1]) t, v = t[mask], v[mask] + if tspan_bg: + t_bg, v_bg = self.grab(item, tspan=tspan_bg) + v = v - np.mean(v_bg) return t, v - def grab_for_t(self, item, t): - """Return a numpy array with the value of item interpolated to time t""" - series = self[item] - v_0 = series.v - t_0 = series.t + def grab_for_t(self, item, t, tspan_bg=None): + """Return a numpy array with the value of item interpolated to time t + + Args: + item (str): The name of the value to grab + t (np array): The time vector to grab the value for + tspan_bg (iterable): Optional. A timespan defining when `item` is at its + baseline level. The average value of `item` in this interval will be + subtracted from what is returned. 
+ """ + vseries = self[item] + tseries = vseries.tseries + v_0 = vseries.data + t_0 = tseries.data + tseries.tstamp - self.tstamp v = np.interp(t, t_0, v_0) + if tspan_bg: + t_bg, v_bg = self.grab(item, tspan=tspan_bg) + v = v - np.mean(v_bg) return v + def integrate(self, item, tspan=None, ax=None): + """Return the time integral of item in the specified timespan""" + t, v = self.grab(item, tspan, include_endpoints=True) + if ax: + if ax == "new": + ax = self.plotter.new_ax(ylabel=item) + # FIXME: xlabel=self[item].tseries.name gives a problem :( + ax.plot(t, v, color="k", label=item) + ax.fill_between(t, v, np.zeros(t.shape), where=v > 0, color="g", alpha=0.3) + ax.fill_between( + t, v, np.zeros(t.shape), where=v < 0, color="g", alpha=0.1, hatch="//" + ) + + return np.trapz(v, t) + @property def t(self): return self[self.control_series_name].t @@ -527,7 +786,7 @@ def t_name(self): def _build_file_number_series(self): """Build a `file_number` series based on component measurements times.""" series_to_append = [] - for i, m in enumerate(self.component_measurements): + for i, m in enumerate(self.component_measurements or [self]): if ( self.control_technique_name and not m.technique == self.control_technique_name @@ -787,7 +1046,7 @@ def select_values(self, *args, **kwargs): can be single acceptable values or lists of acceptable values. In the latter case, each acceptable value is selected for on its own and the resulting measurements added together. - # FIXME: That is sloppy because it mutliplies the number of DataSeries + # FIXME: That is sloppy because it multiplies the number of DataSeries containing the same amount of data. Arguments without key-word are considered valid values of the default selector, which is named by `self.selelector_name`. Multiple criteria are @@ -874,7 +1133,7 @@ def __add__(self, other): all the raw series (or their placeholders) are just stored in the lists. 
""" new_name = self.name + " AND " + other.name - new_technique = self.technique + " AND " + other.technique + new_technique = get_combined_technique(self.technique, other.technique) # TODO: see if there isn't a way to put the import at the top of the module. # see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437410 @@ -904,8 +1163,14 @@ def __add__(self, other): else: new_aliases[key] = other.aliases[key] obj_as_dict = self.as_dict() + other_as_dict = other.as_dict() + for k, v in other_as_dict.items(): + # Looking forward to the "|" operator! + if k not in obj_as_dict: + obj_as_dict[k] = v obj_as_dict.update( name=new_name, + technique=new_technique, series_list=new_series_list, component_measurements=new_component_measurements, calibration_list=new_calibration_list, @@ -917,6 +1182,23 @@ def __add__(self, other): del obj_as_dict["s_ids"] return cls.from_dict(obj_as_dict) + def join(self, other, join_on=None): + """Join two measurements based on a shared data series + + This involves projecting all timeseries from other's data series so that the + variable named by `join_on` is shared between all data series. + This is analogous to an explicit inner join. + + Args: + other (Measurement): a second measurement to join to self + join_on (str or tuple): Either a string, if the value to join on is called + the same thing in both measurements, or a tuple of two strings where + the first is the name of the variable in self and the second in other. + The variable described by join_on must be monotonically increasing in + both measurements. 
+ """ + raise NotImplementedError + class Calibration(Saveable): """Base class for calibrations.""" @@ -928,7 +1210,7 @@ class Calibration(Saveable): "tstamp", } - def __init__(self, name=None, technique=None, tstamp=None, measurement=None): + def __init__(self, *, name=None, technique=None, tstamp=None, measurement=None): """Initiate a Calibration Args: @@ -961,10 +1243,24 @@ def from_dict(cls, obj_as_dict): else: calibration_class = cls try: - measurement = calibration_class(**obj_as_dict) + calibration = calibration_class(**obj_as_dict) except Exception: raise - return measurement + return calibration + + def export(self, path_to_file=None): + """Export an ECMSCalibration as a json-formatted text file""" + path_to_file = path_to_file or (self.name + ".ix") + self_as_dict = self.as_dict() + with open(path_to_file, "w") as f: + json.dump(self_as_dict, f, indent=4) + + @classmethod + def read(cls, path_to_file): + """Read a Calibration from a json-formatted text file""" + with open(path_to_file) as f: + obj_as_dict = json.load(f) + return cls.from_dict(obj_as_dict) def calibrate_series(self, key, measurement=None): """This should be overwritten in real calibration classes. @@ -972,3 +1268,32 @@ def calibrate_series(self, key, measurement=None): FIXME: Add more documentation about how to write this in inheriting classes. """ raise NotImplementedError + + +def get_combined_technique(technique_1, technique_2): + """Return the name of the technique resulting from adding two techniques""" + # TODO: see if there isn't a way to put the import at the top of the module. + # see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437410 + if technique_1 == technique_2: + return technique_1 + + # if we're a component technique of a hyphenated technique to that hyphenated + # technique, the result is still the hyphenated technique. e.g. 
EC-MS + MS = EC-MS + if "-" in technique_1 and technique_2 in technique_1.split("-"): + return technique_1 + elif "-" in technique_2 and technique_1 in technique_2.split("-"): + return technique_2 + + # if we're adding two independent technique which are components of a hyphenated + # technique, then we want that hyphenated technique. e.g. EC + MS = EC-MS + from .techniques import TECHNIQUE_CLASSES + + for hyphenated in [ + technique_1 + "-" + technique_2, + technique_2 + "-" + technique_1, + ]: + if hyphenated in TECHNIQUE_CLASSES: + return hyphenated + + # if all else fails, we just join them with " and ". e.g. MS + XRD = MS and XRD + return technique_1 + " and " + technique_2 diff --git a/src/ixdat/plotters/base_mpl_plotter.py b/src/ixdat/plotters/base_mpl_plotter.py new file mode 100644 index 00000000..3620bab4 --- /dev/null +++ b/src/ixdat/plotters/base_mpl_plotter.py @@ -0,0 +1,62 @@ +"""Base class for plotters using matplotlib""" + +from matplotlib import pyplot as plt +from matplotlib import gridspec + + +class MPLPlotter: + """Base class for plotters based on matplotlib. Has methods for making mpl axes.""" + + def new_ax(self, xlabel=None, ylabel=None): + """Return a new matplotlib axis optionally with the given x and y labels""" + fig, ax = plt.subplots() + if xlabel: + ax.set_xlabel(xlabel) + if ylabel: + ax.set_ylabel(ylabel) + return ax + + def new_two_panel_axes(self, n_bottom=1, n_top=1, emphasis="top"): + """Return the axes handles for a bottom and top panel. + + TODO: maybe fix order of axes returned. 
+ see https://github.com/ixdat/ixdat/pull/30/files#r811198719 + + Args: + n_top (int): 1 for a single y-axis, 2 for left and right y-axes on top panel + n_bottom (int): 1 for a single y-axis, 2 for left and right y-axes on bottom + emphasis (str or None): "top" for bigger top panel, "bottom" for bigger + bottom panel, None for equal-sized panels + + Returns list of axes: top left, bottom left(, top right, bottom right) + """ + self.new_ax() # necessary to avoid deleting an open figure, I don't know why + if emphasis == "top": + gs = gridspec.GridSpec(5, 1) + # gs.update(hspace=0.025) + axes = [plt.subplot(gs[0:3, 0])] + axes += [plt.subplot(gs[3:5, 0])] + elif emphasis == "bottom": + gs = gridspec.GridSpec(5, 1) + # gs.update(hspace=0.025) + axes = [plt.subplot(gs[0:2, 0])] + axes += [plt.subplot(gs[2:5, 0])] + else: + gs = gridspec.GridSpec(6, 1) + # gs.update(hspace=0.025) + axes = [plt.subplot(gs[0:3, 0])] + axes += [plt.subplot(gs[3:6, 0])] + + axes[0].xaxis.set_label_position("top") + axes[0].tick_params( + axis="x", top=True, bottom=False, labeltop=True, labelbottom=False + ) + + if n_bottom == 2 or n_top == 2: + axes += [None, None] + if n_top == 2: + axes[2] = axes[0].twinx() + if n_bottom == 2: + axes[3] = axes[1].twinx() + + return axes diff --git a/src/ixdat/plotters/ec_plotter.py b/src/ixdat/plotters/ec_plotter.py index 2d7f4efb..c3b9ab68 100644 --- a/src/ixdat/plotters/ec_plotter.py +++ b/src/ixdat/plotters/ec_plotter.py @@ -1,9 +1,11 @@ +"""Plotter for Electrochemistry""" + import numpy as np -from matplotlib import pyplot as plt +from .base_mpl_plotter import MPLPlotter from .plotting_tools import color_axis -class ECPlotter: +class ECPlotter(MPLPlotter): """A matplotlib plotter specialized in electrochemistry measurements.""" def __init__(self, measurement=None): @@ -61,33 +63,17 @@ def plot_measurement( "DEPRECIATION WARNING! V_str has been renamed v_name and J_str has " "been renamed j_name. Get it right next time." 
) - v_name = ( - v_name - or V_str - or ( - measurement.v_name - if measurement.RE_vs_RHE is not None - else measurement.E_name - ) - ) + v_name = v_name or V_str or measurement.v_name # FIXME: We need a better solution for V_str and J_str that involves the # Calibration and is generalizable. see: # https://github.com/ixdat/ixdat/pull/11#discussion_r679290123 - j_name = ( - j_name - or J_str - or ( - measurement.j_name - if measurement.A_el is not None - else measurement.I_name - ) - ) + j_name = j_name or J_str or measurement.j_name t_v, v = measurement.grab(v_name, tspan=tspan) t_j, j = measurement.grab(j_name, tspan=tspan) if axes: ax1, ax2 = axes else: - fig, ax1 = plt.subplots() + ax1 = self.new_ax() ax2 = ax1.twinx() axes = [ax1, ax2] ax1.plot(t_v, v, "-", color=v_color, label=v_name, **plot_kwargs) @@ -113,9 +99,11 @@ def plot_vs_potential( This can actually plot with anything on the x-axis, by specifying what you want on the x-axis using V_str. The y-axis variable, which can be specified by J_str, is interpolated onto the time corresponding to the x-axis variable. - TODO: This is a special case of the not-yet-implemented generalized - `plot_vs`. Consider an inheritance structure to reduce redundancy in - future plotters. + .. TODO:: + This is a special case of the not-yet-implemented generalized + `plot_vs`. Consider an inheritance structure to reduce redundancy in + future plotters. + sub-TODO: hide or fix TODO's using sphix boxes. All arguments are optional. By default it will plot current vs potential in black on a single axis for the whole experiment. TODO: color gradient (cmap=inferno) from first to last cycle. @@ -137,6 +125,7 @@ def plot_vs_potential( Returns matplotlib.pyplot.axis: The axis plotted on. 
""" + measurement = measurement or self.measurement v_name = v_name or ( measurement.v_name @@ -151,7 +140,7 @@ def plot_vs_potential( j_v = np.interp(t_v, t_j, j) if not ax: - fig, ax = plt.subplots() + ax = self.new_ax() if "color" not in plot_kwargs: plot_kwargs["color"] = "k" @@ -159,3 +148,82 @@ def plot_vs_potential( ax.set_xlabel(v_name) ax.set_ylabel(j_name) return ax + + +class CVDiffPlotter(MPLPlotter): + """A matplotlib plotter for highlighting the difference between two cv's.""" + + def __init__(self, measurement=None): + """Initiate the ECPlotter with its default CyclicVoltammagramDiff to plot""" + self.measurement = measurement + + def plot(self, measurement=None, ax=None): + """Plot the two cycles of the CVDiff measurement and fill in the areas between + + example: https://ixdat.readthedocs.io/en/latest/_images/cv_diff.svg + """ + measurement = measurement or self.measurement + # FIXME: This is probably the wrong use of plotter functions. + # see https://github.com/ixdat/ixdat/pull/30/files#r810926968 + ax = ECPlotter.plot_vs_potential( + self, measurement=measurement.cv_compare_1, axes=ax, color="g" + ) + ax = ECPlotter.plot_vs_potential( + self, measurement=measurement.cv_compare_2, ax=ax, color="k", linestyle="--" + ) + t1, v1 = measurement.cv_compare_1.grab("potential") + j1 = measurement.cv_compare_1.grab_for_t("current", t=t1) + j_diff = measurement.grab_for_t("current", t=t1) + # a mask which is true when cv_1 had bigger current than cv_2: + v_scan = measurement.scan_rate.data + mask = np.logical_xor(0 < j_diff, v_scan < 0) + + ax.fill_between(v1, j1 - j_diff, j1, where=mask, alpha=0.2, color="g") + ax.fill_between( + v1, + j1 - j_diff, + j1, + where=np.logical_not(mask), + alpha=0.1, + hatch="//", + color="g", + ) + + return ax + + def plot_measurement(self, measurement=None, axes=None, **kwargs): + """Plot the difference between the two cv's vs time""" + measurement = measurement or self.measurement + # FIXME: not correct useage of + return 
ECPlotter.plot_measurement( + self, measurement=measurement, axes=axes, **kwargs + ) + + def plot_diff(self, measurement=None, tspan=None, ax=None): + """Plot the difference between the two cv's vs potential. + + The trace is solid where the current in cv_2 is greater than cv_1 in the anodic + scan or the current cv_2 is more negative than cv_1 in the cathodic scan. + """ + measurement = measurement or self.measurement + t, v = measurement.grab("potential", tspan=tspan, include_endpoints=False) + j_diff = measurement.grab_for_t("current", t) + v_scan = measurement.scan_rate.data + # a mask which is true when cv_1 had bigger current than cv_2: + mask = np.logical_xor(0 < j_diff, v_scan < 0) + + if not ax: + ax = self.new_ax() + + ax.plot(v[mask], j_diff[mask], "k-", label="cv1 > cv2") + ax.plot( + v[np.logical_not(mask)], + j_diff[np.logical_not(mask)], + "k--", + label="cv1 < cv2", + ) + return ax + + def plot_vs_potential(self): + """FIXME: This is needed to satisfy ECMeasurement.__init__""" + pass diff --git a/src/ixdat/plotters/ecms_plotter.py b/src/ixdat/plotters/ecms_plotter.py new file mode 100644 index 00000000..a71ada25 --- /dev/null +++ b/src/ixdat/plotters/ecms_plotter.py @@ -0,0 +1,228 @@ +from .base_mpl_plotter import MPLPlotter +from .ec_plotter import ECPlotter +from .ms_plotter import MSPlotter + + +class ECMSPlotter(MPLPlotter): + """A matplotlib plotter for EC-MS measurements.""" + + def __init__(self, measurement=None): + """Initiate the ECMSPlotter with its default Measurement to plot""" + self.measurement = measurement + self.ec_plotter = ECPlotter(measurement=measurement) + self.ms_plotter = MSPlotter(measurement=measurement) + + def plot_measurement( + self, + *, + measurement=None, + axes=None, + mass_list=None, + mass_lists=None, + mol_list=None, + mol_lists=None, + tspan=None, + tspan_bg=None, + remove_background=None, + unit=None, + v_name=None, # TODO: Depreciate, replace with v_name, j_name + j_name=None, + v_color="k", + j_color="r", # 
TODO: Depreciate, replace with v_name, j_name + logplot=None, + legend=True, + emphasis="top", + **kwargs, + ): + """Make an EC-MS plot vs time and return the axis handles. + + Allocates tasks to ECPlotter.plot_measurement() and MSPlotter.plot_measurement() + + Args: + measurement (ECMSMeasurement): Defaults to the measurement to which the + plotter is bound (self.measurement) + axes (list of three matplotlib axes): axes[0] plots the MID data, + axes[1] the variable given by V_str (potential), and axes[2] the + variable given by J_str (current). By default three axes are made with + axes[0] a top panel with 3/5 the area, and axes[1] and axes[2] are + the left and right y-axes of the lower panel with 2/5 the area. + mass_list (list of str): The names of the m/z values, eg. ["M2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mass_lists (list of list of str): Alternately, two lists can be given for + masses in which case one list is plotted on the left y-axis and the other + on the right y-axis of the top panel. + mol_list (list of str): The names of the molecules, eg. ["H2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mol_lists (list of list of str): Alternately, two lists can be given for + molecules in which case one list is plotted on the left y-axis and the + other on the right y-axis of the top panel. + tspan (iter of float): The time interval to plot, wrt measurement.tstamp + tspan_bg (timespan): A timespan for which to assume the signal is at its + background. The average signals during this timespan are subtracted. + If `mass_lists` are given rather than a single `mass_list`, `tspan_bg` + must also be two timespans - one for each axis. Default is `None` for no + background subtraction. + remove_background (bool): Whether otherwise to subtract pre-determined + background signals if available. Defaults to (not logplot) + unit (str): the unit for the MS data. 
Defaults to "A" for Ampere + v_name (str): The name of the value to plot on the lower left y-axis. + Defaults to the name of the series `measurement.potential` + j_name (str): The name of the value to plot on the lower right y-axis. + Defaults to the name of the series `measurement.current` + v_color (str): The color to plot the variable given by 'V_str' + j_color (str): The color to plot the variable given by 'J_str' + logplot (bool): Whether to plot the MS data on a log scale (default True + unless mass_lists are given) + legend (bool): Whether to use a legend for the MS data (default True) + emphasis (str or None): "top" for bigger top panel, "bottom" for bigger + bottom panel, None for equal-sized panels + kwargs (dict): Additional kwargs go to all calls of matplotlib's plot() + + Returns: + list of Axes: (top_left, bottom_left, top_right, bottom_right) where: + axes[0] is top_left is MS data; + axes[1] is bottom_left is potential; + axes[2] is top_right is additional MS data if left and right mass_lists + or mol_lists were plotted (otherwise axes[2] is None); and + axes[3] is bottom_right is current. + """ + measurement = measurement or self.measurement + + logplot = (not mass_lists) if logplot is None else logplot + + if not axes: + axes = self.new_two_panel_axes( + n_bottom=2, + n_top=(2 if (mass_lists or mol_lists) else 1), + emphasis=emphasis, + ) + + if not tspan: + if hasattr(measurement, "potential") and measurement.potential: + t, _ = measurement.grab("potential") + tspan = [t[0], t[-1]] + else: + tspan = measurement.tspan + + if hasattr(measurement, "potential") and measurement.potential: + # then we have EC data! + self.ec_plotter.plot_measurement( + measurement=measurement, + axes=[axes[1], axes[3]], + tspan=tspan, + v_name=v_name, + j_name=j_name, + v_color=v_color, + j_color=j_color, + **kwargs, + ) + if ( + mass_list + or mass_lists + or mol_list + or mol_lists + or hasattr(measurement, "mass_list") + ): + # then we have MS data! 
+ self.ms_plotter.plot_measurement( + measurement=measurement, + ax=axes[0], + axes=[axes[0], axes[2]] if (mass_lists or mol_lists) else axes[0], + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=remove_background, + mass_list=mass_list, + mass_lists=mass_lists, + mol_list=mol_list, + mol_lists=mol_lists, + unit=unit, + logplot=logplot, + legend=legend, + **kwargs, + ) + axes[1].set_xlim(axes[0].get_xlim()) + return axes + + def plot_vs_potential( + self, + *, + measurement=None, + axes=None, + mass_list=None, + mass_lists=None, + mol_list=None, + mol_lists=None, + tspan=None, + tspan_bg=None, + remove_background=None, + unit=None, + logplot=False, + legend=True, + emphasis="top", + **kwargs, + ): + """Make an EC-MS plot vs time and return the axis handles. + + Allocates tasks to ECPlotter.plot_measurement() and MSPlotter.plot_measurement() + + Args: + measurement (ECMSMeasurement): Defaults to the measurement to which the + plotter is bound (self.measurement) + axes (list of three matplotlib axes): axes[0] plots the MID data, + axes[1] the current vs potential. By default three axes are made with + axes[0] a top panel with 3/5 the area. + mass_list (list of str): The names of the m/z values, eg. ["M2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mass_lists (list of list of str): Alternately, two lists can be given for + masses in which case one list is plotted on the left y-axis and the other + on the right y-axis of the top panel. + mol_list (list of str): The names of the molecules, eg. ["H2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mol_lists (list of list of str): Alternately, two lists can be given for + molecules in which case one list is plotted on the left y-axis and the + other on the right y-axis of the top panel. + tspan (iter of float): The time interval to plot, wrt measurement.tstamp + tspan_bg (timespan): A timespan for which to assume the signal is at its + background. 
The average signals during this timespan are subtracted. + If `mass_lists` are given rather than a single `mass_list`, `tspan_bg` + must also be two timespans - one for each axis. Default is `None` for no + background subtraction. + remove_background (bool): Whether otherwise to subtract pre-determined + background signals if available. Defaults to (not logplot) + unit (str): the unit for the MS data. Defaults to "A" for Ampere + logplot (bool): Whether to plot the MS data on a log scale (default False) + legend (bool): Whether to use a legend for the MS data (default True) + emphasis (str or None): "top" for bigger top panel, "bottom" for bigger + bottom panel, None for equal-sized panels + kwargs (dict): Additional kwargs go to all calls of matplotlib's plot() + """ + + if not axes: + axes = self.new_two_panel_axes( + n_bottom=1, + n_top=(2 if (mass_lists or mol_lists) else 1), + emphasis=emphasis, + ) + + self.ec_plotter.plot_vs_potential( + measurement=measurement, tspan=tspan, ax=axes[1], **kwargs + ) + self.ms_plotter.plot_vs( + x_name="potential", + measurement=measurement, + ax=axes[0], + axes=[axes[0], axes[2]] if (mass_lists or mol_lists) else axes[0], + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=remove_background, + mass_list=mass_list, + mass_lists=mass_lists, + mol_list=mol_list, + mol_lists=mol_lists, + unit=unit, + logplot=logplot, + legend=legend, + **kwargs, + ) + axes[1].set_xlim(axes[0].get_xlim()) + return axes diff --git a/src/ixdat/plotters/ms_plotter.py b/src/ixdat/plotters/ms_plotter.py new file mode 100644 index 00000000..bed7bfea --- /dev/null +++ b/src/ixdat/plotters/ms_plotter.py @@ -0,0 +1,452 @@ +"""Plotter for Mass Spectrometry""" + +import numpy as np +from .base_mpl_plotter import MPLPlotter + + +class MSPlotter(MPLPlotter): + """A matplotlib plotter specialized in mass spectrometry MID measurements.""" + + def __init__(self, measurement=None): + """Initiate the ECMSPlotter with its default Meausurement to plot""" + 
self.measurement = measurement + + def plot_measurement( + self, + *, + measurement=None, + ax=None, + axes=None, + mass_list=None, + mass_lists=None, + mol_list=None, + mol_lists=None, + tspan=None, + tspan_bg=None, + removebackground=None, + unit=None, + logplot=True, + legend=True, + **kwargs, + ): + """Plot m/z signal vs time (MID) data and return the axis. + + There are four ways to specify what to plot. Only specify one of these:: + mass_list: Uncalibrated signals in [(u/n/p)A] on on axis + mass_lists: Uncalibrated signals in [(u/n/p)A] on two axes + mol_list: Calibrated signals in [(u/n/p)mol/s] on on axis + mol_lists: Calibrated signals in [(u/n/p)mol/s] on two axes + + Two axes refers to separate left and right y-axes. Default is to use all + available masses as mass_list. + + Args: + measurement (MSMeasurement): Defaults to the one that initiated the plotter + ax (matplotlib axis): Defaults to a new axis + axes (list of matplotlib axis): Left and right y-axes if mass_lists are given + mass_list (list of str): The names of the m/z values, eg. ["M2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mass_lists (list of list of str): Alternately, two lists can be given for + masses in which case one list is plotted on the left y-axis and the other + on the right y-axis of the top panel. + mol_list (list of str): The names of the molecules, eg. ["H2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mol_lists (list of list of str): Alternately, two lists can be given for + molecules in which case one list is plotted on the left y-axis and the + other on the right y-axis of the top panel. + tspan (iter of float): The time interval to plot, wrt measurement.tstamp + tspan_bg (timespan): A timespan for which to assume the signal is at its + background. The average signals during this timespan are subtracted. + If `mass_lists` are given rather than a single `mass_list`, `tspan_bg` + must also be two timespans - one for each axis. 
Default is `None` for no + background subtraction. + removebackground (bool): Whether otherwise to subtract pre-determined + background signals if available. Defaults to (not logplot) + unit (str): defaults to "A" or "mol/s" + logplot (bool): Whether to plot the MS data on a log scale (default True) + legend (bool): Whether to use a legend for the MS data (default True) + kwargs: extra key-word args are passed on to matplotlib's plot() + """ + measurement = measurement or self.measurement + if removebackground is None: + removebackground = not logplot + + # Figure out, based on the inputs, whether or not to plot calibrated results + # (`quantified`), specifications for the axis to plot on now (`specs_this_axis`) + # and specifications for the next axis to plot on, if any (`specs_next_axis`): + quantified, specs_this_axis, specs_next_axis = self._parse_overloaded_inputs( + mass_list, + mass_lists, + mol_list, + mol_lists, + unit, + tspan_bg, + ax, + axes, + measurement, + ) + ax = specs_this_axis["ax"] + v_list = specs_this_axis["v_list"] + tspan_bg = specs_this_axis["tspan_bg"] + unit = specs_this_axis["unit"] + unit_factor = specs_this_axis["unit_factor"] + for v_or_v_name in v_list: + if isinstance(v_or_v_name, str): + v_name = v_or_v_name + color = STANDARD_COLORS.get(v_name, "k") + else: + v_name = v_or_v_name.name + color = v_or_v_name.color + if quantified: + t, v = measurement.grab_flux( + v_or_v_name, + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=False, + ) + else: + t, v = measurement.grab_signal( + v_or_v_name, + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=False, + ) + if logplot: + v[v < MIN_SIGNAL] = MIN_SIGNAL + ax.plot( + t, + v * unit_factor, + color=color, + label=v_name, + **kwargs, + ) + ax.set_ylabel(f"signal / [{unit}]") + ax.set_xlabel("time / [s]") + if specs_next_axis: + self.plot_measurement( + measurement=measurement, + ax=specs_next_axis["ax"], + 
mass_list=specs_next_axis["mass_list"], + mol_list=specs_next_axis["mol_list"], + unit=specs_next_axis["unit"], + tspan=tspan, + tspan_bg=specs_next_axis["tspan_bg"], + logplot=logplot, + legend=legend, + **kwargs, + ) + axes = [ax, specs_next_axis["ax"]] + else: + axes = None + + if logplot: + ax.set_yscale("log") + if legend: + ax.legend() + + return axes if axes else ax + + def plot_vs( + self, + *, + x_name, + measurement=None, + ax=None, + axes=None, + mass_list=None, + mass_lists=None, + mol_list=None, + mol_lists=None, + tspan=None, + tspan_bg=None, + removebackground=None, + unit=None, + logplot=True, + legend=True, + **kwargs, + ): + """Plot m/z signal (MID) data against a specified variable and return the axis. + + There are four ways to specify what to plot. Only specify one of these:: + mass_list: Uncalibrated signals in [(u/n/p)A] on on axis + mass_lists: Uncalibrated signals in [(u/n/p)A] on two axes + mol_list: Calibrated signals in [(u/n/p)mol/s] on on axis + mol_lists: Calibrated signals in [(u/n/p)mol/s] on two axes + + Two axes refers to seperate left and right y-axes. Default is to use all + available masses as mass_list. + + Args: + x_name (str): Name of the variable to plot on the x-axis + measurement (MSMeasurement): Defaults to the one that initiated the plotter + ax (matplotlib axis): Defaults to a new axis + axes (list of matplotlib axis): Left and right y-axes if mass_lists are given + mass_list (list of str): The names of the m/z values, eg. ["M2", ...] to + plot. Defaults to all of them (measurement.mass_list) + mass_lists (list of list of str): Alternately, two lists can be given for + masses in which case one list is plotted on the left y-axis and the other + on the right y-axis of the top panel. + mol_list (list of str): The names of the molecules, eg. ["H2", ...] to + plot. 
Defaults to all of them (measurement.mass_list) + mol_lists (list of list of str): Alternately, two lists can be given for + molecules in which case one list is plotted on the left y-axis and the + other on the right y-axis of the top panel. + tspan (iter of float): The time interval to plot, wrt measurement.tstamp + tspan_bg (timespan): A timespan for which to assume the signal is at its + background. The average signals during this timespan are subtracted. + If `mass_lists` are given rather than a single `mass_list`, `tspan_bg` + must also be two timespans - one for each axis. Default is `None` for no + background subtraction. + removebackground (bool): Whether otherwise to subtract pre-determined + background signals if available + logplot (bool): Whether to plot the MS data on a log scale (default True) + legend (bool): Whether to use a legend for the MS data (default True) + kwargs: key-word args are passed on to matplotlib's plot() + """ + measurement = measurement or self.measurement + if removebackground is None: + removebackground = not logplot + + # The overloaded inputs are a pain in the ass. 
This function helps: + quantified, specs_this_axis, specs_next_axis = self._parse_overloaded_inputs( + mass_list, + mass_lists, + mol_list, + mol_lists, + unit, + tspan_bg, + ax, + axes, + measurement, + ) + ax = specs_this_axis["ax"] + v_list = specs_this_axis["v_list"] + tspan_bg = specs_this_axis["tspan_bg"] + unit = specs_this_axis["unit"] + unit_factor = specs_this_axis["unit_factor"] + + t, x = measurement.grab(x_name, tspan=tspan, include_endpoints=True) + for v_name in v_list: + if quantified: + t_v, v = measurement.grab_flux( + v_name, + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=False, + ) + else: + t_v, v = measurement.grab_signal( + v_name, + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=False, + ) + if logplot: + v[v < MIN_SIGNAL] = MIN_SIGNAL + x_mass = np.interp(t_v, t, x) + ax.plot( + x_mass, + v * unit_factor, + color=STANDARD_COLORS.get(v_name, "k"), + label=v_name, + **kwargs, + ) + ax.set_ylabel(f"signal / [{unit}]") + ax.set_xlabel(x_name) + if specs_next_axis: + self.plot_vs( + x_name=x_name, + measurement=measurement, + ax=specs_next_axis["ax"], + mass_list=specs_next_axis["mass_list"], + mol_list=specs_next_axis["mol_list"], + unit=specs_next_axis["unit"], + tspan=tspan, + tspan_bg=specs_next_axis["tspan_bg"], + logplot=logplot, + legend=legend, + **kwargs, + ) + axes = [ax, specs_next_axis["ax"]] + else: + axes = None + + if logplot: + ax.set_yscale("log") + if legend: + ax.legend() + + return axes if axes else ax + + def _parse_overloaded_inputs( + self, + mass_list, + mass_lists, + mol_list, + mol_lists, + unit, + tspan_bg, + ax, + axes, + measurement, + ): + """From the overloaded function inputs, figure out what the user wants to do. + + This includes: + 1. determine if we're doing quantifed results (mols) or raw (masses) + 2. figure out if there's one or two axes (remaining) and what goes on them. + 3. 
figure out what to multiply numbers by when plotting to match the unit. + """ + # TODO: Maybe there's a way to do this function as a decorator? + # So this function is overloaded in the sense that the user can give + # exactly one of mol_list, mol_lists, mass_list, mass_lists. + # To manage that complexity, first we reduce it to two options, that down to + # either v_list or v_lists and a boolean "quantified": + quantified = False # default, if they give nothing + v_lists = None # default, if they give nothing + v_list = measurement.mass_list # default, if they give nothing + if mol_list: + quantified = True + v_list = mol_list + elif mol_lists: + quantified = True + v_lists = mol_lists + elif mass_list: + quantified = False + v_list = mass_list + elif mass_lists: + quantified = False + v_lists = mass_lists + + # as the next simplification, if they give two things (v_lists), we pretend we + # got one (v_list) but prepare an axis for a recursive call of this function. + if v_lists: + axes = axes or [ax, ax.twinx()] # prepare an axis unless we were given two. 
+ ax_right = axes[-1] + ax = axes[0] + v_list = v_lists[0] + v_list_right = v_lists[1] + # ah, and to enable different background tspans for the two axes: + try: + tspan_bg_right = tspan_bg[1] + if isinstance(tspan_bg_right, (float, int)): + raise TypeError + except (KeyError, TypeError): + tspan_bg_right = None + else: + tspan_bg = tspan_bg[0] + if isinstance(unit, str) or not unit: + unit_right = unit + else: + unit_right = unit[1] + unit = unit[0] + specs_next_axis = { + "ax": ax_right, + "unit": unit_right, + "mass_list": None if quantified else v_list_right, + "mol_list": v_list_right if quantified else None, + "tspan_bg": tspan_bg_right, + } + else: + specs_next_axis = None + + if quantified: + unit = unit or "mol/s" + unit_factor = { + "pmol/s": 1e12, + "nmol/s": 1e9, + "umol/s": 1e6, + "mol/s": 1, # noqa + "pmol/s/cm^2": 1e12, + "nmol/s/cm^2": 1e9, + "umol/s/cm^2": 1e6, + "mol/s/cm^2": 1, # noqa + }[unit] + if "/cm^2" in unit: + unit_factor = unit_factor / measurement.A_el + else: + unit = unit or "A" + unit_factor = {"pA": 1e12, "nA": 1e9, "uA": 1e6, "A": 1}[unit] + # TODO: Real units with a unit module! This should even be able to figure out the + # unit prefix to put stuff in a nice 1-to-1e3 range + + if not ax: + ax = ( + axes[0] + if axes + else self.new_ax(ylabel=f"signal / [{unit}]", xlabel="time / [s]") + ) + specs_this_axis = { + "ax": ax, + "v_list": v_list, + "unit": unit, + "unit_factor": unit_factor, + "tspan_bg": tspan_bg, + } + + return quantified, specs_this_axis, specs_next_axis + + +# ----- These are the standard colors for EC-MS plots! ------- # + +MIN_SIGNAL = 1e-14 # So that the bottom half of the plot isn't wasted on log(noise) +# TODO: This should probably be customizeable from a settings file. 
+ +STANDARD_COLORS = { + "M2": "b", + "M4": "m", + "M18": "y", + "M28": "0.5", + "M32": "k", + "M40": "c", + "M44": "brown", + "M15": "r", + "M26": "g", + "M27": "limegreen", + "M30": "darkorange", + "M31": "yellowgreen", + "M43": "tan", + "M45": "darkgreen", + "M34": "r", + "M36": "g", + "M46": "purple", + "M48": "darkslategray", + "M20": "slateblue", + "M16": "steelblue", + "M19": "teal", + "M17": "chocolate", + "M41": "#FF2E2E", + "M42": "olive", + "M29": "#001146", + "M70": "purple", + "M3": "orange", + "M73": "crimson", + "M74": "r", + "M60": "g", + "M58": "darkcyan", + "M88": "darkred", + "M89": "darkmagenta", + "M130": "purple", + "M132": "purple", + # and now, molecules: + "H2": "b", + "He": "m", + "H2O": "y", + "CO": "0.5", + "N2": "0.5", + "O2": "k", + "Ar": "c", + "CO2": "brown", + "CH4": "r", + "C2H4": "g", + "O2_M32": "k", + "O2_M34": "r", + "O2_M36": "g", + "CO2_M44": "brown", + "CO2_M46": "purple", + "CO2_M48": "darkslategray", +} diff --git a/src/ixdat/plotters/sec_plotter.py b/src/ixdat/plotters/sec_plotter.py new file mode 100644 index 00000000..75521d79 --- /dev/null +++ b/src/ixdat/plotters/sec_plotter.py @@ -0,0 +1,297 @@ +"""Plotters for spectroelectrochemistry. Makes use of those in spectrum_plotter.py""" + +import matplotlib as mpl + +from .base_mpl_plotter import MPLPlotter +from .ec_plotter import ECPlotter +from .spectrum_plotter import SpectrumSeriesPlotter +from ..exceptions import SeriesNotFoundError + + +class SECPlotter(MPLPlotter): + """An spectroelectrochemsitry (SEC) matplotlib plotter. + + FIXME: This should make use of the code in spectrum_plotter.SpectrumSeriesPlotter + """ + + def __init__(self, measurement=None): + """Initiate the plotter with its default Meausurement to plot""" + self.measurement = measurement + self.ec_plotter = ECPlotter(measurement=measurement) + self.spectrum_series_plotter = SpectrumSeriesPlotter( + spectrum_series=self.measurement + # FIXME: Maybe SpectrumSeries should inherit from Measurement? 
+ ) + + def plot_measurement( + self, + measurement=None, + tspan=None, + wlspan=None, + axes=None, + V_ref=None, + t_ref=None, + cmap_name="inferno", + make_colorbar=False, + **kwargs, + ): + """Plot an SECMeasurement in two panels with time as x-asis. + + The top panel is a heat plot with wavelength on y-axis and color representing + spectrum. At most one of V_ref and t_ref should be given, and if neither are + given the measurement's default reference_spectrum is used to calculate the + optical density. + + Args: + measurement (Measurement): The measurement to be plotted, if different from + self.measurement + tspan (timespan): The timespan of data to keep for the measurement. + wlspan (iterable): The wavelength span of spectral data to plot + axes (list of mpl.Axis): The axes to plot on. axes[0] is for the heat + plot, axes[1] for potential, and axes[2] for current. The axes are + optional and a new set of axes, where axes[1] and axes[2] are twinned on + x, are generated if not provided. + V_ref (float): Potential to use as reference for calculating optical density + t_ref (float): Time to use as a reference for calculating optical density + cmap_name (str): The name of the colormap to use. Defaults to "inferno", + which ranges from black through red and orange to yellow-white. "jet" + is also good. + make_colorbar (bool): Whether to make a colorbar. + FIXME: colorbar at present mis-alignes axes + kwargs: Additional key-word arguments are passed on to + ECPlotter.plot_measurement(). 
+ """ + measurement = measurement or self.measurement + + if not axes: + axes = self.new_two_panel_axes( + n_bottom=2, + n_top=1, + emphasis="top", + ) + self.ec_plotter.plot_measurement( + measurement=measurement, + axes=[axes[1], axes[2]], + tspan=tspan, + **kwargs, + ) + + dOD_series = measurement.calc_dOD(V_ref=V_ref, t_ref=t_ref) + axes[0] = self.spectrum_series_plotter.heat_plot( + field=dOD_series, + tspan=tspan, + xspan=wlspan, + ax=axes[0], + cmap_name=cmap_name, + make_colorbar=make_colorbar, + ) + if make_colorbar: + pass # TODO: adjust EC plot to be same width as heat plot despite colorbar. + + axes[1].set_xlim(axes[0].get_xlim()) + + return axes + + def plot_waterfall( + self, + measurement=None, + ax=None, + V_ref=None, + t_ref=None, + cmap_name="jet", + make_colorbar=True, + ): + """Plot an SECMeasurement as spectra colored based on potential. + + The top panel is a heat plot with wavelength on y-axis and color representing + spectrum. At most one of V_ref and t_ref should be given, and if neither are + given the measurement's default reference_spectrum is used to calculate the + optical density. + + This uses SpectrumSeriesPlotter.plot_waterfall() + + Args: + measurement (Measurement): The measurement to be plotted, if different from + self.measurement + tspan (timespan): The timespan of data to keep for the measurement. + wlspan (iterable): The wavelength span of spectral data to plot + ax (matplotlib Axis): The axes to plot on. A new one is made by default. + V_ref (float): potential to use as reference for calculating optical density + t_ref (float): time to use as a reference for calculating optical density + cmap_name (str): The name of the colormap to use. Defaults to "inferno", + which ranges from black through red and orange to yellow-white. "jet" + is also good. + make_colorbar (bool): Whether to make a colorbar. 
+ """ + measurement = measurement or self.measurement + dOD = measurement.calc_dOD(V_ref=V_ref, t_ref=t_ref) + + return self.spectrum_series_plotter.plot_waterfall( + field=dOD, + cmap_name=cmap_name, + make_colorbar=make_colorbar, + ax=ax, + vs=measurement.v_name, + ) + + def plot_vs_potential( + self, + measurement=None, + tspan=None, + vspan=None, + v_name=None, + j_name=None, + axes=None, + wlspan=None, + V_ref=None, + cmap_name="inferno", + make_colorbar=False, + **kwargs, + ): + """Plot an SECMeasurement in two panels with potential as x-asis. + + The top panel is a heat plot with wavelength on y-axis and color representing + spectrum. At most one of V_ref and t_ref should be given, and if neither are + given the measurement's default reference_spectrum is used to calculate the + optical density. + + Args: + measurement (Measurement): The measurement to be plotted, if different from + self.measurement + tspan (timespan): The timespan of data to keep for the measurement. + vspan (timespan): The potential span of data to keep for the measurement. + v_name (str): Optional. The name of the data series to use as potential. + j_name (str): Optional. The name of the data series to use as current. + wlspan (iterable): The wavelength span of spectral data to plot + axes (list of numpy Axes): The axes to plot on. axes[0] is for the heat + plot and axes[1] for potential. New are made by default. + V_ref (float): potential to use as reference for calculating optical density + t_ref (float): time to use as a reference for calculating optical density + cmap_name (str): The name of the colormap to use. Defaults to "inferno", + which ranges from black through red and orange to yellow-white. "jet" + is also good. + make_colorbar (bool): Whether to make a colorbar. + kwargs: Additional key-word arguments are passed on to + ECPlotter.plot_vs_potential(). 
+ """ + measurement = measurement or self.measurement + + if not axes: + axes = self.new_two_panel_axes( + n_bottom=1, + n_top=1, + emphasis="top", + ) + + self.ec_plotter.plot_vs_potential( + measurement=measurement, + tspan=tspan, + v_name=v_name, + j_name=j_name, + ax=axes[1], + **kwargs, + ) + + dOD_series = measurement.calc_dOD(V_ref=V_ref) + axes[0] = self.spectrum_series_plotter.heat_plot_vs( + field=dOD_series, + vspan=vspan, + xspan=wlspan, + ax=axes[0], + cmap_name=cmap_name, + make_colorbar=make_colorbar, + vs=v_name or measurement.v_name, + ) + axes[1].set_xlim(axes[0].get_xlim()) + return axes + + def plot_wavelengths( + self, + measurement=None, + wavelengths=None, + axes=None, + cmap_name="jet", + tspan=None, + **kwargs, + ): + """Plot the dO.D. for specific wavelength in the top panel and EC in bottom + + Args: + measurement (Measurement): The measurement to be plotted, if different from + self.measurement + wavelengths (list of str): The names of the wavelengths to track as strings, + e.g. "w400" for 400 nm + axes (list of Ax): The axes to plot on, defaults to new matplotlib axes + cmap_name (str): Name of the colormap. 
Defaults to "jet" + tspan (timespan): The timespan to plot + **kwargs: Additional key-word arguments are passed on to + ECPlotter.plot_measurement + """ + measurement = measurement or self.measurement + wavelengths = wavelengths or measurement.tracked_wavelengths + + cmap = mpl.cm.get_cmap(cmap_name) + norm = mpl.colors.Normalize(vmin=min(measurement.wl), vmax=max(measurement.wl)) + + if not axes: + axes = self.new_two_panel_axes(n_bottom=2) + for wl_str in wavelengths: + x = float(wl_str[1:]) + try: + t, y = measurement.grab(wl_str, tspan=tspan) + except SeriesNotFoundError: + measurement.track_wavelength(x) + t, y = measurement.grab(wl_str, tspan=tspan) + axes[0].plot(t, y, color=cmap(norm(x)), label=wl_str) + axes[0].legend() + axes[0].set_ylabel(r"$\Delta$O.D.") + + self.ec_plotter.plot_measurement( + measurement=measurement, axes=axes[1:], tspan=tspan, **kwargs + ) + + def plot_wavelengths_vs_potential( + self, + measurement=None, + wavelengths=None, + axes=None, + cmap_name="jet", + tspan=None, + **kwargs, + ): + """Plot the dO.D. for specific wavelength in the top panel vs potential + + Args: + measurement (Measurement): The measurement to be plotted, if different from + self.measurement + wavelengths (list of str): The names of the wavelengths to track as strings, + e.g. "w400" for 400 nm + axes (list of Ax): The axes to plot on, defaults to new matplotlib axes + cmap_name (str): Name of the colormap. 
Defaults to "jet" + tspan (timespan): The timespan to plot + **kwargs: Additional key-word arguments are passed on to + ECPlotter.plot_vs_potential + """ + measurement = measurement or self.measurement + wavelengths = wavelengths or measurement.tracked_wavelengths + + cmap = mpl.cm.get_cmap(cmap_name) + norm = mpl.colors.Normalize(vmin=min(measurement.wl), vmax=max(measurement.wl)) + + if not axes: + axes = self.new_two_panel_axes() + for wl_str in wavelengths: + x = float(wl_str[1:]) + try: + t, y = measurement.grab(wl_str, tspan=tspan) + except SeriesNotFoundError: + measurement.track_wavelength(x) + t, y = measurement.grab(wl_str, tspan=tspan) + v = measurement.v + axes[0].plot(v, y, color=cmap(norm(x)), label=wl_str) + axes[0].legend() + axes[0].set_ylabel(r"$\Delta$O.D.") + + self.ec_plotter.plot_vs_potential( + measurement=measurement, ax=axes[1], tspan=tspan, **kwargs + ) diff --git a/src/ixdat/plotters/spectrum_plotter.py b/src/ixdat/plotters/spectrum_plotter.py new file mode 100644 index 00000000..87d55a8c --- /dev/null +++ b/src/ixdat/plotters/spectrum_plotter.py @@ -0,0 +1,220 @@ +"""Plotters for spectra and spectrumseries.""" + +import numpy as np +import matplotlib as mpl +from matplotlib import pyplot as plt +from .base_mpl_plotter import MPLPlotter + + +class SpectrumPlotter(MPLPlotter): + """A plotter for spectrums""" + + def __init__(self, spectrum=None): + self.spectrum = spectrum + + def plot(self, spectrum=None, ax=None, **kwargs): + """Plot a spectrum as y (signal) vs x (scanning variable) + + Args: + spectrum (Spectrum): The spectrum to plot if different from self.spectrum + ax (mpl.Axis): The axis to plot on. A new one is made by default. 
+ kwargs: additional key-word arguments are given to ax.plot() + """ + spectrum = spectrum or self.spectrum + if not ax: + ax = self.new_ax() + ax.plot(spectrum.x, spectrum.y, **kwargs) + ax.set_xlabel(spectrum.x_name) + ax.set_ylabel(spectrum.y_name) + return ax + + +class SpectrumSeriesPlotter(MPLPlotter): + """A plotter for spectrum series, f.ex. spectra taken continuously over time""" + + def __init__(self, spectrum_series=None): + self.spectrum_series = spectrum_series + + @property + def plot(self): + """The default plot of a SpectrumSeries is heat_plot""" + return self.heat_plot + + def plot_average(self, spectrum_series=None, ax=None, **kwargs): + """Take an average of the spectra and plot that.""" + spectrum_series = spectrum_series or self.spectrum_series + if not ax: + ax = self.new_ax() + ax.plot(spectrum_series.x, spectrum_series.y_average, **kwargs) + ax.set_xlabel(spectrum_series.x_name) + ax.set_ylabel(spectrum_series.y_name + " (average)") + return ax + + def heat_plot( + self, + spectrum_series=None, + field=None, + tspan=None, + xspan=None, + ax=None, + cmap_name="inferno", + make_colorbar=False, + ): + """Plot with time as x, the scanning variable as y, and color as signal + + See SpectrumSeriesPlotter.heat_plot_vs(). This function calls it with vs="t". + """ + return self.heat_plot_vs( + spectrum_series=spectrum_series, + field=field, + vspan=tspan, + xspan=xspan, + ax=ax, + cmap_name=cmap_name, + make_colorbar=make_colorbar, + vs="t", + ) + + def heat_plot_vs( + self, + spectrum_series=None, + field=None, + vspan=None, + xspan=None, + ax=None, + cmap_name="inferno", + make_colorbar=False, + vs=None, + ): + """Plot an SECMeasurement in two panels with time as x-asis. + + The top panel is a heat plot with wavelength on y-axis and color representing + spectrum. At most one of V_ref and t_ref should be given, and if neither are + given the measurement's default reference_spectrum is used to calculate the + optical density. 
+ + Args: + spectrum_series (SpectrumSeries): The spectrum series to be plotted, if + different from self.spectrum_series. + FIXME: spectrum_series needs to actually be a Measurement to have other + FIXME: series to plot against if vs isn't in field.series_axes + field (Field): The field to be plotted, if different from + spectrum_series.field + xspan (iterable): The span of the spectral data to plot + ax (mpl.Axis): The axes to plot on. A new one is made by default + cmap_name (str): The name of the colormap to use. Defaults to "inferno", + which ranges from black through red and orange to yellow-white. "jet" + is also good. + make_colorbar (bool): Whether to make a colorbar. + FIXME: colorbar at present mis-alignes axes + vs (str): The ValueSeries or TimeSeries to plot against. + """ + spectrum_series = spectrum_series or self.spectrum_series + field = field or spectrum_series.field + + xseries = field.axes_series[1] + x = xseries.data + tseries = field.axes_series[0] + + v_name = vs + if vs in ("t", tseries.tseries.name): + v = tseries.t + if hasattr(spectrum_series, "t_str") and spectrum_series.t_str: + v_name = spectrum_series.t_str + else: + v = spectrum_series.grab_for_t(vs, t=tseries.t) + + data = field.data + # ^ FIXME: The heat plot will be distorted if spectra are not taken at even + # spacing on the "vs" variable. They will be especially meaningless if + # the v variable itself is not always increasing or decreasing. + + if vspan: + v_mask = np.logical_and(vspan[0] < v, v < vspan[-1]) + v = v[v_mask] + data = data[v_mask, :] + if (v[0] < v[-1]) != (vspan[0] < vspan[-1]): # this is an XOR. 
+ # Then we need to plot the data against v in the reverse direction: + v = np.flip(v, axis=0) + data = np.flip(data, axis=0) + if xspan: + wl_mask = np.logical_and(xspan[0] < x, x < xspan[-1]) + x = x[wl_mask] + data = data[:, wl_mask] + + ax.imshow( + data.swapaxes(0, 1), + cmap=cmap_name, + aspect="auto", + extent=(v[0], v[-1], x[0], x[-1]), + ) + ax.set_xlabel(v_name) + ax.set_ylabel(xseries.name) + if make_colorbar: + cmap = mpl.cm.get_cmap(cmap_name) + norm = mpl.colors.Normalize(vmin=np.min(data), vmax=np.max(data)) + cb = plt.colorbar( + mpl.cm.ScalarMappable(norm=norm, cmap=cmap), + ax=ax, + use_gridspec=True, + anchor=(0.75, 0), + ) + cb.set_label("intensity") + return ax + + def plot_waterfall( + self, + spectrum_series=None, + field=None, + cmap_name="jet", + make_colorbar=True, + vs=None, + ax=None, + ): + """Plot a SpectrumSeries as spectra colored by the value at which they are taken + + Args: + spectrum_series (SpectrumSeries): The spectrum series to be plotted, if + different from self.spectrum_series. + FIXME: spectrum_series needs to actually be a Measurement to have other + FIXME: ...series to plot against if vs isn't in field.series_axes + field (Field): The field to be plotted, if different from + spectrum_series.field + ax (matplotlib Axis): The axes to plot on. A new one is made by default. + cmap_name (str): The name of the colormap to use. Defaults to "inferno", + which ranges from black through red and orange to yellow-white. "jet" + is also good. + make_colorbar (bool): Whether to make a colorbar. + vs (str): The name of the value to use for the color scale. 
Defaults to time + """ + spectrum_series = spectrum_series or self.spectrum_series + field = field or spectrum_series.field + + data = field.data + t = field.axes_series[0].t + x = field.axes_series[1].data + + if vs: + v = spectrum_series.grab_for_t(vs, t=t) + else: + v = t + + cmap = mpl.cm.get_cmap(cmap_name) + norm = mpl.colors.Normalize(vmin=np.min(v), vmax=np.max(v)) + + if not ax: + ax = self.new_ax() + + for i, v_i in enumerate(v): + spec = data[i] + color = cmap(norm(v_i)) + ax.plot(x, spec, color=color) + + ax.set_xlabel(field.axes_series[1].name) + ax.set_ylabel(field.name) + + if make_colorbar: + cb = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax) + cb.set_label(vs) + + return ax diff --git a/src/ixdat/plotters/value_plotter.py b/src/ixdat/plotters/value_plotter.py index 7ac47754..d2fa7c2a 100644 --- a/src/ixdat/plotters/value_plotter.py +++ b/src/ixdat/plotters/value_plotter.py @@ -1,10 +1,10 @@ """Classes for plotting measurement data""" -from matplotlib import pyplot as plt -from ixdat.exceptions import SeriesNotFoundError +from .base_mpl_plotter import MPLPlotter +from ..exceptions import SeriesNotFoundError -class ValuePlotter: +class ValuePlotter(MPLPlotter): """Default plotter. By default plots all of the VSeries vs time on a single axis""" def __init__(self, measurement=None): @@ -15,7 +15,13 @@ def plot(self, *args, **kwargs): return self.plot_measurement(measurement=self.measurement, *args, **kwargs) def plot_measurement( - self, measurement, v_list=None, tspan=None, ax=None, legend=True, logscale=False + self, + measurement=None, + v_list=None, + tspan=None, + ax=None, + legend=True, + logscale=False, ): """Plot a measurement's values vs time @@ -27,8 +33,9 @@ def plot_measurement( legend (bool): Whether to include a legend. Defaults to True. logscale (bool): Whether to use a log-scaled y-axis. Defaults to False. 
""" + measurement = measurement or self.measurement if not ax: - fig, ax = plt.subplots() + ax = self.new_ax() v_list = v_list or measurement.value_names for v_name in v_list: @@ -41,5 +48,7 @@ def plot_measurement( if legend: ax.legend() + if logscale: + ax.set_yscale("log") return ax diff --git a/src/ixdat/projects/__init__.py b/src/ixdat/projects/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ixdat/lablogs.py b/src/ixdat/projects/lablogs.py similarity index 94% rename from src/ixdat/lablogs.py rename to src/ixdat/projects/lablogs.py index daa18eff..6617ab14 100644 --- a/src/ixdat/lablogs.py +++ b/src/ixdat/projects/lablogs.py @@ -1,4 +1,4 @@ -from .db import Saveable +from ixdat.db import Saveable class LabLog(Saveable): diff --git a/src/ixdat/samples.py b/src/ixdat/projects/samples.py similarity index 93% rename from src/ixdat/samples.py rename to src/ixdat/projects/samples.py index 73400a55..76161842 100644 --- a/src/ixdat/samples.py +++ b/src/ixdat/projects/samples.py @@ -1,6 +1,6 @@ """The module implements the sample class""" -from .db import Saveable +from ixdat.db import Saveable class Sample(Saveable): diff --git a/src/ixdat/readers/__init__.py b/src/ixdat/readers/__init__.py index 5b1215fa..ac705857 100644 --- a/src/ixdat/readers/__init__.py +++ b/src/ixdat/readers/__init__.py @@ -6,12 +6,41 @@ is the reader class for parsing files. 
""" from ..techniques import TECHNIQUE_CLASSES -from .ec_ms import EC_MS_CONVERTER -from .zilien import ZilienTSVReader + +# ixdat +from .ixdat_csv import IxdatCSVReader + +# potentiostats from .biologic import BiologicMPTReader +from .autolab import NovaASCIIReader +from .ivium import IviumDatasetReader +from .chi import CHInstrumentsTXTReader + +# mass spectrometers +from .pfeiffer import PVMassSpecReader +from .rgasoft import StanfordRGASoftReader +from .cinfdata import CinfdataTXTReader + +# ec-ms +from .zilien import ZilienTSVReader, ZilienTMPReader, ZilienSpectrumReader +from .ec_ms_pkl import EC_MS_CONVERTER + +# spectroelectrochemistry +from .msrh_sec import MsrhSECReader, MsrhSECDecayReader READER_CLASSES = { - "EC_MS": EC_MS_CONVERTER, - "zilien": ZilienTSVReader, + "ixdat": IxdatCSVReader, "biologic": BiologicMPTReader, + "autolab": NovaASCIIReader, + "ivium": IviumDatasetReader, + "chi": CHInstrumentsTXTReader, + "pfeiffer": PVMassSpecReader, + "rgasoft": StanfordRGASoftReader, + "cinfdata": CinfdataTXTReader, + "zilien": ZilienTSVReader, + "zilien_tmp": ZilienTMPReader, + "zilien_spec": ZilienSpectrumReader, + "EC_MS": EC_MS_CONVERTER, + "msrh_sec": MsrhSECReader, + "msrh_sec_decay": MsrhSECDecayReader, } diff --git a/src/ixdat/readers/autolab.py b/src/ixdat/readers/autolab.py new file mode 100644 index 00000000..576e200a --- /dev/null +++ b/src/ixdat/readers/autolab.py @@ -0,0 +1,83 @@ +"""This module implements the reader for ascii exports from autolab's Nova software""" + +import re +from pathlib import Path +import pandas as pd +from .reading_tools import ( + prompt_for_tstamp, + series_list_from_dataframe, + STANDARD_TIMESTAMP_FORM, + timestamp_string_to_tstamp, +) + +AUTOLAB_ALIASES = { + "raw_potential": ("WE(1).Potential (V)",), + "raw_current": ("WE(1).Current (A)",), + "t": ("Time (s)",), +} + + +class NovaASCIIReader: + """A reader for ascii files exported by Autolab's Nova software""" + + def read( + self, + path_to_file, + cls=None, + 
name=None, + tstamp=None, + timestring=None, + timestring_form=STANDARD_TIMESTAMP_FORM, + **kwargs + ): + """Read the ASCII export from Autolab's Nova software + + Args: + path_to_file (Path): The full absolute or relative path including the suffix + name (str): The name to use if not the file name + cls (Measurement subclass): The Measurement class to return an object of. + Defaults to `ECMeasurement` and should probably be a subclass thereof in + any case. + tstamp (float): timestamp of the measurement, if known + timestring (str): timestring describing the timestamp of the measurement + timestring_form (str): form of the timestring. Default is "%d/%m/%Y %H:%M:%S" + **kwargs (dict): Key-word arguments are passed to cls.__init__ + """ + self.path_to_file = Path(path_to_file) + name = name or self.path_to_file.name + if not tstamp: + if timestring: + tstamp = timestamp_string_to_tstamp(timestring, form=timestring_form) + else: + tstamp = prompt_for_tstamp(self.path_to_file) + + dataframe = pd.read_csv(self.path_to_file, delimiter=";") + + data_series_list = series_list_from_dataframe( + dataframe, "Time (s)", tstamp, get_column_unit + ) + obj_as_dict = dict( + name=name, + technique="EC", + reader=self, + aliases=AUTOLAB_ALIASES, + series_list=data_series_list, + tstamp=tstamp, + ) + obj_as_dict.update(kwargs) + + if not cls: + from ..techniques.ec import ECMeasurement + + cls = ECMeasurement + return cls.from_dict(obj_as_dict) + + +def get_column_unit(column_name): + """Return the unit name of an autolab column, i.e the last part of the name in ()""" + unit_match = re.search(r"\((.+)\)$", column_name) + if unit_match: + unit_name = unit_match.group(1) + else: + unit_name = None + return unit_name diff --git a/src/ixdat/readers/biologic.py b/src/ixdat/readers/biologic.py index 23e6228e..d71b206b 100644 --- a/src/ixdat/readers/biologic.py +++ b/src/ixdat/readers/biologic.py @@ -4,10 +4,12 @@ """ import re -import time +from pathlib import Path + import numpy as np 
from . import TECHNIQUE_CLASSES +from .reading_tools import timestamp_string_to_tstamp from ..data_series import TimeSeries, ValueSeries, ConstantValue from ..exceptions import ReadError @@ -97,9 +99,19 @@ def read(self, path_to_file, name=None, cls=ECMeasurement, **kwargs): Args: path_to_file (Path): The full abs or rel path including the ".mpt" extension - cls (Measurement class): The class of the measurement to return **kwargs (dict): Key-word arguments are passed to ECMeasurement.__init__ + name (str): The name to use if not the file name + cls (Measurement subclass): The Measurement class to return an object of. + Defaults to `ECMeasurement` and should probably be a subclass thereof in + any case. + **kwargs (dict): Key-word arguments are passed to cls.__init__ """ + + path_to_file = Path(path_to_file) if path_to_file else self.path_to_file + + if issubclass(ECMeasurement, cls): + cls = ECMeasurement + if self.file_has_been_read: print( f"This {self.__class__.__name__} has already read {self.path_to_file}." 
@@ -196,7 +208,9 @@ def process_header_line(self, line): timestamp_match = re.search(regular_expressions["timestamp_string"], line) if timestamp_match: self.timestamp_string = timestamp_match.group(1) - self.tstamp = timestamp_string_to_tstamp(self.timestamp_string) + self.tstamp = timestamp_string_to_tstamp( + self.timestamp_string, forms=BIOLOGIC_TIMESTAMP_FORMS + ) return loop_match = re.search(regular_expressions["loop"], line) if loop_match: @@ -241,6 +255,9 @@ def print_header(self): header = "".join(self.header_lines) print(header) + def __repr__(self): + return f"{self.__class__.__name__}({self.path_to_file})" + def get_column_unit(column_name): """Return the unit name of a .mpt column, i.e the part of the name after the '/'""" @@ -251,26 +268,56 @@ def get_column_unit(column_name): return unit_name -def timestamp_string_to_tstamp(timestamp_string, form=None): - """Return the unix timestamp as a float by parsing timestamp_string - - Args: - timestamp_string (str): The timestamp as read in the .mpt file - form (str): The format string used by time.strptime (string-parse time) - TODO: EC-Lab saves time in a couple different ways based on version and - location. In the future this function will need to try multiple forms. - """ - timestamp_forms = ([form] if form else []) + timestamp_form_strings - for form in timestamp_forms: - try: - struct = time.strptime(timestamp_string, form) - except ValueError: - continue - else: - break - - tstamp = time.mktime(struct) - return tstamp +# Formats by which timestamps are saved in various EC-Labs # with example encountered +BIOLOGIC_TIMESTAMP_FORMS = ( + "%m-%d-%Y %H:%M:%S", # like 01-31-2020 10:32:02 + "%m/%d/%Y %H:%M:%S", # like 07/29/2020 10:31:03 + "%m-%d-%Y %H:%M:%S.%f", # (anticipated) + "%m/%d/%Y %H:%M:%S.%f", # like 04/27/2021 11:35:39.227 (EC-Lab v11.34) + "%m/%d/%Y %H.%M.%S", # like 01/31/2022 11.19.17 +) + +# This tuple contains variable names encountered in .mpt files. 
The tuple can be used by +# other modules to tell which data is from biologic. +BIOLOGIC_COLUMN_NAMES = ( + "mode", + "ox/red", + "error", + "control changes", + "time/s", + "control/V", + "Ewe/V", + "/mA", + "(Q-Qo)/C", + "P/W", + "loop number", + "I/mA", + "control/mA", + "Ns changes", + "counter inc.", + "cycle number", + "Ns", + "(Q-Qo)/mA.h", + "dQ/C", + "Q charge/discharge/mA.h", + "half cycle", + "Capacitance charge/µF", + "Capacitance discharge/µF", + "dq/mA.h", + "Q discharge/mA.h", + "Q charge/mA.h", + "Capacity/mA.h", + "file number", + "file_number", + "Ece/V", + "Ewe-Ece/V", + "/V", + "/V", + "Energy charge/W.h", + "Energy discharge/W.h", + "Efficiency/%", + "Rcmp/Ohm", +) if __name__ == "__main__": @@ -282,7 +329,6 @@ def timestamp_string_to_tstamp(timestamp_string, form=None): Script path = ... """ - from pathlib import Path from matplotlib import pyplot as plt from ixdat.measurements import Measurement diff --git a/src/ixdat/readers/chi.py b/src/ixdat/readers/chi.py new file mode 100644 index 00000000..8bc7c27e --- /dev/null +++ b/src/ixdat/readers/chi.py @@ -0,0 +1,39 @@ +"""A reader for text exports from the RGA Software of Stanford Instruments""" + +from EC_MS import Dataset +from .ec_ms_pkl import measurement_from_ec_ms_dataset +from ..techniques import ECMeasurement + + +CHI_LEGACY_ALIASES = { + # TODO: These should change to what Zilien calls them. Right now the alias's + # reflect the way the lagacy EC_MS code renames essential series + "t": ["time/s"], + "raw_potential": ["Ewe/V", "/V"], + "raw_current": ["I/mA", "/mA"], + "cycle": ["cycle number"], +} + + +class CHInstrumentsTXTReader: + path_to_file = None + + def read(self, path_to_file, cls=None): + """Read a .txt file exported by CH Instruments software. + + TODO: Write a new reader that doesn't use the old EC_MS package + + Args: + path_to_file (Path or str): The file to read + cls (Measurement subclass): The class to return. 
Defaults to ECMeasuremnt + """ + self.path_to_file = path_to_file + cls = cls if (cls and not issubclass(ECMeasurement, cls)) else ECMeasurement + ec_ms_dataset = Dataset(path_to_file, data_type="CHI") + return measurement_from_ec_ms_dataset( + ec_ms_dataset.data, + cls=cls, + reader=self, + technique="EC", + aliases=CHI_LEGACY_ALIASES, + ) diff --git a/src/ixdat/readers/cinfdata.py b/src/ixdat/readers/cinfdata.py new file mode 100644 index 00000000..418b0ac1 --- /dev/null +++ b/src/ixdat/readers/cinfdata.py @@ -0,0 +1,211 @@ +"""Module defining readers for DTU Surfcat's legendary cinfdata system""" + +from pathlib import Path +import numpy as np +from ..exceptions import ReadError +from ..data_series import ValueSeries, TimeSeries +from ..techniques import MSMeasurement +from .reading_tools import timestamp_string_to_tstamp + + +class CinfdataTXTReader: + """A class that reads the text exported by cinfdata's text export functionality + + TODO: We should also have a reader class that downloads the data from cinfdata like + `EC_MS`'s `download_cinfdata_set`: + https://github.com/ScottSoren/EC_MS/blob/master/src/EC_MS/Data_Importing.py#L711 + + Attributes: + path_to_file (Path): the location and name of the file read by the reader + n_line (int): the number of the last line read by the reader + place_in_file (str): The last location in the file read by the reader. This + is used internally to tell the reader how to parse each line. Options are: + "header", "column names", and "data". + header_lines (list of str): a list of the header lines of the files. This + includes the column name line. The header can be nicely viewed with the + print_header() function. 
+ tstamp (str): The unix time corresponding to t=0 for the measurement + tstamp_list (list of float): list of epoch tstamps in the file's timestamp line + column_tstamps (dict): The unix time corresponding to t=0 for each time column + technique (str): The name of the technique + column_names (list of str): The names of the data columns in the file + t_and_v_cols (dict): {name: (tcol, vcol)} where name is the name of the + ValueSeries (e.g. "M2"), tcol is the name of the corresponding time column + in the file (e.g. "M2-x"), and vcol is the the name of the value column in + the file (e.g. "M2-y). + column_data (dict of str: np.array): The data in the file as a dict. + Note that the np arrays are the same ones as in the measurement's DataSeries, + so this does not waste memory. + file_has_been_read (bool): This is used to make sure read() is only successfully + called once by the Reader. False until read() is called, then True. + measurement (Measurement): The measurement returned by read() when the file is + read. self.measureemnt is None before read() is called. + """ + + delim = "\t" + + def __init__(self): + """Initialize a Reader for cinfdata-exported text files. See class docstring.""" + self.name = None + self.path_to_file = None + self.n_line = 0 + self.place_in_file = "header" + self.header_lines = [] + self.tstamp = None + self.tstamp_list = [] + self.column_tstamps = {} + self.column_names = [] + self.t_and_v_cols = {} + self.column_data = {} + self.technique = "MS" # TODO: Figure out how to tell if it's something else + self.measurement_class = MSMeasurement + self.file_has_been_read = False + self.measurement = None + + def read(self, path_to_file, name=None, cls=None, **kwargs): + """Return an MSMeasurement with the data and metadata recorded in path_to_file + + This loops through the lines of the file, processing one at a time. For header + lines, this involves searching for metadata. 
For the column name line, this + involves creating empty arrays for each data series. For the data lines, this + involves appending to these arrays. After going through all the lines, it + converts the arrays to DataSeries. + For cinfdata text files, each value column has its own timecolumn, and they are + not necessarily all the same length. + Finally, the method returns an ECMeasurement with these DataSeries. The + ECMeasurement contains a reference to the reader. + All attributes of this reader can be accessed from the + measurement as `measurement.reader.attribute_name`. + + Args: + path_to_file (Path): The full abs or rel path including the ".txt" extension + **kwargs (dict): Key-word arguments are passed to ECMeasurement.__init__ + """ + path_to_file = Path(path_to_file) if path_to_file else self.path_to_file + if self.file_has_been_read: + print( + f"This {self.__class__.__name__} has already read {self.path_to_file}." + " Returning the measurement resulting from the original read. " + "Use a new Reader if you want to read another file." 
+ ) + return self.measurement + self.name = name or path_to_file.name + self.path_to_file = path_to_file + with open(self.path_to_file, "r") as f: + for line in f: + self.process_line(line) + for name in self.column_names: + self.column_data[name] = np.array(self.column_data[name]) + + data_series_list = [] + for name, (tcol, vcol) in self.t_and_v_cols.items(): + tseries = TimeSeries( + name=tcol, + unit_name=get_column_unit(tcol) or "s", + data=self.column_data[tcol], + tstamp=self.column_tstamps[tcol], + ) + vseries = ValueSeries( + name=name, + data=self.column_data[vcol], + tseries=tseries, + unit_name=get_column_unit(vcol), + ) + data_series_list.append(tseries) + data_series_list.append(vseries) + + obj_as_dict = dict( + name=self.name, + technique=self.technique, + reader=self, + series_list=data_series_list, + tstamp=self.tstamp, + ) + # normally MSMeasurement requires mass aliases, but not cinfdata since it uses + # the ixdat convention (actually, ixdat uses the cinfdata convention) of M + obj_as_dict.update(kwargs) + + if issubclass(cls, self.measurement_class): + self.measurement_class = cls + + self.measurement = self.measurement_class.from_dict(obj_as_dict) + self.file_has_been_read = True + return self.measurement + + def process_line(self, line): + """Call the correct line processing method depending on self.place_in_file""" + if self.place_in_file == "header": + self.process_header_line(line) + elif self.place_in_file == "post_header": + if line.strip(): # then we're in the column headers! 
+ self.process_column_line(line) + elif self.place_in_file == "data": + self.process_data_line(line) + else: # just for debugging + raise ReadError(f"place_in_file = {self.place_in_file}") + self.n_line += 1 + + def process_header_line(self, line): + """Search line for important metadata and set the relevant attribute of self""" + self.header_lines.append(line) + if not line.strip(): # the blank lines between the header and the column names + self.place_in_file = "post_header" + elif "Recorded at" in line: + for s in line.split(self.delim): + if "Recorded at" not in s: + self.tstamp_list.append( + timestamp_string_to_tstamp( + s.strip()[1:-1], # remove edge whitespace and quotes. + form="%Y-%m-%d %H:%M:%S", # like "2017-09-20 13:06:00" + ) + ) + self.tstamp = self.tstamp_list[0] + + def process_column_line(self, line): + """Split the line to get the names of the file's data columns""" + self.header_lines.append(line) + self.column_names = [name.strip() for name in line.split(self.delim)] + self.column_data.update({name: [] for name in self.column_names}) + i = 0 # need a counter to map tstamps to timecols. + for col in self.column_names: + if col.endswith("-y"): + name = col[:-2] + tcol = f"{name}-x" + if tcol not in self.column_names: + print(f"Warning! No timecol for {col}. Expected {tcol}. Ignoring.") + continue + self.t_and_v_cols[name] = (tcol, col) + self.column_tstamps[tcol] = self.tstamp_list[i] + i += 1 + + self.place_in_file = "data" + + def process_data_line(self, line): + """Split the line and append the numbers the corresponding data column arrays""" + data_strings_from_line = line.strip().split(self.delim) + for name, value_string in zip(self.column_names, data_strings_from_line): + if value_string: + try: + value = float(value_string) + except ValueError: + raise ReadError(f"can't parse value string '{value_string}'") + self.column_data[name].append(value) + + def print_header(self): + """Print the file header including column names. 
read() must be called first.""" + header = "".join(self.header_lines) + print(header) + + +def get_column_unit(column_name): + """Return the unit name of an ixdat column, i.e the part of the name after the '/'""" + if column_name.startswith("M") and column_name.endswith("-y"): + unit_name = "A" + elif column_name.startswith("M") and column_name.endswith("-x"): + unit_name = "s" + else: + # TODO: Figure out how cinfdata represents units for other stuff. + # see https://github.com/ixdat/ixdat/pull/30/files#r811432543, and + # https://github.com/CINF/cinfdata/blob/master/sym-files2/export_data.py#L125 + unit_name = None + return unit_name diff --git a/src/ixdat/readers/ec_ms.py b/src/ixdat/readers/ec_ms.py deleted file mode 100644 index 6a30d1cf..00000000 --- a/src/ixdat/readers/ec_ms.py +++ /dev/null @@ -1,9 +0,0 @@ -from . import TECHNIQUE_CLASSES - -ECMSMeasruement = TECHNIQUE_CLASSES["EC-MS"] - - -class EC_MS_CONVERTER: - def read(self): - # return ECMSMeasurement(**obj_as_dict) - pass diff --git a/src/ixdat/readers/ec_ms_pkl.py b/src/ixdat/readers/ec_ms_pkl.py new file mode 100644 index 00000000..6cf08eb2 --- /dev/null +++ b/src/ixdat/readers/ec_ms_pkl.py @@ -0,0 +1,125 @@ +from pathlib import Path +from . import TECHNIQUE_CLASSES +import pickle +from ..data_series import TimeSeries, ValueSeries +from ..measurements import Measurement +from .biologic import BIOLOGIC_COLUMN_NAMES, get_column_unit + + +ECMSMeasurement = TECHNIQUE_CLASSES["EC-MS"] + + +class EC_MS_CONVERTER: + """Imports old .pkl files obtained from the legacy EC-MS package""" + + def __init__(self): + print("Reader of old ECMS .pkl files") + + def read(self, file_path, cls=None, **kwargs): + """Return an ECMSMeasurement with the data recorded in path_to_file + Most of the work is done by module-level function measurement_from_ec_ms_dataset + + Args: + path_to_file (Path): The full abs or rel path including the + ".pkl" extension. 
+ """ + with open(file_path, "rb") as f: + ec_ms_dict = pickle.load(f) + + return measurement_from_ec_ms_dataset( + ec_ms_dict, + name=Path(file_path).name, + cls=cls, + reader=self, + technique="EC-MS", + **kwargs, + ) + + +def measurement_from_ec_ms_dataset( + ec_ms_dict, + name=None, + cls=ECMSMeasurement, + reader=None, + technique=None, + **kwargs, +): + """Return an ixdat Measurement with the data from an EC_MS data dictionary. + + This loops through the keys of the EC-MS dict and searches for MS and + EC data. Names the dataseries according to their names in the original + dict. Omits any other data as well as metadata. + + Args: + ec_ms_dict (dict): The EC_MS data dictionary + name (str): Name of the measurement + cls (Measurement class): The class to return a measurement of + reader (Reader object): The class which read ec_ms_dataset from file + technique (str): The name of the technique + """ + + if "Ewe/V" in ec_ms_dict and "/V" in ec_ms_dict: + # EC_MS duplicates the latter as the former, so here we delete it: + del ec_ms_dict["/V"] + if "I/mA" in ec_ms_dict and "/mA" in ec_ms_dict: + # EC_MS duplicates the latter as the former, so here we delete it: + del ec_ms_dict["/mA"] + + cols_str = ec_ms_dict["data_cols"] + cols_list = [] + + name = name or ec_ms_dict.get("title", None) + + for col in cols_str: + if col.endswith("-x"): + cols_list.append( + TimeSeries(col, "s", ec_ms_dict[col], ec_ms_dict["tstamp"]) + ) + + if "time/s" in ec_ms_dict: + cols_list.append( + TimeSeries("time/s", "s", ec_ms_dict["time/s"], ec_ms_dict["tstamp"]) + ) + + tseries_meas = Measurement("tseries_ms", technique="EC_MS", series_list=cols_list) + + for col in cols_str: + if col not in ec_ms_dict or col in tseries_meas.series_names: + continue + if col.endswith("-y"): + v_name = col[:-2] + tseries = tseries_meas[col[:-1] + "x"] + unit_name = "A" if col.startswith("M") else "" + elif col in BIOLOGIC_COLUMN_NAMES and col not in tseries_meas.series_names: + v_name = col + tseries = 
tseries_meas["time/s"] + unit_name = get_column_unit(col) + else: + print(f"Not including '{col}' as I don't know what it is.") + continue + data = ec_ms_dict[col] + if not tseries.data.size == data.size: + print(f"Not including '{col}' due to mismatch size with {tseries}") + continue + cols_list.append( + ValueSeries( + name=v_name, + data=data, + unit_name=unit_name, + tseries=tseries, + ) + ) + + aliases = {"t": ["time/s"], "raw_potential": ["Ewe/V"], "raw_current": ["I/mA"]} + obj_as_dict = dict( + name=name, + technique=technique or "EC_MS", + series_list=cols_list, + reader=reader, + tstamp=ec_ms_dict["tstamp"], + aliases=aliases + ) + obj_as_dict.update(kwargs) + + measurement = cls.from_dict(obj_as_dict) + return measurement diff --git a/src/ixdat/readers/ivium.py b/src/ixdat/readers/ivium.py new file mode 100644 index 00000000..97cfc0ff --- /dev/null +++ b/src/ixdat/readers/ivium.py @@ -0,0 +1,142 @@ +"""This module implements the reader for the text export of Ivium's software""" + +import re +from pathlib import Path +import pandas as pd +from ..techniques.ec import ECMeasurement +from .reading_tools import timestamp_string_to_tstamp, series_list_from_dataframe + +IVIUM_ALIASES = { + "raw_potential": ("E/V",), + "raw_current": ("I/A",), + "t": ("time/s",), +} + + +class IviumDataReader: + """Class for reading single ivium files""" + + def read(self, path_to_file, cls=None, name=None, cycle_number=0, **kwargs): + """Read the ASCII export from the Ivium software + + Args: + path_to_file (Path): The full abs or rel path including the suffix (.txt) + cls (Measurement subclass): The Measurement class to return an object of. + Defaults to `ECMeasurement`. 
+ name (str): The name to use if not the file name + cycle_number (int): The cycle number of the data in the file (default is 0) + + **kwargs (dict): Key-word arguments are passed to cls.__init__ + + Returns: + cls: technique measurement object with the ivium data + """ + self.path_to_file = Path(path_to_file) + name = name or self.path_to_file.name + + with open(self.path_to_file, "r") as f: + timestring_line = f.readline() # we need this for tstamp + columns_line = f.readline() # we need this to get the column names + first_data_line = f.readline() # we need this to check the column names + tstamp = timestamp_string_to_tstamp( + timestring_line.strip(), + form="%d/%m/%Y %H:%M:%S", # like '04/03/2021 19:42:30' + ) + + # ivium files do something really dumb. They add an extra column of data, which + # looks like the measured potential (to complement 'E/V' which is presumably the + # setpoint), but don't add the name of this column in the column name line. + # So in order for pandas' csv reader to read it, we need assign a name to this + # extra column (it becomes 'Unlabeled_1') and specify the column names. + # Here we prepare the thus-corrected column name list, `column_names`: + column_names = [col.strip() for col in columns_line.split(" ") if col.strip()] + first_dat = [dat.strip() for dat in first_data_line.split(" ") if dat.strip()] + if len(first_dat) > len(column_names): + for i in range(len(first_dat) - len(column_names)): + column_names.append(f"unlabeled_{i}") + + # And now we can read the data. Notice also the variable whitespace delimiter. 
+ dataframe = pd.read_csv( + self.path_to_file, delimiter=r"\s+", header=1, names=column_names + ) + + # All that's left is getting the data from the dataframe into DataSeries and + # into the Measurement, starting with the TimeSeries: + + data_series_list = series_list_from_dataframe( + dataframe, + time_name="time/s", + tstamp=tstamp, + unit_finding_function=get_column_unit, + cycle=cycle_number, + ) + # With the `series_list` ready, we prepare the Measurement dictionary and + # return the Measurement object: + obj_as_dict = dict( + name=name, + technique="EC", + reader=self, + aliases=IVIUM_ALIASES, + series_list=data_series_list, + tstamp=tstamp, + ) + obj_as_dict.update(kwargs) + + if not issubclass(ECMeasurement, cls): + cls = ECMeasurement + return cls.from_dict(obj_as_dict) + + +class IviumDatasetReader: + """Class for reading sets of ivium files exported together""" + + def read(self, path_to_file, cls=None, name=None, **kwargs): + """Return a measurement containing the data of an ivium dataset, + + An ivium dataset is a group of ivium files exported together. They share a + folder and a base name, and are suffixed "_1", "_2", etc. + + Args: + path_to_file (Path or str): `Path(path_to_file).parent` is interpreted as the + folder where the files of the ivium dataset is. `Path(path_to_file).name` + up to the first "_" is interpreted as the shared start of the files in + the dataset. You can thus use the base name of the exported files or + the full path of any one of them. + cls (Measurement class): The measurement class. Defaults to ECMeasurement. + name (str): The name of the dataset. 
Defaults to the base name of the dataset + kwargs: key-word arguments are included in the dictionary for cls.from_dict() + + Returns cls or ECMeasurement: A measurement object with the ivium data + """ + self.path_to_file = Path(path_to_file) + + folder = self.path_to_file.parent + base_name = self.path_to_file.name + if re.search(r"_[0-9]", base_name): + base_name = base_name.rpartition("_")[0] + name = name or base_name + + # With two list comprehensions, we get the Measurement object for each file + # in the folder who's name starts with base_name: + all_file_paths = [f for f in folder.iterdir() if f.name.startswith(base_name)] + component_measurements = [ + IviumDataReader().read(f, cls=cls, cycle_number=i) + for i, f in enumerate(all_file_paths) + ] + + # Now we append these using the from_component_measurements class method of the + # right TechniqueMeasurement class, and return the result. + if not cls: + from ..techniques.ec import ECMeasurement + + cls = ECMeasurement + measurement = cls.from_component_measurements( + component_measurements, name=name, **kwargs + ) + return measurement + + +def get_column_unit(column_name): + """Return the unit name of an ivium column, i.e what follows the first '/'.""" + if "/" in column_name: + return column_name.split("/", 1)[1] diff --git a/src/ixdat/readers/ixdat_csv.py b/src/ixdat/readers/ixdat_csv.py new file mode 100644 index 00000000..789152eb --- /dev/null +++ b/src/ixdat/readers/ixdat_csv.py @@ -0,0 +1,353 @@ +"""Module defining the ixdat csv reader, so ixdat can read the files it exports.""" + +from pathlib import Path +import json +import numpy as np +import re +import pandas as pd +from ..exceptions import ReadError +from ..data_series import ValueSeries, TimeSeries, DataSeries, Field +from ..measurements import Measurement +from ..spectra import Spectrum, SpectrumSeries +from ..techniques import TECHNIQUE_CLASSES + +regular_expressions = { + "tstamp": r"tstamp = ([0-9\.]+)", + "technique": r"technique = 
([A-Za-z\-]+)\n", + "N_header_lines": r"N_header_lines = ([0-9]+)", + "backend_name": r"backend_name = (\w+)", + "id": r"id = ([0-9]+)", + "timecol": r"timecol '(.+)' for: (?:'(.+)')$", + "unit": r"/ [(.+)]", + "aux_file": r"'(.*)' in file: '(.*)'", +} + + +class IxdatCSVReader: + """A class that reads the csv's made by ixdat.exporters.csv_exporter.CSVExporter + + read() is the important method - it takes the path to the mpt file as argument + and returns an ECMeasurement object (ec_measurement) representing that file. + The ECMeasurement contains a reference to the BiologicMPTReader object, as + ec_measurement.reader. This makes available all the following stuff, likely + useful for debugging. + + Attributes: + path_to_file (Path): the location and name of the file read by the reader + n_line (int): the number of the last line read by the reader + place_in_file (str): The last location in the file read by the reader. This + is used internally to tell the reader how to parse each line. Options are: + "header", "column names", and "data". + header_lines (list of str): a list of the header lines of the files. This + includes the column name line. The header can be nicely viewed with the + print_header() function. + tstamp (str): The unix time corresponding to t=0 + technique (str): The name of the technique + N_header_lines (int): The number of lines in the header of the file + column_names (list of str): The names of the data columns in the file + column_data (dict of str: np.array): The data in the file as a dict. + Note that the np arrays are the same ones as in the measurement's DataSeries, + so this does not waste memory. + file_has_been_read (bool): This is used to make sure read() is only successfully + called once by the Reader. False until read() is called, then True. + measurement (Measurement): The measurement returned by read() when the file is + read. self.measureemnt is None before read() is called. 
+ """ + + delim = "," + + def __init__(self): + """Initialize a Reader for ixdat-exported .csv files. See class docstring.""" + self.name = None + self.path_to_file = None + self.n_line = 0 # TODO: decide if this is part of API. + # as per https://github.com/ixdat/ixdat/pull/30/files#r816204939 + self.place_in_file = "header" + self.header_lines = [] + self.tstamp = None + self.N_header_lines = None + self.timecols = {} + self.column_names = [] + self.column_data = {} + self.technique = None + self.aux_series_list = [] + self.measurement_class = Measurement + self.file_has_been_read = False + self.measurement = None + self.meas_as_dict = {} + + def read(self, path_to_file, name=None, cls=None, **kwargs): + """Return a Measurement with the data and metadata recorded in path_to_file + + This loops through the lines of the file, processing one at a time. For header + lines, this involves searching for metadata. For the column name line, this + involves creating empty arrays for each data series. For the data lines, this + involves appending to these arrays. After going through all the lines, it + converts the arrays to DataSeries. + The technique is specified in the header, and used to pick the + TechniqueMeasurement class. + Finally, the method returns a TechniqueMeasurement object `measurement` + with these DataSeries. All attributes of this reader can be accessed from the + measurement as `measurement.reader.attribute_name`. + + Args: + path_to_file (Path): The full abs or rel path including the ".mpt" extension + name (str): The name of the measurement to return (defaults to path_to_file) + cls (Measurement subclass): The class of measurement to return. By default, + cls will be determined from the technique specified in the header of + path_to_file. 
+ **kwargs (dict): Key-word arguments are passed to ECMeasurement.__init__ + + Returns cls: a Measurement of type cls + """ + path_to_file = Path(path_to_file) if path_to_file else self.path_to_file + if self.file_has_been_read: + print( + f"This {self.__class__.__name__} has already read {self.path_to_file}." + " Returning the measurement resulting from the original read. " + "Use a new Reader if you want to read another file." + ) + return self.measurement + self.name = name or path_to_file.name + self.path_to_file = path_to_file + + with open(self.path_to_file, "r") as f: + for line in f: + self.process_line(line) + + for name in self.column_names: + self.column_data[name] = np.array(self.column_data[name]) + + data_series_dict = {} + + for tcol_name in self.timecols: # then it's time! + data_series_dict[tcol_name] = TimeSeries( + name=tcol_name, + unit_name=get_column_unit(tcol_name) or "s", + data=self.column_data[tcol_name], + tstamp=self.tstamp, + ) + + for column_name, data in self.column_data.items(): + if column_name in self.timecols: + continue + try: + tcol_name = next( + tcol_name + for tcol_name in self.timecols + if column_name in self.timecols[tcol_name] + ) + except StopIteration: # debugging + raise ReadError( + f"can't find tcol for {column_name}. 
timecols={self.timecols}" + ) + + tseries = data_series_dict[tcol_name] + vseries = ValueSeries( + name=column_name, + data=data, + tseries=tseries, + unit_name=get_column_unit(column_name), + ) + data_series_dict[column_name] = vseries + + data_series_list = list(data_series_dict.values()) + self.aux_series_list + self.meas_as_dict.update( + name=self.name, + technique=self.technique, + reader=self, + series_list=data_series_list, + tstamp=self.tstamp, + ) + self.meas_as_dict.update(kwargs) + + if issubclass(cls, self.measurement_class): + self.measurement_class = cls + + self.measurement = self.measurement_class.from_dict(self.meas_as_dict) + self.file_has_been_read = True + return self.measurement + + def process_line(self, line): + """Call the correct line processing method depending on self.place_in_file""" + if self.place_in_file == "header": + self.process_header_line(line) + elif self.place_in_file == "column names": + self.process_column_line(line) + elif self.place_in_file == "data": + self.process_data_line(line) + else: # just for debugging + raise ReadError(f"place_in_file = {self.place_in_file}") + self.n_line += 1 + + def process_header_line(self, line): + """Search line for important metadata and set the relevant attribute of self""" + self.header_lines.append(line) + N_head_match = re.search(regular_expressions["N_header_lines"], line) + if N_head_match: + self.N_header_lines = int(N_head_match.group(1)) + return + timestamp_match = re.search(regular_expressions["tstamp"], line) + if timestamp_match: + self.tstamp = float(timestamp_match.group(1)) + return + technique_match = re.search(regular_expressions["technique"], line) + if technique_match: + self.technique = technique_match.group(1) + if self.technique in TECHNIQUE_CLASSES: + if issubclass( + TECHNIQUE_CLASSES[self.technique], self.measurement_class + ): + self.measurement_class = TECHNIQUE_CLASSES[self.technique] + return + timecol_match = re.search(regular_expressions["timecol"], line) + 
if timecol_match: + tcol = timecol_match.group(1) + self.timecols[tcol] = [] + for vcol in timecol_match.group(2).split("' and '"): + self.timecols[tcol].append(vcol) + return + aux_file_match = re.search(regular_expressions["aux_file"], line) + if aux_file_match: + aux_file_name = aux_file_match.group(1) + aux_file = self.path_to_file.parent / aux_file_match.group(2) + self.read_aux_file(aux_file, name=aux_file_name) + return + if " = " in line: + key, value = line.strip().split(" = ") + if key in ("name", "id"): + return + try: + self.meas_as_dict[key] = json.loads(value) + except json.decoder.JSONDecodeError: + print(f"skipping the following line:\n{line}") + return + + if self.N_header_lines and self.n_line >= self.N_header_lines - 2: + self.place_in_file = "column names" + + def process_column_line(self, line): + """Split the line to get the names of the file's data columns""" + self.header_lines.append(line) + self.column_names = [name.strip() for name in line.split(self.delim)] + self.column_data.update({name: [] for name in self.column_names}) + self.place_in_file = "data" + + def process_data_line(self, line): + """Split the line and append the numbers the corresponding data column arrays""" + data_strings_from_line = line.strip().split(self.delim) + for name, value_string in zip(self.column_names, data_strings_from_line): + if value_string: + try: + value = float(value_string) + except ValueError: + # That is probably because different columns are different length. + # so we just skip it! + continue + # raise ReadError(f"can't parse value string '{value_string}'") + self.column_data[name].append(value) + + def read_aux_file(self, path_to_aux_file, name): + """Read an auxiliary file and include its series list in the measurement""" + spec = IxdatSpectrumReader().read(path_to_aux_file, name=name) + self.aux_series_list += spec.series_list + + def print_header(self): + """Print the file header including column names. 
read() must be called first.""" + header = "".join(self.header_lines) + print(header) + + +def get_column_unit(column_name): + """Return the unit name of an ixdat column, i.e the part of the name after the '/'""" + unit_match = re.search(regular_expressions["unit"], column_name) + if unit_match: + unit_name = unit_match.group(1) + else: + unit_name = None + return unit_name + + +class IxdatSpectrumReader(IxdatCSVReader): + """A reader for ixdat spectra.""" + + def read(self, path_to_file, name=None, cls=None, **kwargs): + """Read an ixdat spectrum. + + This reads the header with the process_line() function inherited from + IxdatCSVReader. Then it uses pandas to read the data. + + Args: + path_to_file (Path): The full absolute or relative path including extension + name (str): The name of the measurement to return (defaults to path_to_file) + cls (Spectrum subclass): The class of measurement to return. By default, + cls will be determined from the technique specified in the header of + path_to_file. 
+ **kwargs (dict): Key-word arguments are passed to ECMeasurement.__init__ + + Returns cls: a Spectrum of type cls + """ + with open(path_to_file, "r") as f: + for line in f: + if self.place_in_file == "header": + self.process_line(line) + else: + break + + df = pd.read_csv(path_to_file, sep=",", header=self.N_header_lines - 2) + if self.technique == "spectrum": + # FIXME: in the future, this needs to cover all spectrum classes + x_name, y_name = tuple(df.keys()) + x = df[x_name].to_numpy() + y = df[y_name].to_numpy() + cls = cls or Spectrum + return cls.from_data( # see Spectrum.from_data() + x, + y, + self.tstamp, + x_name, + y_name, + name=self.name, + technique=self.technique, + reader=self, + ) + + elif self.technique == "spectra": + # FIXME: in the future, this needs to cover all spectrum series classes + names = {} + units = {} + swap_axes = False + for line in self.header_lines: + for line_start in ("values", "first row", "first column"): + if line.startswith(line_start): + t_x_or_y = re.search("([yxt])=", line).group(1) + names[t_x_or_y] = re.search(r"\'(.*)\'", line).group(1) + units[t_x_or_y] = re.search(r"\[(.*)\]", line).group(1) + if "row" in line_start and t_x_or_y == "t": # check! + swap_axes = True + z1 = np.array([float(key) for key in list(df.keys())[1:]]) + z1_and_y = df.to_numpy() + z0 = z1_and_y[:, 0] + y = z1_and_y[:, 1:] + if swap_axes: + # This is the case if the file was export with spectra_as_rows = False. 
+ t = z1 + x = z0 + y = y.swapaxes(0, 1) + else: + t = z0 + x = z1 + tseries = TimeSeries( + name=names["t"], unit_name=units["t"], data=t, tstamp=self.tstamp + ) + xseries = DataSeries(name=names["x"], unit_name=units["x"], data=x) + field = Field( + name=names["y"], + unit_name=units["y"], + data=y, + axes_series=[tseries, xseries], + ) + cls = cls or SpectrumSeries + return cls.from_field( # see SpectrumSeries.from_field() + field, name=self.name, technique=self.technique, tstamp=self.tstamp + ) diff --git a/src/ixdat/readers/msrh_sec.py b/src/ixdat/readers/msrh_sec.py new file mode 100644 index 00000000..9cd2ec12 --- /dev/null +++ b/src/ixdat/readers/msrh_sec.py @@ -0,0 +1,245 @@ +from pathlib import Path # noqa +import numpy as np +import pandas as pd +from .reading_tools import prompt_for_tstamp +from ..techniques import TECHNIQUE_CLASSES +from ..data_series import DataSeries, TimeSeries, ValueSeries, Field +from ..techniques.analysis_tools import calc_t_using_scan_rate + + +class MsrhSECReader: + """A reader for SEC saved in three files: spectra vs v; wavelengths; current vs v""" + + def read( + self, + path_to_file, + path_to_ref_spec_file, + path_to_V_J_file, + scan_rate, + tstamp=None, + cls=None, + ): + """Read potential-dep. SEC data from 3 csv's to return a SpectroECMeasurement + + The function is well-commented so take a look at the source + + Args: + path_to_file (Path or str): The full path to the file containing the + spectra data. This file has voltage in the first row, and a first + column with an arbitrary counter which has to be replaced by wavelength. + path_to_ref_spec_file (Path or str): The full path to the file containing + the wavelenth data, together usually with the adsorption-free spectrum. + The length of the columns should be the same as in the spectrum data + but in practice is a few points longer. The excess points at the starts + of the columns are discarded. 
+ path_to_V_J_file (Path or str): The full path to the file containing the + current data vs potential. The columns may be reversed in order. In the + end the potential in the spectra file will be retained and the potential + here used to interpolate the current onto the spectra file's potential. + scan_rate (float): Scan rate in [mV/s]. This is used to figure out the + measurement's time variable, as time is bizarrely not included in any + of the data files. + tstamp (float): Timestamp. If None, the user will be prompted for the + measurement start time or whether to use the file creation time. This is + necessary because tstamp is also not included in any of the files but is + central to how ixdat organizes data. If you're sure that tstamp doesn't + matter for you, put e.g. tstamp=1 to suppress the prompt. + cls (Measurement subclass): The class of measurement to return. Defaults to + SpectroECMeasurement. + """ + # us pandas to load the data from the csv files into dataframes: + sec_df = pd.read_csv(path_to_file) + # ^ Note the first row, containing potential, will become the keys. The first + # column, containing an arbitrary counter, is included in the data. + ref_df = pd.read_csv(path_to_ref_spec_file, names=["wavelength", "counts"]) + jv_df = pd.read_csv(path_to_V_J_file, names=["v", "j"]) + + # The spectra need (i) the first colum with the arbitrary counter to be + # discarded and (ii) axes switched so that wavelength is the inner axis + # (axis=1). The outer axis (axis=0) then spans potential or, eq., time: + spectra = sec_df.to_numpy()[:, 1:].swapaxes(0, 1) + # The potential comes from the keys of that data, discarding the first column: + v = np.array([float(key) for key in sec_df.keys()])[1:] + # We get time from this potential and the scan rate, with a helper function: + t = calc_t_using_scan_rate(v, dvdt=scan_rate * 1e-3) + # If they didn't provide a tstamp, we have to prompt for it. 
+ tstamp = tstamp or prompt_for_tstamp(path_to_file) + # Ready to define the measurement's TimeSeries: + tseries = TimeSeries( + "time from scan rate", unit_name="s", data=t, tstamp=tstamp + ) + + # The wavelength comes from the reference spectrum file. + wl = ref_df["wavelength"].to_numpy() + excess_wl_points = len(wl) - spectra.shape[1] + # ^ This is how many points to discard to line up with sec data + # (1 or 2 points in the example data). + wl = wl[excess_wl_points:] + ref_signal = ref_df["counts"].to_numpy()[excess_wl_points:] + + # Now we're ready to define all the spectrum DataSeries: + + # wavelength is independent variable --> simple DataSeries + wl_series = DataSeries("wavelength / [nm]", "nm", wl) + # The reference spectrum spans a space defined by wavelength: + reference = Field( + name="reference", + unit_name="counts", + axes_series=[wl_series], + data=ref_signal, + ) + # The spectra span a space defined by time and wavelength: + spectra = Field( + name="spectra", + unit_name="counts", + axes_series=[tseries, wl_series], + data=spectra, + ) + + # Now we process the current and potential: + v_0 = jv_df["v"].to_numpy() # ... but we'll actually use v from the sec data + j_0 = jv_df["j"].to_numpy() * 1e3 # 1e3 converts [A] to [mA] + if v_0[0] > v_0[-1]: # Need the potential in the EC file to be increasing: + v_0 = np.flip(v_0) + j_0 = np.flip(j_0) + # Since the "real" potential is in the sec data, we need to interpolate the + # current onto it: + j = np.interp(v, v_0, j_0) + # and now we're ready to define the electrochemical DataSeries: + v_series = ValueSeries("raw potential / [V]", "V", v, tseries=tseries) + j_series = ValueSeries("raw current / [mA]", "mA", j, tseries=tseries) + + # put all our DataSeries together: + series_list = [ + tseries, + v_series, + j_series, + wl_series, + reference, + spectra, + ] + + # Figure out which measurement class to return. 
Use S-EC unless this read + # function is provided an even more specific technique class: + measurement_class = TECHNIQUE_CLASSES["S-EC"] + if issubclass(cls, measurement_class): + measurement_class = cls + + # and initiate the measurement: + measurement = measurement_class( + name=str(path_to_file), + tstamp=tstamp, + series_list=series_list, + aliases={ + "raw_potential": (v_series.name,), + "raw_current": (j_series.name,), + "t": (tseries.name,), + }, + ) + + return measurement + + +class MsrhSECDecayReader: + def read( + self, + path_to_file, + path_to_ref_spec_file, + path_to_t_J_file, + path_to_t_V_file, + tstamp=None, + cls=None, + ): + """Read time-dependent SEC data from 4 csv's to return a SpectroECMeasurement + + The function works in a very similar way to MsrhSECReader.read(). + + Args: + path_to_file (Path or str): The full path to the file containing the + spectra data. This file has time in the first row, and a first + column with an arbitrary counter which has to be replaced by wavelength. + path_to_ref_spec_file (Path or str): The full path to the file containing + the wavelenth data, together usually with the adsorption-free spectrum. + The length of the columns should be the same as in the spectrum data + but in practice is a few points longer. The excess points at the starts + of the columns are discarded. + path_to_t_V_file (Path or str): The full path to the file containing the + potential data vs time. + path_to_t_J_file (Path or str): The full path to the file containing the + current data vs time. + tstamp (float): Timestamp. If None, the user will be prompted for the + measurement start time or whether to use the file creation time. This is + necessary because tstamp is also not included in any of the files but is + central to how ixdat organizes data. If you're sure that tstamp doesn't + matter for you, put e.g. tstamp=1 to suppress the prompt. + cls (Measurement subclass): The class of measurement to return. 
Defaults to + SpectroECMeasurement. + """ + + sec_df = pd.read_csv(path_to_file) + ref_df = pd.read_csv(path_to_ref_spec_file, names=["wavelength", "counts"]) + t_V_df = pd.read_csv(path_to_t_V_file, names=["t", "V"]) + t_J_df = pd.read_csv(path_to_t_J_file, names=["t", "J"]) + + t_and_spectra = sec_df.to_numpy() + spectra = t_and_spectra[:, 1:].swapaxes(0, 1) + t_spectra = np.array([float(key) for key in sec_df.keys()])[1:] + + wl = ref_df["wavelength"].to_numpy() + excess_wl_points = len(wl) - spectra.shape[1] + wl = wl[excess_wl_points:] + ref_signal = ref_df["counts"].to_numpy()[excess_wl_points:] + + wl_series = DataSeries("wavelength / [nm]", "nm", wl) + reference = Field( + name="reference", + unit_name="counts", + axes_series=[wl_series], + data=np.array(ref_signal), + ) + + v = t_V_df["V"].to_numpy() + t_v = t_V_df["t"].to_numpy() + j = t_J_df["J"].to_numpy() * 1e3 # Convert [A] to [mA] + t_j = t_J_df["t"].to_numpy() + + tstamp = tstamp or prompt_for_tstamp(path_to_file) + + tseries_j = TimeSeries("t for current", "s", data=t_j, tstamp=tstamp) + tseries_v = TimeSeries("t for potential", "s", data=t_v, tstamp=tstamp) + tseries_spectra = TimeSeries("t for spectra", "s", t_spectra, tstamp) + v_series = ValueSeries("raw potential / [V]", "V", v, tseries=tseries_v) + j_series = ValueSeries("raw current / [mA]", "mA", j, tseries=tseries_j) + spectra = Field( + name="spectra", + unit_name="counts", + axes_series=[tseries_spectra, wl_series], + data=spectra, + ) + series_list = [ + tseries_j, + tseries_v, + tseries_spectra, + v_series, + j_series, + wl_series, + reference, + spectra, + ] + + measurement_class = TECHNIQUE_CLASSES["S-EC"] + if issubclass(cls, measurement_class): + measurement_class = cls + + measurement = measurement_class( + name=str(path_to_file), + tstamp=tstamp, + series_list=series_list, + aliases={ + "raw_potential": (v_series.name,), + "raw_current": (j_series.name,), + "t": (tseries_v.name,), + }, + ) + + return measurement diff --git 
a/src/ixdat/readers/pfeiffer.py b/src/ixdat/readers/pfeiffer.py new file mode 100644 index 00000000..66b0db19 --- /dev/null +++ b/src/ixdat/readers/pfeiffer.py @@ -0,0 +1,81 @@ +"""This module implements the reader for Pfeiffer Vacuum's PV Mass Spec software""" + +import re +from pathlib import Path +import pandas as pd +from .reading_tools import timestamp_string_to_tstamp, series_list_from_dataframe +from ..techniques import MSMeasurement + + +class PVMassSpecReader: + """A reader for (advanced) MID files exported from PVMassSpec ('... - Bin.dat')""" + + def read(self, path_to_file, cls=None, name=None, **kwargs): + """Return a Measurement with the (advanced) MID data in the PVMassSpec file + + Args: + path_to_file (Path or str): a path to the file exported by PVMassSpec with + (advanced) MID data. This file is typically exported with a name that + ends in '- Bin.dat', and with the timestamp in the file name. Note + that the file can be renamed, as the original name is in the file, + and the timestamp is read from there. + cls (Measurement subclass): The technique class of which to return an object. + Defaults to MSMeasurement. + name (str): The name of the measurement. Defaults to Path(path_to_file).name + kwargs: key-word args are used to initiate the measurement via cls.as_dict() + + Return cls: The measurement object + """ + self.path_to_file = Path(path_to_file) + name = name or self.path_to_file.name + with open(path_to_file, "r") as f: + # timestamp is on the the third line, which we select here: + tstamp_line = [f.readline() for _ in range(3)][-1] + tstamp = timestamp_string_to_tstamp( + tstamp_line.split(".")[-2][-19:], # last 19 characters before the last '.' + form="%m-%d-%Y %H'%M'%S", # like "03-02-2021 12'58'40" + ) + df = pd.read_csv(self.path_to_file, header=6, delimiter="\t") + # PV MassSpec calls masses _amu, information we need to pass on to + # MSMeasurement, so that the data will be accessible by the 'M' mass string. 
+ aliases = { + mass_from_column_name(key): [key] + for key in df.keys() + if key.endswith("_amu") + } + series_list = series_list_from_dataframe( + df, + tstamp=tstamp, + time_name="Time Relative (sec)", + unit_finding_function=get_column_unit, + ) + meas_as_dict = { + "name": name, + "tstamp": tstamp, + "series_list": series_list, + "aliases": aliases, + "technique": "MS", + } + meas_as_dict.update(kwargs) + cls = cls or MSMeasurement + return cls.from_dict(meas_as_dict) + + +class PVMassSpecScanReader: + """A reader for mass spectra files exported from PVMassSpec ('... - Scan.dat')""" + + pass + + +def mass_from_column_name(mass): + """Return the PVMassSpec mass 'M' given the column name '_amu' as string""" + return f"M{mass[:-4]}" + + +def get_column_unit(column_name): + """Return the unit name of an ivium column, i.e what follows the first '/'.""" + unit_match = re.search(r"\((.*)\)$", column_name) + if unit_match: + return unit_match.group(1) + elif "amu" in column_name: + return "A" diff --git a/src/ixdat/readers/reading_tools.py b/src/ixdat/readers/reading_tools.py new file mode 100644 index 00000000..df0c2ee5 --- /dev/null +++ b/src/ixdat/readers/reading_tools.py @@ -0,0 +1,130 @@ +"""Module with possibly general-use tools for readers""" + +from pathlib import Path +import time +import urllib.request +from ..config import CFG +from ..exceptions import ReadError +from ..measurements import TimeSeries, ValueSeries, ConstantValue + + +STANDARD_TIMESTAMP_FORM = "%d/%m/%Y %H:%M:%S" # like '31/12/2020 23:59:59' +USA_TIMESTAMP_FORM = "%m/%d/%Y %H:%M:%S" # like '12/31/2020 23:59:59' +FLOAT_MATCH = "[-]?\\d+[\\.]?\\d*(?:e[-]?\\d+)?" 
# matches floats like '5' or '-2.3e5' + + +def timestamp_string_to_tstamp( + timestamp_string, + form=None, + forms=(STANDARD_TIMESTAMP_FORM,), +): + """Return the unix timestamp as a float by parsing timestamp_string + + Args: + timestamp_string (str): The timestamp as read in the .mpt file + form (str): The format string used by time.strptime (string-parse time). This is + optional and overrides `forms` if given. + forms (iter of str): The formats you want to try for time.strptime, defaults to + the standard timestamp form. + """ + if form: + forms = (form, ) + for form in forms: + try: + return time.mktime(time.strptime(timestamp_string, form)) + except ValueError: + continue + + raise ReadError(f"couldn't parse timestamp_string='{timestamp_string}')") + + +def prompt_for_tstamp(path_to_file, default="creation", form=STANDARD_TIMESTAMP_FORM): + """Return the tstamp resulting from a prompt to enter a timestamp, or a default + + Args: + path_to_file (Path): The file of the measurement that we're getting a tstamp for + default (str or float): What to use as the tstamp if the user does not enter one. + This can be a tstamp as a float, or "creation" to use the file creation time, + or "now" to use `time.time()`. + form (str): The specification string for the timestamp format. Defaults to + `ixdat.readers.reading_tools.STANDARD_TIMESTAMP_FORM` + """ + path_to_file = Path(path_to_file) + + if default == "creation": + default_tstamp = path_to_file.stat().st_mtime + elif default == "now": + default_tstamp = time.time() + elif type(default) in (int, float): + default_tstamp = default + else: + raise TypeError("`default` must be a number or 'creation' or 'now'.") + default_timestring = time.strftime(form, time.localtime(default_tstamp)) + + tstamp = None + timestamp_string = "Try at least once." 
+ while timestamp_string: + timestamp_string = input( + f"Please input the timestamp for the measurement at {path_to_file}.\n" + f"Please use the format {form}.\n" + "Enter nothing to use the default default," + f" '{default}', which is '{default_timestring}'." + ) + if timestamp_string: + try: + tstamp = time.mktime(time.strptime(timestamp_string, form)) + except ValueError: + print( + f"Could not parse '{timestamp_string}' according as '{form}'.\n" + f"Try again or enter nothing to use the default." + ) + else: + break + return tstamp or default_tstamp + + +def series_list_from_dataframe( + dataframe, time_name, tstamp, unit_finding_function, **kwargs +): + """Return a list of DataSeries with the data in a pandas dataframe. + + Args: + dataframe (pandas dataframe): The dataframe. Column names are used as series + names, data is taken with series.to_numpy(). The dataframe can only have one + TimeSeries (if there are more than one, pandas is probably not the right + format anyway, since it requires columns be the same length). + time_name (str): The name of the column to use as the TimeSeries + tstamp (float): The timestamp + unit_finding_function (function): A function which takes a column name as a + string and returns its unit. + kwargs: Additional key-word arguments are interpreted as constants to include + in the data series list as `ConstantValue`s. 
+ """ + t = dataframe[time_name].to_numpy() + tseries = TimeSeries(name=time_name, unit_name="s", data=t, tstamp=tstamp) + data_series_list = [tseries] + for column_name, series in dataframe.items(): + if column_name == time_name: + continue + data_series_list.append( + ValueSeries( + name=column_name, + unit_name=unit_finding_function(column_name), + data=series.to_numpy(), + tseries=tseries, + ) + ) + for key, value in kwargs.items(): + data_series_list.append( + ConstantValue(name=key, unit_name="", data=value, tseries=tseries) + ) + return data_series_list + + +def url_to_file(url, file_name="temp", directory=None): + """Copy the contents of the url to a temporary file and return that file's Path.""" + directory = directory or CFG.ixdat_temp_dir + suffix = "." + str(url).split(".")[-1] + path_to_file = (directory / file_name).with_suffix(suffix) + urllib.request.urlretrieve(url, path_to_file) + return path_to_file diff --git a/src/ixdat/readers/rgasoft.py b/src/ixdat/readers/rgasoft.py new file mode 100644 index 00000000..33b6e6a5 --- /dev/null +++ b/src/ixdat/readers/rgasoft.py @@ -0,0 +1,37 @@ +"""A reader for text exports from the potentiostat software of CH Instruments""" + +from EC_MS import Dataset +from .reading_tools import timestamp_string_to_tstamp +from .ec_ms_pkl import measurement_from_ec_ms_dataset +from ..techniques import MSMeasurement + + +class StanfordRGASoftReader: + path_to_file = None + + def read(self, path_to_file, cls=None): + """Read a .txt file exported by CH Instruments software. + + TODO: Write a new reader that doesn't use the old EC_MS package + + Args: + path_to_file (Path or str): The file to read + cls (Measurement subclass): The class to return. Defaults to ECMeasuremnt + """ + + # with open(path_to_file, "r") as f: + # timestamp_string = f.readline().strip() + # tstamp = timestamp_string_to_tstamp( + # timestamp_string, + # form="%b %d, %Y %I:%M:%S %p", # like "Mar 05, 2020 09:50:34 AM" + # ) # ^ For later. 
EC_MS actually gets this right. + + self.path_to_file = path_to_file + cls = cls if (cls and not issubclass(MSMeasurement, cls)) else MSMeasurement + ec_ms_dataset = Dataset( + path_to_file, + data_type="RGA", # tstamp=tstamp + ) + return measurement_from_ec_ms_dataset( + ec_ms_dataset.data, cls=cls, reader=self, technique="MS" + ) diff --git a/src/ixdat/readers/zilien.py b/src/ixdat/readers/zilien.py index df5fc02d..49fadd9b 100644 --- a/src/ixdat/readers/zilien.py +++ b/src/ixdat/readers/zilien.py @@ -1,9 +1,201 @@ -from . import TECHNIQUE_CLASSES +import re +import pandas as pd +import numpy as np +from pathlib import Path +from ..data_series import DataSeries, TimeSeries, ValueSeries, Field +from ..techniques import ECMSMeasurement, MSMeasurement, ECMeasurement, Measurement +from ..techniques.ms import MSSpectrum +from .reading_tools import timestamp_string_to_tstamp, FLOAT_MATCH +from .ec_ms_pkl import measurement_from_ec_ms_dataset -ECMSMeasruement = TECHNIQUE_CLASSES["EC-MS"] +ZILIEN_TIMESTAMP_FORM = "%Y-%m-%d %H_%M_%S" # like 2021-03-15 18_50_10 +ZILIEN_LEGACY_ALIASES = { + # TODO: These should change to what Zilien calls them. Right now the alias's + # reflect the way the lagacy EC_MS code renames essential series + "t": ["time/s"], + "raw_potential": ["Ewe/V", "/V"], + "raw_current": ["I/mA", "/mA"], + "cycle": ["cycle number"], +} + +# TODO: When, in the future, Zilien files include the whole EC dataset, remove the +# unflattering example presently in the docs. +# https://github.com/ixdat/ixdat/pull/30/files#r810087496 class ZilienTSVReader: - def read(self): - # return ECMSMeasurement(**obj_as_dict) - pass + """Class for reading files saved by Spectro Inlets' Zilien software""" + + def read(self, path_to_file, cls=None, name=None, **kwargs): + """Read a zilien file + + TODO: This is a hack using EC_MS to read the .tsv. Will be replaced. 
+ """ + + from EC_MS import Zilien_Dataset + + if cls is Measurement: + cls = ECMSMeasurement + + if "technique" not in kwargs: + if issubclass(cls, ECMSMeasurement): + kwargs["technique"] = "EC-MS" + elif issubclass(cls, ECMeasurement): + kwargs["technique"] = "EC" + elif issubclass(cls, MSMeasurement): + kwargs["technique"] = "MS" + + ec_ms_dataset = Zilien_Dataset(path_to_file) + + return measurement_from_ec_ms_dataset( + ec_ms_dataset.data, + cls=cls, + name=name, + reader=self, + aliases=ZILIEN_LEGACY_ALIASES, + **kwargs, + ) + + +class ZilienTMPReader: + """A class for stitching the files in a Zilien tmp directory to an ECMSMeasurement + + This is necessary because Zilien often crashes, leaving only the tmp directory. + This is less advanced but more readable than the Spectro Inlets stitching solution. + """ + + def __init__(self, path_to_tmp_dir=None): + self.path_to_tmp_dir = Path(path_to_tmp_dir) if path_to_tmp_dir else None + + def read(self, path_to_tmp_dir, cls=None, **kwargs): + """Make a measurement from all the single-value .tsv files in a Zilien tmp dir + + Args: + path_to_tmp_dir (Path or str): The path to the tmp dir + cls (Measurement class): Defaults to ECMSMeasurement + """ + if path_to_tmp_dir: + self.path_to_tmp_dir = Path(path_to_tmp_dir) + cls = cls or ECMSMeasurement + name = self.path_to_tmp_dir.parent.name + timestamp_string = name[:19] # the zilien timestamp is the first 19 chars + tstamp = timestamp_string_to_tstamp( + timestamp_string, form=ZILIEN_TIMESTAMP_FORM + ) + series_list = [] + for tmp_file in self.path_to_tmp_dir.iterdir(): + series_list += series_list_from_tmp(tmp_file) + obj_as_dict = { + "name": name, + "tstamp": tstamp, + "series_list": series_list, + "technique": "EC-MS", + "reader": self, + } + obj_as_dict.update(kwargs) + return cls.from_dict(obj_as_dict) + + +def series_list_from_tmp(path_to_file): + """Return [ValueSeries, TimeSeries] with the data in a zilien tmp .tsv file""" + file_name = Path(path_to_file).name + 
timestamp_string = file_name[:19]  # the zilien timestamp form is 19 chars long
+    tstamp = timestamp_string_to_tstamp(timestamp_string, form=ZILIEN_TIMESTAMP_FORM)
+    column_match = re.search(r"\.([^\.]+)\.data", file_name)
+    if not column_match:
+        print(f"could not find column name in {path_to_file}")
+        return []
+    v_name = column_match.group(1)
+    mass_match = re.search("M[0-9]+", v_name)
+    if mass_match:
+        v_name = mass_match.group()
+        unit = "A"
+    else:
+        unit = None
+    t_name = v_name + "-x"
+    df = pd.read_csv(path_to_file, delimiter="\t", names=[t_name, v_name], header=0)
+    t_data, v_data = df[t_name].to_numpy(), df[v_name].to_numpy()
+    tseries = TimeSeries(name=t_name, unit_name="s", data=t_data, tstamp=tstamp)
+    vseries = ValueSeries(name=v_name, unit_name=unit, data=v_data, tseries=tseries)
+    return [tseries, vseries]
+
+
+class ZilienSpectrumReader:
+    """A reader for individual Zilien spectra
+    TODO: A Zilien reader which loads all spectra at once in a SpectrumSeries object
+    """
+
+    def __init__(self, path_to_spectrum=None):
+        self.path_to_spectrum = Path(path_to_spectrum) if path_to_spectrum else None
+
+    def read(self, path_to_spectrum, cls=None, **kwargs):
+        """Read a Zilien spectrum.
+        FIXME: This reader was written hastily and could be designed better.
+ + Args: + path_to_tmp_dir (Path or str): the path to the tmp dir + cls (Spectrum class): Defaults to MSSpectrum + kwargs: Key-word arguments are passed on ultimately to cls.__init__ + """ + if path_to_spectrum: + self.path_to_spectrum = Path(path_to_spectrum) + cls = cls or MSSpectrum + df = pd.read_csv( + path_to_spectrum, + header=9, + delimiter="\t", + ) + x_name = "Mass [AMU]" + y_name = "Current [A]" + x = df[x_name].to_numpy() + y = df[y_name].to_numpy() + with open(self.path_to_spectrum, "r") as f: + for i in range(10): + line = f.readline() + if "Mass scan started at [s]" in line: + tstamp_match = re.search(FLOAT_MATCH, line) + tstamp = float(tstamp_match.group()) + xseries = DataSeries(data=x, name=x_name, unit_name="m/z") + tseries = TimeSeries( + data=np.array([0]), name="spectrum time / [s]", unit_name="s", tstamp=tstamp + ) + field = Field( + data=np.array([y]), + name=y_name, + unit_name="A", + axes_series=[xseries, tseries], + ) + obj_as_dict = { + "name": path_to_spectrum.name, + "technique": "MS", + "field": field, + "reader": self, + } + obj_as_dict.update(kwargs) + return cls.from_dict(obj_as_dict) + + +if __name__ == "__main__": + """Module demo here. + + To run this module in PyCharm, open Run Configuration and set + Module name = ixdat.readers.zilien, + and *not* + Script path = ... 
+ """ + + from pathlib import Path + from ixdat.measurements import Measurement + + path_to_test_file = Path.home() / ( + "Dropbox/ixdat_resources/test_data/" + # "zilien_with_spectra/2021-02-01 14_50_40.tsv" + "zilien_with_ec/2021-02-01 17_44_12.tsv" + ) + + ecms_measurement = Measurement.read( + reader="zilien", + path_to_file=path_to_test_file, + ) + + ecms_measurement.plot_measurement() diff --git a/src/ixdat/spectra.py b/src/ixdat/spectra.py new file mode 100644 index 00000000..3e96c1be --- /dev/null +++ b/src/ixdat/spectra.py @@ -0,0 +1,360 @@ +"""Base classes for spectra and spectrum series""" + +import numpy as np +from .db import Saveable, PlaceHolderObject +from .data_series import DataSeries, TimeSeries, Field +from .exceptions import BuildError +from .plotters.spectrum_plotter import SpectrumPlotter, SpectrumSeriesPlotter + + +class Spectrum(Saveable): + """The Spectrum class. + + A spectrum is a data structure including one-dimensional arrays of x and y variables + of equal length. Typically, information about the state of a sample can be obtained + from a plot of y (e.g. absorbance OR intensity OR counts) vs x (e.g energy OR + wavelength OR angle OR mass-to-charge ratio). Even though in reality it takes time + to require a spectrum, a spectrum is considered to represent one instance in time. + + In ixdat, the data of a spectrum is organized into a Field, where the y-data is + considered to span a space defined by the x-data and the timestamp. If the x-data + has shape (N, ), then the y-data has shape (N, 1) to span the x-axis and the + single-point t axis. + + The Spectrum class makes the data in this field intuitively available. If spec + is a spectrum, spec.x and spec.y give access to the x and y data, respectively, + while spec.xseries and spec.yseries give the corresponding DataSeries. 
+ """ + + table_name = "spectrum" + column_attrs = { + "name", + "technique", + "metadata", + "tstamp", + "sample_name", + "field_id", + } + child_attrs = ["fields"] + + def __init__( + self, + name, + technique="spectrum", + metadata=None, + sample_name=None, + reader=None, + tstamp=None, + field=None, + field_id=None, + ): + """Initiate a spectrum + + Args: + name (str): The name of the spectrum + metadata (dict): Free-form spectrum metadata. Must be json-compatible. + technique (str): The spectrum technique + sample_name (str): The sample name + reader (Reader): The reader, if read from file + tstamp (float): The unix epoch timestamp of the spectrum + field (Field): The Field containing the data (x, y, and tstamp) + field_id (id): The id in the data_series table of the Field with the data, + if the field is not yet loaded from backend. + """ + super().__init__() + self.name = name + self.technique = technique + self.metadata = metadata + self.tstamp = tstamp + self.sample_name = sample_name + self.reader = reader + # Note: the PlaceHolderObject can be initiated without the backend because + # if field_id is provided, then the relevant backend is the active one, + # which PlaceHolderObject uses by default. + self._field = field or PlaceHolderObject(field_id, cls=Field) + + self.plotter = SpectrumPlotter(spectrum=self) + # defining this method here gets it the right docstrings :D + self.plot = self.plotter.plot + + @classmethod + def read(cls, path_to_file, reader, **kwargs): + """Return a Measurement object from parsing a file with the specified reader + + Args: + path_to_file (Path or str): The path to the file to read + reader (str or Reader class): The (name of the) reader to read the file with. + kwargs: key-word arguments are passed on to the reader's read() method. + """ + if isinstance(reader, str): + # TODO: see if there isn't a way to put the import at the top of the module. 
+ # see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437471 + from .readers import READER_CLASSES + + reader = READER_CLASSES[reader]() + # print(f"{__name__}. cls={cls}") # debugging + return reader.read(path_to_file, cls=cls, **kwargs) + + @property + def data_objects(self): + """The data-containing objects that need to be saved when the spectrum is saved. + + For a field to be correctly saved and loaded, its axes_series must be saved + first. So there are three series in the data_objects to return + FIXME: with backend-specifying id's, field could check for itself whether + FIXME: its axes_series are already in the database. + """ + return self.series_list + + @classmethod + def from_data( + cls, + x, + y, + tstamp=None, + x_name="x", + y_name="y", + x_unit_name=None, + y_unit_name=None, + **kwargs, + ): + """Initiate a spectrum from data. Does so via cls.from_series + + Args: + x (np array): x data + y (np array): y data + tstamp (timestamp): The timestamp of the spectrum. Defaults to None. + x_name (str): Name of the x variable. Defaults to 'x' + y_name (str): Name of the y variable. Defaults to 'y' + x_unit_name (str): Name of the x unit. Defaults to None + y_unit_name (str): Name of the y unit. Defaults to None + kwargs: Key-word arguments are passed on ultimately to cls.__init__ + """ + xseries = DataSeries(data=x, name=x_name, unit_name=x_unit_name) + yseries = DataSeries(data=y, name=y_name, unit_name=y_unit_name) + return cls.from_series(xseries, yseries, tstamp, **kwargs) + + @classmethod + def from_series(cls, xseries, yseries, tstamp, **kwargs): + """Initiate a spectrum from data. Does so via cls.from_field + + Args: + xseries (DataSeries): A series with the x data + yseries (DataSeries): A series with the y data. The y data should be a + vector of the same length as the x data. + tstamp (timestamp): The timestamp of the spectrum. Defaults to None. 
+ kwargs: Key-word arguments are passed on ultimately to cls.__init__ + """ + field = Field( + data=yseries.data, + axes_series=[xseries], + name=yseries.name, + unit_name=yseries.unit_name, + ) + kwargs.update(tstamp=tstamp) + return cls.from_field(field, **kwargs) + + @classmethod + def from_field(cls, field, **kwargs): + """Initiate a spectrum from data. Does so via cls.from_field + + Args: + field (Field): The field containing all the data of the spectrum. + field.data is the y-data, which is considered to span x and t. + field.axes_series[0] is a DataSeries with the x data. + field.axes_series[1] is a TimeSeries with one time point. + kwargs: key-word arguments are passed on ultimately to cls.__init__ + """ + spectrum_as_dict = kwargs + spectrum_as_dict["field"] = field + if "name" not in spectrum_as_dict: + spectrum_as_dict["name"] = field.name + return cls.from_dict(spectrum_as_dict) + + @property + def field(self): + """Since a spectrum can be loaded lazily, we make sure the field is loaded""" + if isinstance(self._field, PlaceHolderObject): + self._field = self._field.get_object() + return self._field + + @property + def fields(self): + return [self.field] + + @property + def field_id(self): + """The id of the field""" + return self.field.id + + @property + def xseries(self): + """The x DataSeries is the first axis of the field""" + return self.field.axes_series[0] + + @property + def series_list(self): + """A Spectrum's series list includes its field and its axes_series.""" + return [self.field] + self.field.axes_series + + @property + def x(self): + """The x data is the data attribute of the xseries""" + return self.xseries.data + + @property + def x_name(self): + """The name of the x variable is the name attribute of the xseries""" + return self.xseries.name + + @property + def yseries(self): + """The yseries is a DataSeries reduction of the field""" + return DataSeries( + name=self.field.name, data=self.y, unit_name=self.field.unit_name + ) + + 
@property + def y(self): + """The y data is the one-dimensional data attribute of the field""" + return self.field.data[0] + + @property + def y_name(self): + """The name of the y variable is the name attribute of the field""" + return self.field.name + + @property + def tseries(self): + """The TimeSeries of a spectrum is a single point [0] and its tstamp""" + return TimeSeries( + name="time / [s]", unit_name="s", data=np.array([0]), tstamp=self.tstamp + ) + + def __add__(self, other): + """Adding spectra makes a (2)x(N_x) SpectrumSeries. self comes before other.""" + if not self.x == other.x: # FIXME: Some depreciation here. How else? + raise BuildError( + "can't add spectra with different `x`. " + "Consider the function `append_spectra` instead." + ) + t = np.array([0, other.tstamp - self.tstamp]) + tseries = TimeSeries( + name="time / [s]", unit_name="s", data=t, tstamp=self.tstamp + ) + new_field = Field( + name=self.name, + unit_name=self.field.unit_name, + data=np.array([self.y, other.y]), + axes_series=[tseries, self.xseries], + ) + spectrum_series_as_dict = self.as_dict() + spectrum_series_as_dict["field"] = new_field + del spectrum_series_as_dict["field_id"] + + return SpectrumSeries.from_dict(spectrum_series_as_dict) + + +class SpectrumSeries(Spectrum): + """The SpectrumSeries class. + + A spectrum series is a data structure including a two-dimensional array, each row of + which is a spectrum, and each column of which is one spot in the spectrum as it + changes with some other variable. + + In ixdat, the data of a spectrum series is organized into a Field, where the y-data + is considered to span a space defined by a DataSeries which is the x data, and a + DataSeries (typically a TimeSeries) which enumerates or specifies when or under + which conditions each spectrum was taken. The spectrum series will consider this + its "time" variable even if it is not actually time. + + The SpectrumSeries class makes the data in this field intuitively available. 
If
+    spec is a spectrum series, spec.x is the x data with shape (N, ), spec.t is the
+    time data with shape (M, ), and spec.y is the spectrum data with shape (M, N).
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initiate a spectrum series
+
+        Args:
+            name (str): The name of the spectrum series
+            metadata (dict): Free-form spectrum metadata. Must be json-compatible.
+            technique (str): The spectrum technique
+            sample_name (str): The sample name
+            reader (Reader): The reader, if read from file
+            tstamp (float): The unix epoch timestamp of the spectrum
+            field (Field): The Field containing the data (x, y, and tstamp)
+            field_id (id): The id in the data_series table of the Field with the data,
+                if the field is not yet loaded from backend.
+        """
+        if "technique" not in kwargs:
+            kwargs["technique"] = "spectra"
+        super().__init__(*args, **kwargs)
+        self.plotter = SpectrumSeriesPlotter(spectrum_series=self)
+
+    @property
+    def yseries(self):
+        # Should this return an average or would that be counterintuitive?
+        raise BuildError(f"{self} has no single y-series. Index it to get a Spectrum "
+                         "or see `y_average`")
+
+    @property
+    def tseries(self):
+        """The TimeSeries of a SpectrumSeries is the 0'th axis of its field.
+        Note that its data is not sorted!
+        """
+        return self.field.axes_series[0]
+
+    @property
+    def t(self):
+        """The time array of a SpectrumSeries is the data of its tseries.
+        Note that it is not sorted!
+ """ + return self.tseries.data + + @property + def t_name(self): + """The name of the time variable of the spectrum series""" + return self.tseries.name + + @property + def xseries(self): + """The x-axis DataSeries of a SectrumSeries is the 1'st axis of its field""" + return self.field.axes_series[1] + + @property + def x(self): + """The x (scanning variable) data""" + return self.xseries.data + + @property + def x_name(self): + """The name of the scanning variable""" + return self.xseries.name + + @property + def y(self): + """The y data is the multi-dimensional data attribute of the field""" + return self.field.data + + def __getitem__(self, key): + """Indexing a SpectrumSeries with an int n returns its n'th spectrum""" + if isinstance(key, int): + spectrum_as_dict = self.as_dict() + del spectrum_as_dict["field_id"] + spectrum_as_dict["field"] = Field( + # note that it's important in some cases that the spectrum does not have + # the same name as the spectrum series: + name=self.y_name + "_" + str(key), + unit_name=self.field.unit_name, + data=self.y[key], + axes_series=[self.xseries], + ) + spectrum_as_dict["tstamp"] = self.tstamp + self.t[key] + return Spectrum.from_dict(spectrum_as_dict) + raise KeyError + + @property + def y_average(self): + """The y-data of the average spectrum""" + return np.mean(self.y, axis=0) diff --git a/src/ixdat/techniques/__init__.py b/src/ixdat/techniques/__init__.py index 8b6abcf3..0dc10e61 100644 --- a/src/ixdat/techniques/__init__.py +++ b/src/ixdat/techniques/__init__.py @@ -8,9 +8,10 @@ """ from .ec import ECMeasurement, ECCalibration -from .cv import CyclicVoltammagram -from .ms import MSMeasurement -from .ec_ms import ECMSMeasurement +from .cv import CyclicVoltammogram +from .ms import MSMeasurement, MSCalibration +from .ec_ms import ECMSMeasurement, ECMSCalibration +from .spectroelectrochemistry import SpectroECMeasurement from ..measurements import Measurement # for importing in the technique modules # TODO: Is something 
like DecoMeasurement a Measurement or something else? @@ -18,9 +19,14 @@ TECHNIQUE_CLASSES = { "simple": Measurement, "EC": ECMeasurement, - "CV": CyclicVoltammagram, + "CV": CyclicVoltammogram, "MS": MSMeasurement, "EC-MS": ECMSMeasurement, + "S-EC": SpectroECMeasurement, } -CALIBRATION_CLASSES = {"EC": ECCalibration} +CALIBRATION_CLASSES = { + "EC": ECCalibration, + "MS": MSCalibration, + "EC-MS": ECMSCalibration, +} diff --git a/src/ixdat/techniques/analysis_tools.py b/src/ixdat/techniques/analysis_tools.py new file mode 100644 index 00000000..3b937390 --- /dev/null +++ b/src/ixdat/techniques/analysis_tools.py @@ -0,0 +1,199 @@ +"""Miscellaneous tools for data analysis used in measurement techniques""" + +import numpy as np +from scipy.optimize import minimize + + +def tspan_passing_through(t, v, vspan, direction=None, t_i=None, v_res=None): + """Return the tspan corresponding to t when v first passes through vspan + + Args: + t (np.array): independent varible data (usually time) + v (np.array): dependent variable data + vspan (iter of float): The range of v that we are interested in. + direction (bool): Whether v should be increasing (True) or decreasing + (False) as it passes through vspan. By default, the direction is + defined by whether vspan is increasing or decreasing. + t_i (float): The lowest value of t acceptable for tspan. Optional. + v_res (float): The uncertainty or resolution of the v data. v must be + at in or out of vspan by at least v_res to be considered in or out. 
+ """ + + t_i = t_i if t_i is not None else t[0] - 1 + + # define some things to generalize between anodic and cathodic + if direction is None: + direction = vspan[0] < vspan[-1] + + v_res = v_res if v_res is not None else np.abs(vspan[-1] - vspan[0]) / 100 + v_res = np.abs(v_res) if direction else -np.abs(v_res) + + def before(a, b): + if direction: + # before means more cathodic if we want the anodic sweep + return a < b + else: + # and more anodic if we want the cathodic sweep + return a > b + + if direction: + # we start with the lower limit of V_span if we want the anodic sweep + vspan = np.sort(np.array(vspan)) + else: + # and with the upper limit of V_span if we want the cathodic sweep + vspan = -np.sort(-np.array(vspan)) + + t_before = t[ + np.argmax( + np.logical_and( + t > t_i, before(v, vspan[0] - v_res) + ) # True if after t_i and comfortably out on start side + ) # first index for which V is comfortably out on start side + ] # corresponding time + t_just_before = t[ + np.argmax( + np.logical_and( + t > t_before, np.logical_not(before(v, vspan[0])) + ) # True if after t_i and in on start side + ) + - 1 # last index for which V is out on start side + ] # corresponding time + i_start = np.argmax(np.logical_and(t > t_just_before, before(vspan[0], v))) + # ^ first index of full sweep through range + t_start = t[i_start] + # ^ corresponding time + i_finish = np.argmax(np.logical_and(t > t_start, before(vspan[1], v))) - 1 + # ^ last index of full sweep through range + t_finish = t[i_finish] + # ^ corresponding time + return [t_start, t_finish] + + +def calc_sharp_v_scan(t, v, res_points=10): + """Calculate the discontinuous rate of change of v with respect to t + + Args: + t (np.array): the data of the independent variable, typically time + v (np.array): the data of the dependent variable + res_points (int): the resolution in data points, i.e. the spacing used in + the slope equation v_scan = (v2 - v1) / (t2 - t1) + """ + # the scan rate is dV/dt. 
This is a numerical calculation of dV/dt: + v_behind = np.append(np.tile(v[0], res_points), v[:-res_points]) + v_ahead = np.append(v[res_points:], np.tile(v[-1], res_points)) + + t_behind = np.append(np.tile(t[0], res_points), t[:-res_points]) + t_ahead = np.append(t[res_points:], np.tile(t[-1], res_points)) + + v_scan_middle = (v_ahead - v_behind) / (t_ahead - t_behind) + # ^ this is "softened" at the anodic and cathodic turns. + + # We can "sharpen" it by selectively looking ahead and behind: + v_scan_behind = (v - v_behind) / (t - t_behind) + v_scan_ahead = (v_ahead - v) / (t_ahead - t) + + # but this gives problems right at the beginning, so set those to zeros + v_scan_behind[:res_points] = np.zeros(res_points) + v_scan_ahead[-res_points:] = np.zeros(res_points) + + # now sharpen the scan rate! + v_scan = v_scan_middle + mask_use_ahead = np.logical_and( + np.abs(v_scan_ahead) > np.abs(v_scan), + np.abs(v_scan_ahead) > np.abs(v_scan_behind), + ) + v_scan[mask_use_ahead] = v_scan_ahead[mask_use_ahead] + + mask_use_behind = np.logical_and( + np.abs(v_scan_behind) > np.abs(v_scan), + np.abs(v_scan_behind) > np.abs(v_scan_ahead), + ) + v_scan[mask_use_behind] = v_scan_behind[mask_use_behind] + + return v_scan + + +def find_signed_sections(x, x_res=0.001, res_points=10): + """Return list of tuples ((i_start, i_finish), section_type) describing the vector x + + `i_start` and `i_finish` are indexes in x defining where sections start and end. + `section_type` can be "positive" (x>0), "negative" (x<0) or "zero" (x~0). + + Args: + x (np array): The data as a vector + x_res (float): The minimum value in x to be considered different from zero, + i.e. the uncertainty or resolution of the data + res_points (int): The minimum number of consecutive data points in x that must + have the same sign (or be ~0) to constitute a section of the data. 
+ """ + mask_negative = x < -x_res + mask_positive = x > x_res + mask_zero = abs(x) < x_res + + section_types = ["negative", "positive", "zero"] + the_masks = [mask_negative, mask_positive, mask_zero] + + for mask in the_masks: + mask[-2] = False + mask[-1] = True + N = len(x) + i_start = 0 + i_finish = 0 + n_sweep = 0 + + the_next_starts = [np.argmax(mask) for mask in the_masks] + section_id = int(np.argmin(the_next_starts)) + + sections = [] + while i_start < N - 1: + I_out = np.argmin(the_masks[section_id][i_finish:]) + the_next_start = i_finish + I_out + res_points + + try: + I_in_again = np.argmax(the_masks[section_id][the_next_start:]) + except ValueError: + the_next_starts[section_id] = N + else: + the_next_starts[section_id] = the_next_start + I_in_again + # ^ and add it. + + next_section_id = int(np.argmin(the_next_starts)) + i_finish = the_next_starts[next_section_id] + + if next_section_id != section_id: + sections.append(((i_start, i_finish), section_types[section_id])) + section_id = next_section_id + n_sweep += 1 + i_start = i_finish + else: + i_start += res_points + + return sections + + +def calc_t_using_scan_rate(v, dvdt): + """Return a numpy array describing the time corresponding to v given scan rate dvdt + + This is useful for data sets where time is missing. It depends on another value + having a constant absolute rate of change (such as electrode potential in cyclic + voltammatry). + It uses the `calc_sharp_v_scan` algorithm to match the scan rate implied by the + timevector returned with the given scan rate. 
+ Args: + v (np array): The value + dvdt (float): The scan rate in units of v's unit per second + Returns: + np array: t, the time vector corresponding to v + """ + + def error(t_tot): + t = np.linspace(0, t_tot[0], v.size) + dvdt_calc = np.abs(calc_sharp_v_scan(t, v)) + error = np.sum(dvdt_calc ** 2 - dvdt ** 2) + return error + + t_total_guess = (max(v) - min(v)) / dvdt + result = minimize(error, np.array(t_total_guess)) + + t_total = result.x[0] + return np.linspace(0, t_total, v.size) diff --git a/src/ixdat/techniques/cv.py b/src/ixdat/techniques/cv.py index c402dd02..9026b140 100644 --- a/src/ixdat/techniques/cv.py +++ b/src/ixdat/techniques/cv.py @@ -1,19 +1,27 @@ import numpy as np from .ec import ECMeasurement -from ..data_series import ValueSeries +from ..data_series import ValueSeries, TimeSeries +from ..exceptions import BuildError, SeriesNotFoundError +from .analysis_tools import ( + tspan_passing_through, + calc_sharp_v_scan, + find_signed_sections, +) +from ..plotters.ec_plotter import CVDiffPlotter -class CyclicVoltammagram(ECMeasurement): - """Class for cyclic voltammatry measurements. +class CyclicVoltammogram(ECMeasurement): + """Class for cyclic voltammetry measurements. Onto ECMeasurement, this adds: - a property `cycle` which is a ValueSeries on the same TimeSeries as potential, - which counts cycles. "cycle" becomes the Measurement's `sel_str`. Indexing with - integer or iterable selects according to `cycle`. + which counts cycles. "cycle" becomes the Measurement's `sel_str`. Indexing with + integer or iterable selects according to `cycle`. 
- functions for quantitatively comparing cycles (like a stripping cycle, base cycle) - the default plot() is plot_vs_potential() """ + essential_series_names = ("t", "raw_potential", "raw_current", "cycle") selector_name = "cycle" """Name of the default selector""" @@ -22,18 +30,24 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.plot = self.plotter.plot_vs_potential # gets the right docstrings! :D + try: + _ = self["cycle"] + except SeriesNotFoundError: + median_potential = 1 / 2 * (np.max(self.v) + np.min(self.v)) + self.redefine_cycle(start_potential=median_potential, redox=True) + self.start_potential = None # see `redefine_cycle` self.redox = None # see `redefine_cycle` def __getitem__(self, key): - """Given int list or slice key, return a CyclicVoltammagram with those cycles""" + """Given int list or slice key, return a CyclicVoltammogram with those cycles""" if isinstance(key, slice): start, stop, step = key.start, key.stop, key.step if step is None: step = 1 key = list(range(start, stop, step)) if isinstance(key, (int, list)): - if type(key) is list and not all([type(i) is int for i in key]): + if isinstance(key, list) and not all([isinstance(i, int) for i in key]): print("can't get an item of type list unless all elements are int") print(f"you tried to get key = {key}.") raise AttributeError @@ -106,3 +120,218 @@ def redefine_cycle(self, start_potential=None, redox=None, N_points=5): tseries=self.potential.tseries, ) self.replace_series("cycle", new_cycle_series) + + def select_sweep(self, vspan, t_i=None): + """Return the cut of the CV for which the potential is sweeping through vspan + + Args: + vspan (iter of float): The range of self.potential for which to select data. + Vspan defines the direction of the sweep. If vspan[0] < vspan[-1], an + oxidative sweep is returned, i.e. one where potential is increasing. + If vspan[-1] < vspan[0], a reductive sweep is returned. + t_i (float): Optional. 
Time before which the sweep can't start
+        """
+        tspan = tspan_passing_through(
+            t=self.t,
+            v=self.v,
+            vspan=vspan,
+            t_i=t_i,
+        )
+        return self.cut(tspan=tspan)
+
+    def integrate(self, item, tspan=None, vspan=None, ax=None):
+        """Return the time integral of item while time in tspan or potential in vspan
+
+        Args:
+            item (str): The name of the ValueSeries to integrate
+            tspan (iter of float): A time interval over which to integrate it
+            vspan (iter of float): A potential interval over which to integrate it
+        """
+        if vspan:
+            return self.select_sweep(
+                vspan=vspan, t_i=tspan[0] if tspan else None
+            ).integrate(item, ax=ax)
+        return super().integrate(item, tspan, ax=ax)
+
+    @property
+    def scan_rate(self):  # was (self, res_points=10): a property getter is only
+        """The scan rate as a ValueSeries"""  # ever called with self, so that
+        t, v = self.grab("potential")  # parameter was dead; resolution fixed below
+        scan_rate_vec = calc_sharp_v_scan(t, v, res_points=10)
+        scan_rate_series = ValueSeries(
+            name="scan rate",
+            unit_name="V/s",  # TODO: unit = potential.unit / potential.tseries.unit
+            data=scan_rate_vec,
+            tseries=self.potential.tseries,
+        )
+        # TODO: cache'ing, index accessibility
+        return scan_rate_series
+
+    def get_timed_sweeps(self, v_scan_res=5e-4, res_points=10):
+        """Return list of [(tspan, type)] for all the potential sweeps in self.
+
+        There are three types: "anodic" (positive scan rate), "cathodic" (negative scan
+        rate), and "hold" (zero scan rate)
+
+        Args:
+            v_scan_res (float): The minimum scan rate considered significantly different
+                than zero, in [V/s]. Defaults to 5e-4 V/s (0.5 mV/s). May need to be
+                higher for noisy potential, and lower for very low scan rates.
+            res_points (int): The minimum number of points to be considered a sweep.
+                During a sweep, a potential difference of at least `v_scan_res` should
+                be scanned through every `res_points` points.
+ """ + t = self.t + ec_sweep_types = { + "positive": "anodic", + "negative": "cathodic", + "zero": "hold", + } + indexed_sweeps = find_signed_sections( + self.scan_rate.data, x_res=v_scan_res, res_points=res_points + ) + timed_sweeps = [] + for (i_start, i_finish), general_sweep_type in indexed_sweeps: + timed_sweeps.append( + ((t[i_start], t[i_finish]), ec_sweep_types[general_sweep_type]) + ) + return timed_sweeps + + def calc_capacitance(self, vspan): + """Return the capacitance in [F], calculated by the first sweeps through vspan + + Args: + vspan (iter of floats): The potential range in [V] to use for capacitance + """ + sweep_1 = self.select_sweep(vspan) + v_scan_1 = np.mean(sweep_1.grab("scan_rate")[1]) # [V/s] + I_1 = np.mean(sweep_1.grab("raw_current")[1]) # [mA] -> [A] + + sweep_2 = self.select_sweep([vspan[-1], vspan[0]]) + v_scan_2 = np.mean(sweep_2.grab("scan_rate")[1]) # [V/s] + I_2 = np.mean(sweep_2.grab("raw_current")[1]) * 1e-3 # [mA] -> [A] + + cap = 1 / 2 * (I_1 / v_scan_1 + I_2 / v_scan_2) # [A] / [V/s] = [C/V] = [F] + return cap + + def diff_with(self, other, v_list=None, cls=None, v_scan_res=0.001, res_points=10): + """Return a CyclicVotammagramDiff of this CyclicVotammagram with another one + + Each anodic and cathodic sweep in other is lined up with a corresponding sweep + in self. Each variable given in v_list (defaults to just "current") is + interpolated onto self's potential and subtracted from self. + + Args: + other (CyclicVoltammogram): The cyclic voltammogram to subtract from self. + v_list (list of str): The names of the series to calculate a difference + between self and other for (defaults to just "current"). + cls (ECMeasurement subclass): The class to return an object of. Defaults to + CyclicVoltammogramDiff. 
+ v_scan_res (float): see :meth:`get_timed_sweeps` + res_points (int): see :meth:`get_timed_sweeps` + """ + + vseries = self.potential + tseries = vseries.tseries + series_list = [tseries, self["raw_potential"], self["cycle"]] + + v_list = v_list or ["current", "raw_current"] + if "potential" in v_list: + raise BuildError( + f"v_list={v_list} is invalid. 'potential' is used to interpolate." + ) + + my_sweep_specs = [ + spec + for spec in self.get_timed_sweeps( + v_scan_res=v_scan_res, res_points=res_points + ) + if spec[1] in ["anodic", "cathodic"] + ] + others_sweep_specs = [ + spec + for spec in other.get_timed_sweeps( + v_scan_res=v_scan_res, res_points=res_points + ) + if spec[1] in ["anodic", "cathodic"] + ] + if not len(my_sweep_specs) == len(others_sweep_specs): + raise BuildError( + "Can only make diff of CyclicVoltammograms with same number of sweeps." + f"{self} has {my_sweep_specs} and {other} has {others_sweep_specs}." + ) + + diff_values = {name: np.array([]) for name in v_list} + t_diff = np.array([]) + + for my_spec, other_spec in zip(my_sweep_specs, others_sweep_specs): + sweep_type = my_spec[1] + if not other_spec[1] == sweep_type: + raise BuildError( + "Corresponding sweeps must be of same type when making diff." + f"Can't align {self}'s {my_spec} with {other}'s {other_spec}." 
+ ) + my_tspan = my_spec[0] + other_tspan = other_spec[0] + my_t, my_potential = self.grab( + "potential", my_tspan, include_endpoints=False + ) + t_diff = np.append(t_diff, my_t) + other_t, other_potential = other.grab( + "potential", other_tspan, include_endpoints=False + ) + if sweep_type == "anodic": + other_t_interp = np.interp( + np.sort(my_potential), np.sort(other_potential), other_t + ) + elif sweep_type == "cathodic": + other_t_interp = np.interp( + np.sort(-my_potential), np.sort(-other_potential), other_t + ) + else: + continue + for name in v_list: + my_v = self.grab_for_t(name, my_t) + other_v = other.grab_for_t(name, other_t_interp) + diff_v = my_v - other_v + diff_values[name] = np.append(diff_values[name], diff_v) + + t_diff_series = TimeSeries( + name="time/[s] for diffs", unit_name="s", data=t_diff, tstamp=self.tstamp + ) # I think this is the same as self.potential.tseries + + series_list.append(t_diff_series) + for name, data in diff_values.items(): + series_list.append( + ValueSeries( + name=name, + unit_name=self[name].unit_name, + data=data, + tseries=t_diff_series, + ) + ) + + diff_as_dict = self.as_dict() + del diff_as_dict["s_ids"] + + diff_as_dict["series_list"] = series_list + + cls = cls or CyclicVoltammogramDiff + diff = cls.from_dict(diff_as_dict) + # TODO: pass cv_compare_1 and cv_compare_2 to CyclicVoltammogramDiff as dicts + diff.cv_compare_1 = self + diff.cv_compare_2 = other + return diff + + +class CyclicVoltammogramDiff(CyclicVoltammogram): + + default_plotter = CVDiffPlotter + cv_compare_1 = None + cv_compare_2 = None + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.plot = self.plotter.plot + self.plot_diff = self.plotter.plot_diff + self.plotter = CVDiffPlotter(measurement=self) diff --git a/src/ixdat/techniques/deconvolution.py b/src/ixdat/techniques/deconvolution.py new file mode 100644 index 00000000..f06f2842 --- /dev/null +++ b/src/ixdat/techniques/deconvolution.py @@ -0,0 +1,246 @@ 
+"""Module for deconvolution of mass transport effects.""" + +from .ec_ms import ECMSMeasurement +from scipy.optimize import curve_fit # noqa +from scipy.interpolate import interp1d # noqa +from scipy import signal # noqa +from mpmath import invertlaplace, sinh, cosh, sqrt, exp, erfc, pi, tanh, coth # noqa +import matplotlib.pyplot as plt +from numpy.fft import fft, ifft, ifftshift, fftfreq # noqa +import numpy as np + +# FIXME: too much abbreviation in this module. + + +class DecoMeasurement(ECMSMeasurement): + """Class implementing deconvolution of EC-MS data""" + + def __init__(self, name, **kwargs): + """Initialize a deconvolution EC-MS measurement + + Args: + name (str): The name of the measurement""" + super().__init__(name, **kwargs) + + def grab_partial_current( + self, signal_name, kernel_obj, tspan=None, tspan_bg=None, snr=10 + ): + """Return the deconvoluted partial current for a given signal + + Args: + signal_name (str): Name of signal for which deconvolution is to + be carried out. + kernel_obj (Kernel): Kernel object which contains the mass transport + parameters + tspan (list): Timespan for which the partial current is returned. + tspan_bg (list): Timespan that corresponds to the background signal. + snr (int): signal-to-noise ratio used for Wiener deconvolution. + """ + # TODO: comments in this method so someone can tell what's going on! + + t_sig, v_sig = self.grab_cal_signal(signal_name, tspan=tspan, tspan_bg=tspan_bg) + + kernel = kernel_obj.calculate_kernel( + dt=t_sig[1] - t_sig[0], duration=t_sig[-1] - t_sig[0] + ) + kernel = np.hstack((kernel, np.zeros(len(v_sig) - len(kernel)))) + H = fft(kernel) + # TODO: store this as well. + partial_current = np.real( + ifft(fft(v_sig) * np.conj(H) / (H * np.conj(H) + (1 / snr) ** 2)) + ) + partial_current = partial_current * sum(kernel) + return t_sig, partial_current + + def extract_kernel(self, signal_name, cutoff_pot=0, tspan=None, tspan_bg=None): + """Extracts a Kernel object from a measurement. 
+
+        Args:
+            signal_name (str): Signal name from which the kernel/impulse
+                response is to be extracted.
+            cutoff_pot (int): Potential which defines the onset of the
+                impulse. Must be larger than the resting potential before the
+                impulse.
+            tspan(list): Timespan from which the kernel/impulse response is
+                extracted.
+            tspan_bg (list): Timespan that corresponds to the background signal.
+        """
+        x_curr, y_curr = self.grab_current(tspan=tspan)
+        x_pot, y_pot = self.grab_potential(tspan=tspan)
+        x_sig, y_sig = self.grab_signal(signal_name, tspan=tspan, tspan_bg=tspan_bg)
+
+        if signal_name == "M32":
+            t0 = x_curr[np.argmax(y_pot > cutoff_pot)]  # time of impulse
+        elif signal_name == "M2" or signal_name == "M17":
+            t0 = x_curr[np.argmax(y_pot < cutoff_pot)]
+        else:
+            raise ValueError(f"mass {signal_name} not found")  # t0 would be undefined
+
+        x_sig = x_sig - t0
+
+        y_sig = y_sig[x_sig > 0]
+        x_sig = x_sig[x_sig > 0]
+
+        y_curr = y_curr[x_curr > t0]
+        x_curr = x_curr[x_curr > t0]
+        y_pot = y_pot[x_pot > t0]
+        x_pot = x_pot[x_pot > t0]
+
+        kernel = Kernel(
+            MS_data=np.array([x_sig, y_sig]),
+            EC_data=np.array([x_curr, y_curr, x_pot, y_pot]),
+        )
+
+        return kernel
+
+
+class Kernel:
+    """Kernel class implementing data treatment of kernel/impulse response data."""
+
+    # TODO: Make class inherit from Measurement, add properties to store kernel
+    # TODO: Reference equations to paper.
+    def __init__(
+        self,
+        parameters={},  # FIXME: no mutable default arguments!
+        MS_data=None,
+        EC_data=None,
+    ):
+        """Initializes a Kernel object either in functional form by defining the
+        mass transport parameters or in the measured form by passing of EC-MS
+        data.
+
+        Args:
+            parameters (dict): Dictionary containing the mass transport
+                parameters with the following keys:
+                    diff_const: Diffusion constant in liquid
+                    work_dist: Working distance between electrode and gas/liq interface
+                    vol_gas: Gas sampling volume of the chip
+                    volflow_cap: Volumetric capillary flow
+                    henry_vola: Dimensionless Henry volatility
+            MS_data (list): List of numpy arrays containing the MS signal
+                data.
+            EC_data (list): List of numpy arrays containing the EC (time,
+                current, potential).
+        """
+
+        # `is not None`: truth-testing a multi-element ndarray raises ValueError
+        if MS_data is not None and parameters:  # TODO: Make two different classes
+            raise Exception(
+                "Kernel can only be initialized with data OR parameters, not both"
+            )
+        if EC_data is not None and MS_data is not None:
+            print("Generating kernel from measured data")
+            self.type = "measured"
+        elif parameters:
+            print("Generating kernel from parameters")
+            self.type = "functional"
+        else:
+            print("Generating blank kernel")
+            self.type = None
+
+        self.params = parameters
+        self.MS_data = MS_data
+        self.EC_data = EC_data  # x_curr, y_curr, x_pot, y_pot
+
+    @property
+    def sig_area(self):
+        """Integrates a measured impulse response and returns the area."""
+        delta_sig = self.MS_data[1] - self.MS_data[1][-1]
+        sig_area = np.trapz(delta_sig, self.MS_data[0])
+
+        return sig_area
+
+    @property
+    def charge(self):
+        """Integrates the measured current over the time."""
+        y_curr = self.EC_data[1]
+
+        mask = np.isclose(y_curr, y_curr[0], rtol=1e-1)
+
+        Q = np.trapz(y_curr[mask], self.EC_data[0][mask])
+
+        return Q
+
+    def plot(self, dt=0.1, duration=100, ax=None, norm=True, **kwargs):
+        """Returns a plot of the kernel/impulse response."""
+        if ax is None:
+            fig1 = plt.figure()
+            ax = fig1.add_subplot(111)
+
+        if self.type == "functional":
+            t_kernel = np.arange(0, duration, dt)
+            ax.plot(
+                t_kernel,
+                self.calculate_kernel(dt=dt, duration=duration, norm=norm),
+                **kwargs,
+            )
+
+        elif self.type == "measured":
+            ax.plot(
+                self.MS_data[0],
+                self.calculate_kernel(dt=dt, duration=duration, norm=norm),
+                
**kwargs,
+            )
+
+        else:
+            raise Exception("Nothing to plot with blank kernel")
+
+        return ax
+
+    def calculate_kernel(self, dt=0.1, duration=100, norm=True, matrix=False):
+        """Calculates a kernel/impulse response.
+
+        Args:
+            dt (float): Timestep for which the kernel/impulse response is calculated.
+                Has to match the timestep of the measured data for deconvolution.
+            duration (int): Duration in seconds for which the kernel/impulse response
+                is calculated. Must be long enough to reach zero.
+            norm (bool): If true the kernel/impulse response is normalized to its
+                area.
+            matrix (bool): If true the circulant matrix constructed from the kernel/
+                impulse response is returned.
+        """
+        if self.type == "functional":
+
+            t_kernel = np.arange(0, duration, dt)
+            t_kernel[0] = 1e-6
+
+            diff_const = self.params["diff_const"]
+            work_dist = self.params["work_dist"]
+            vol_gas = self.params["vol_gas"]
+            volflow_cap = self.params["volflow_cap"]
+            henry_vola = self.params["henry_vola"]
+
+            tdiff = t_kernel * diff_const / (work_dist ** 2)
+
+            def fs(s):
+                # See Krempl et al, 2021. Equation 6.
+ # https://pubs.acs.org/doi/abs/10.1021/acs.analchem.1c00110 + return 1 / ( + sqrt(s) * sinh(sqrt(s)) + + (vol_gas * henry_vola / 0.196e-4 / work_dist) + * (s + volflow_cap / vol_gas * work_dist ** 2 / diff_const) + * cosh(sqrt(s)) + ) + + kernel = np.zeros(len(t_kernel)) + for i in range(len(t_kernel)): + kernel[i] = invertlaplace(fs, tdiff[i], method="talbot") + print(tdiff[i]) + print(kernel[i]) + + elif self.type == "measured": + kernel = self.MS_data[1] + t_kernel = self.MS_data[0] + + if norm: + area = np.trapz(kernel, t_kernel) + kernel = kernel / area + + if matrix: + kernel = np.tile(kernel, (len(kernel), 1)) + i = 1 + while i < len(t_kernel): + kernel[i] = np.concatenate((kernel[0][i:], kernel[0][:i])) + i = i + 1 + + return kernel diff --git a/src/ixdat/techniques/ec.py b/src/ixdat/techniques/ec.py index 368c73bd..3e1b8517 100644 --- a/src/ixdat/techniques/ec.py +++ b/src/ixdat/techniques/ec.py @@ -1,7 +1,5 @@ """Module for representation and analysis of EC measurements""" -import numpy as np - from ..measurements import Measurement, Calibration from ..data_series import ValueSeries from ..exporters.ec_exporter import ECExporter @@ -20,7 +18,7 @@ class ECMeasurement(Measurement): """Class implementing electrochemistry measurements TODO: Implement a unit library for current and potential, A_el and RE_vs_RHE - TODO: so that e.g. current can be seamlessly normalized to mass OR area. + so that e.g. current can be seamlessly normalized to mass OR area. The main job of this class is making sure that the ValueSeries most essential for visualizing and normal electrochemistry measurements (i.e. excluding impedance @@ -84,7 +82,7 @@ class ECMeasurement(Measurement): It turns out that keeping track of current, potential, and selector when combining datasets is enough of a job to fill a class. Thus, the more exciting electrochemistry-related functionality should be implemented in inheriting classes - such as `CyclicVoltammagram`. + such as `CyclicVoltammogram`. 
""" extra_column_attrs = { @@ -93,14 +91,10 @@ class ECMeasurement(Measurement): } } control_series_name = "raw_potential" - essential_series_names = ("t", "raw_potential", "raw_current", "cycle") + essential_series_names = ("t", "raw_potential", "raw_current") selection_series_names = ("file_number", "loop_number", "cycle") default_exporter = ECExporter default_plotter = ECPlotter - v_name = EC_FANCY_NAMES["potential"] - j_name = EC_FANCY_NAMES["current"] - E_name = EC_FANCY_NAMES["raw_potential"] - I_name = EC_FANCY_NAMES["raw_current"] def __init__( self, @@ -147,6 +141,26 @@ def __init__( self.calibrate(RE_vs_RHE, A_el, R_Ohm) self.plot_vs_potential = self.plotter.plot_vs_potential + @property + def E_name(self): + return self["raw_potential"].name + + @property + def I_name(self): + return self["raw_current"].name + + @property + def v_name(self): + if self.RE_vs_RHE is not None: + return EC_FANCY_NAMES["potential"] + return self.E_name + + @property + def j_name(self): + if self.A_el is not None: + return EC_FANCY_NAMES["current"] + return self.I_name + @property def aliases(self): """A dictionary with the names of other data series a given name can refer to""" @@ -265,30 +279,6 @@ def potential(self): def current(self): return self["current"] - def grab_potential(self, tspan=None): - """Return the time [s] and potential [V] vectors cut by tspan - - TODO: I think this is identical, now that __getitem__ finds potential, to - self.grab("potential", tspan=tspan) - """ - t = self.potential.t.copy() - v = self.potential.data.copy() - if tspan: - mask = np.logical_and(tspan[0] < t, t < tspan[-1]) - t = t[mask] - v = v[mask] - return t, v - - def grab_current(self, tspan=None): - """Return the time [s] and current ([mA] or [mA/cm^2]) vectors cut by tspan""" - t = self.current.t.copy() - j = self.current.data.copy() - if tspan: - mask = np.logical_and(tspan[0] < t, t < tspan[-1]) - t = t[mask] - j = j[mask] - return t, j - @property def v(self): """The potential [V] 
numpy array of the measurement""" @@ -300,13 +290,13 @@ def j(self): return self.current.data.copy() def as_cv(self): - """Convert self to a CyclicVoltammagram""" - from .cv import CyclicVoltammagram + """Convert self to a CyclicVoltammogram""" + from .cv import CyclicVoltammogram cv_as_dict = self.as_dict() cv_as_dict["technique"] = "CV" # Note, this works perfectly! All needed information is in self_as_dict :) - return CyclicVoltammagram.from_dict(cv_as_dict) + return CyclicVoltammogram.from_dict(cv_as_dict) class ECCalibration(Calibration): @@ -317,9 +307,9 @@ class ECCalibration(Calibration): def __init__( self, + name=None, technique="EC", tstamp=None, - name=None, measurement=None, RE_vs_RHE=None, A_el=None, @@ -355,13 +345,14 @@ def calibrate_series(self, key, measurement=None): Key should be "potential" or "current". Anything else will return None. - - potential: the calibration looks up "raw_potential" in the measurement, shifts - it to the RHE potential if RE_vs_RHE is available, corrects it for Ohmic drop if - R_Ohm is available, and then returns a calibrated potential series with a name - indicative of the corrections done. - - current: The calibration looks up "raw_current" in the measurement, normalizes - it to the electrode area if A_el is available, and returns a calibrated current - series with a name indicative of whether the normalization was done. + - "potential": the calibration looks up "raw_potential" in the measurement, + shifts it to the RHE potential if RE_vs_RHE is available, corrects it for + Ohmic drop if R_Ohm is available, and then returns a calibrated potential + series with a name indicative of the corrections done. + - "current": The calibration looks up "raw_current" in the measurement, + normalizes it to the electrode area if A_el is available, and returns a + calibrated current series with a name indicative of whether the normalization + was done. 
""" measurement = measurement or self.measurement if key == "potential": diff --git a/src/ixdat/techniques/ec_ms.py b/src/ixdat/techniques/ec_ms.py index 8b7efef2..990e1c26 100644 --- a/src/ixdat/techniques/ec_ms.py +++ b/src/ixdat/techniques/ec_ms.py @@ -1,8 +1,234 @@ """Module for representation and analysis of EC-MS measurements""" - -from .ec import ECMeasurement -from .ms import MSMeasurement +import numpy as np +from ..constants import FARADAY_CONSTANT +from .ec import ECMeasurement, ECCalibration +from .ms import MSMeasurement, MSCalResult, MSCalibration +from .cv import CyclicVoltammogram +from ..exporters.ecms_exporter import ECMSExporter +from ..plotters.ecms_plotter import ECMSPlotter +from ..plotters.ms_plotter import STANDARD_COLORS class ECMSMeasurement(ECMeasurement, MSMeasurement): - """Class implementing raw EC-MS functionality""" + """Class for raw EC-MS functionality. Parents: ECMeasurement and MSMeasurement""" + + extra_column_attrs = { + "ecms_meaurements": {"ec_technique", "tspan_bg"}, + } + # FIXME: It would be much more elegant if this carried over automatically from + # *both* parents, by appending the table columns... + # We'll see how the problem changes with the metaprogramming work. + + default_plotter = ECMSPlotter + default_exporter = ECMSExporter + + def __init__(self, **kwargs): + """FIXME: Passing the right key-word arguments on is a mess""" + ec_kwargs = { + k: v for k, v in kwargs.items() if k in ECMeasurement.get_all_column_attrs() + } + ms_kwargs = { + k: v for k, v in kwargs.items() if k in MSMeasurement.get_all_column_attrs() + } + # ms_kwargs["ms_calibration"] = self.ms_calibration # FIXME: This is a mess. 
+ # FIXME: I think the lines below could be avoided with a PlaceHolderObject that + # works together with MemoryBackend + if "series_list" in kwargs: + ec_kwargs.update(series_list=kwargs["series_list"]) + ms_kwargs.update(series_list=kwargs["series_list"]) + if "component_measurements" in kwargs: + ec_kwargs.update(component_measurements=kwargs["component_measurements"]) + ms_kwargs.update(component_measurements=kwargs["component_measurements"]) + if "calibration_list" in kwargs: + ec_kwargs.update(calibration_list=kwargs["calibration_list"]) + ms_kwargs.update(calibration_list=kwargs["calibration_list"]) + ECMeasurement.__init__(self, **ec_kwargs) + MSMeasurement.__init__(self, **ms_kwargs) + self._ec_plotter = None + self._ms_plotter = None + + @property + def ec_plotter(self): + """A plotter for just plotting the ec data""" + return self.plotter.ec_plotter # the ECPlotter of the measurement's ECMSPlotter + + @property + def ms_plotter(self): + """A plotter for just plotting the ms data""" + return self.plotter.ms_plotter # the MSPlotter of the measurement's ECMSPlotter + + @classmethod + def from_dict(cls, obj_as_dict): + """Initiate an ECMSMeasurement from a dictionary representation. + + This unpacks the ECMSCalibration from its own nested dictionary + TODO: Figure out a way for that to happen automatically. 
+ """ + + if "calibration" in obj_as_dict: + if isinstance(obj_as_dict["calibration"], dict): + # FIXME: This is a mess + obj_as_dict["calibration"] = ECMSCalibration.from_dict( + obj_as_dict["calibration"] + ) + obj = super(ECMSMeasurement, cls).from_dict(obj_as_dict) + return obj + + def as_cv(self): + self_as_dict = self.as_dict() + + # FIXME: The following lines are only necessary because + # PlaceHolderObject.get_object isn't able to find things in the MemoryBackend + del self_as_dict["s_ids"] + self_as_dict["series_list"] = self.series_list + + return ECMSCyclicVoltammogram.from_dict(self_as_dict) + + def ecms_calibration(self, mol, mass, n_el, tspan, tspan_bg=None): + """Calibrate for mol and mass based on one period of steady electrolysis + + Args: + mol (str): Name of the molecule to calibrate + mass (str): Name of the mass at which to calibrate + n_el (str): Number of electrons passed per molecule produced (remember the + sign! e.g. +4 for O2 by OER and -2 for H2 by HER) + tspan (tspan): The timespan of steady electrolysis + tspan_bg (tspan): The time to use as a background + + Return MSCalResult: The result of the ms_calibration + """ + Y = self.integrate_signal(mass, tspan=tspan, tspan_bg=tspan_bg) + Q = self.integrate("raw current / [mA]", tspan=tspan) * 1e-3 + n = Q / (n_el * FARADAY_CONSTANT) + F = Y / n + cal = MSCalResult( + name=f"{mol}_{mass}", mol=mol, mass=mass, cal_type="ecms_calibration", F=F, + ) + return cal + + def ecms_calibration_curve( + self, + mol, + mass, + n_el, + tspan_list=None, + tspan_bg=None, + ax="new", + axes_measurement=None, + ): + """Fit mol's sensitivity at mass based on steady periods of EC production + + Args: + mol (str): Name of the molecule to calibrate + mass (str): Name of the mass at which to calibrate + n_el (str): Number of electrons passed per molecule produced (remember the + sign! e.g. 
+4 for O2 by OER and -2 for H2 by HER)
+            tspan_list (list of tspan): The timespans of steady electrolysis
+            tspan_bg (tspan): The time to use as a background
+            ax (Axis): The axis on which to plot the ms_calibration curve result.
+                Defaults to a new axis.
+            axes_measurement (list of Axes): The EC-MS plot axes to highlight the
+                ms_calibration on. Defaults to None.
+
+        Return MSCalResult(, Axis(, Axis)): The result of the ms_calibration
+            (and requested axes)
+        """
+        axis_ms = axes_measurement[0] if axes_measurement else None
+        axis_current = axes_measurement[0] if axes_measurement else None  # FIXME(review): same index as axis_ms — presumably the current panel is a different axes_measurement index; confirm against ECMSPlotter
+        Y_list = []
+        n_list = []
+        for tspan in tspan_list:
+            Y = self.integrate_signal(mass, tspan=tspan, tspan_bg=tspan_bg, ax=axis_ms)
+            # FIXME: plotting current by giving integrate() an axis doesn't work great.
+            Q = self.integrate("raw current / [mA]", tspan=tspan, axis=axis_current)
+            Q *= 1e-3  # mC --> [C]
+            n = Q / (n_el * FARADAY_CONSTANT)
+            Y_list.append(Y)
+            n_list.append(n)
+        n_vec = np.array(n_list)
+        Y_vec = np.array(Y_list)
+        pfit = np.polyfit(n_vec, Y_vec, deg=1)
+        F = pfit[0]
+        if ax:
+            color = STANDARD_COLORS[mass]
+            if ax == "new":
+                ax = self.plotter.new_ax()
+                ax.set_xlabel("amount produced / [nmol]")
+                ax.set_ylabel("integrated signal / [nC]")
+            ax.plot(n_vec * 1e9, Y_vec * 1e9, "o", color=color)
+            n_fit = np.array([0, max(n_vec)])
+            Y_fit = n_fit * pfit[0] + pfit[1]
+            ax.plot(n_fit * 1e9, Y_fit * 1e9, "--", color=color)
+        cal = MSCalResult(
+            name=f"{mol}_{mass}",
+            mol=mol,
+            mass=mass,
+            cal_type="ecms_calibration_curve",
+            F=F,
+        )
+        if ax:
+            if axes_measurement:
+                return cal, ax, axes_measurement
+            return cal, ax
+        return cal
+
+
+class ECMSCyclicVoltammogram(CyclicVoltammogram, ECMSMeasurement):
+    """Class for raw EC-MS functionality.
Parents: CyclicVoltammogram, ECMSMeasurement + """ + + +class ECMSCalibration(ECCalibration, MSCalibration): + """Class for calibrations useful for ECMSMeasurements""" + + extra_column_attrs = { + "ecms_calibrations": {"date", "setup", "RE_vs_RHE", "A_el", "L"} + } + # FIXME: The above should be covered by the parent classes. Needs metaprogramming! + # NOTE: technique, name, and tstamp in column_attrs are inherited from Calibration + # NOTE: ms_results_ids in extra_linkers is inherited from MSCalibration. + # NOTE: signal_bgs is left out + + def __init__( + self, + name=None, + date=None, + tstamp=None, + setup=None, + ms_cal_results=None, + signal_bgs=None, + RE_vs_RHE=None, + A_el=None, + L=None, + technique="EC-MS", + ): + """ + Args: + name (str): Name of the ms_calibration + date (str): Date of the ms_calibration + setup (str): Name of the setup where the ms_calibration is made + ms_cal_results (list of MSCalResult): The mass spec calibrations + RE_vs_RHE (float): the RE potential in [V] + A_el (float): the geometric electrode area in [cm^2] + L (float): the working distance in [m] + """ + ECCalibration.__init__(self, name=name, A_el=A_el, RE_vs_RHE=RE_vs_RHE) + MSCalibration.__init__( + self, + date=date, + tstamp=tstamp, + setup=setup, + ms_cal_results=ms_cal_results, + signal_bgs=signal_bgs, + ) + self.technique = technique + self.L = L + + def calibrate_series(self, key, measurement=None): + measurement = measurement or self.measurement + try_1 = ECCalibration.calibrate_series(self, key, measurement) + if try_1: + return try_1 + try_2 = MSCalibration.calibrate_series(self, key, measurement) + if try_2: + return try_2 diff --git a/src/ixdat/techniques/ms.py b/src/ixdat/techniques/ms.py index 104bd584..4cf25e09 100644 --- a/src/ixdat/techniques/ms.py +++ b/src/ixdat/techniques/ms.py @@ -1,7 +1,579 @@ """Module for representation and analysis of MS measurements""" -from ..measurements import Measurement +import re +import numpy as np +import json # FIXME: 
This is for MSCalibration.export, but shouldn't have to be here. + +from ..measurements import Measurement, Calibration +from ..spectra import Spectrum +from ..plotters.ms_plotter import MSPlotter, STANDARD_COLORS +from ..exceptions import QuantificationError +from ..constants import ( + AVOGADROS_CONSTANT, + BOLTZMAN_CONSTANT, + STANDARD_TEMPERATURE, + STANDARD_PRESSURE, + DYNAMIC_VISCOSITIES, + MOLECULAR_DIAMETERS, + MOLAR_MASSES, +) +from ..data_series import ValueSeries +from ..db import Saveable class MSMeasurement(Measurement): """Class implementing raw MS functionality""" + + extra_column_attrs = {"ms_measurement": ("tspan_bg",)} + default_plotter = MSPlotter + + def __init__(self, name, **kwargs): + tspan_bg = kwargs.pop("tspan_bg", None) + super().__init__(name, **kwargs) + self.tspan_bg = tspan_bg + + @property + def ms_calibration(self): + ms_cal_list = [] + tspan_bg = None + signal_bgs = {} + for cal in self.calibration_list: + ms_cal_list = ms_cal_list + getattr(cal, "ms_cal_list", []) + for mass, bg in getattr(cal, "signal_bgs", {}).items(): + if mass not in signal_bgs: + signal_bgs[mass] = bg + tspan_bg = tspan_bg or getattr(cal, "tspan_bg", None) + return MSCalibration(ms_cal_results=ms_cal_list, signal_bgs=signal_bgs) + + @property + def signal_bgs(self): + return self.ms_calibration.signal_bgs + + def set_bg(self, tspan_bg=None, mass_list=None): + """Set background values for mass_list to the average signal during tspan_bg.""" + mass_list = mass_list or self.mass_list + tspan_bg = tspan_bg or self.tspan_bg + signal_bgs = {} + for mass in mass_list: + t, v = self.grab(mass, tspan_bg) + signal_bgs[mass] = np.mean(v) + self.add_calibration(MSCalibration(signal_bgs=signal_bgs)) + + def reset_bg(self, mass_list=None): + """Reset background values for the masses in mass_list""" + mass_list = mass_list or self.mass_list + for mass in mass_list: + if mass in self.signal_bgs: + del self.signal_bgs[mass] + + def grab( + self, + item, + tspan=None, + 
tspan_bg=None, + include_endpoints=False, + removebackground=False, + ): + """Returns t, S where S is raw signal in [A] for a given signal name (ie mass) + + Args: + item (str): Name of the signal. + tspan (list): Timespan for which the signal is returned. + tspan_bg (list): Timespan that corresponds to the background signal. + If not given, no background is subtracted. + removebackground (bool): Whether to remove a pre-set background if available. + This is special to MSMeasurement. + Defaults to False, but in grab_flux it defaults to True. + include_endpoints (bool): Whether to ensure tspan[0] and tspan[-1] are in t + """ + time, value = super().grab( + item, tspan=tspan, include_endpoints=include_endpoints + ) + + if tspan_bg: + _, bg = self.grab(item, tspan=tspan_bg) + return time, value - np.average(bg) + elif removebackground: + if item in self.signal_bgs: + return time, value - self.signal_bgs[item] + elif self.tspan_bg: + _, bg = self.grab(item, tspan=self.tspan_bg) + return time, value - np.average(bg) + return time, value + + def grab_for_t(self, item, t, tspan_bg=None, removebackground=False): + """Return a numpy array with the value of item interpolated to time t + + Args: + item (str): The name of the value to grab + t (np array): The time vector to grab the value for + tspan_bg (iterable): Optional. A timespan defining when `item` is at its + baseline level. The average value of `item` in this interval will be + subtracted from what is returned. + removebackground (bool): Whether to remove a pre-set background if available. + This is special to MSMeasurement. + Defaults to False, but in grab_flux it defaults to True. 
+ """ + t_0, v_0 = self.grab(item, tspan_bg=tspan_bg, removebackground=removebackground) + v = np.interp(t, t_0, v_0) + return v + + def grab_signal(self, *args, **kwargs): + """Alias for grab()""" + return self.grab(*args, **kwargs) + + def grab_flux( + self, + mol, + tspan=None, + tspan_bg=None, + removebackground=True, + include_endpoints=False, + ): + """Return the flux of mol (calibrated signal) in [mol/s] + + Note: + `grab_flux(mol, ...)` is identical to `grab(f"n_dot_{mol}", ...)` with + removebackround=True by default. An MSCalibration does the maths. + + Args: + mol (str or MSCalResult): Name of the molecule or a ms_calibration thereof + tspan (list): Timespan for which the signal is returned. + tspan_bg (list): Timespan that corresponds to the background signal. + If not given, no background is subtracted. + removebackground (bool): Whether to remove a pre-set background if available + Defaults to True. + """ + return self.grab( + # grab() invokes __getitem__, which invokes the `Calibration`. Specifically, + # `MSCalibration.calibrate_series()` interprets item names starting with + # "n_" as molecule fluxes, and checks itself for a sensitivity factor. + f"n_dot_{mol}", + tspan=tspan, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=include_endpoints, + ) + + def grab_flux_for_t( + self, + mol, + t, + tspan_bg=None, + removebackground=False, + include_endpoints=False, + ): + """Return the flux of mol (calibrated signal) in [mol/s] for a given time vec + + Args: + mol (str): Name of the molecule. + t (np.array): The time vector along which to give the flux + tspan_bg (tspan): Timespan that corresponds to the background signal. + If not given, no background is subtracted. 
+ removebackground (bool): Whether to remove a pre-set background if available + """ + t_0, y_0 = self.grab_flux( + mol, + tspan_bg=tspan_bg, + removebackground=removebackground, + include_endpoints=include_endpoints, + ) + y = np.interp(t, t_0, y_0) + return y + + def get_flux_series(self, mol): + """Return a ValueSeries with the calibrated flux of mol""" + return self[f"n_dot_{mol}"] + + def integrate_signal(self, mass, tspan, tspan_bg, ax=None): + """Integrate a ms signal with background subtraction and evt. plotting + + TODO: Should this, like grab_signal does now, have the option of using a + background saved in the object rather than calculating a new one? + + Args: + mass (str): The mass for which to integrate the signal + tspan (tspan): The timespan over which to integrate + tspan_bg (tspan): Timespan at which the signal is at its background value + ax (Axis): axis to plot on. Defaults to None + """ + t, S = self.grab_signal(mass, tspan=tspan, include_endpoints=True) + if tspan_bg: + t_bg, S_bg_0 = self.grab_signal( + mass, tspan=tspan_bg, include_endpoints=True + ) + S_bg = np.mean(S_bg_0) * np.ones(t.shape) + else: + S_bg = np.zeros(t.shape) + if ax: + if ax == "new": + fig, ax = self.plotter.new_ax() + ax.fill_between(t, S_bg, S, color=STANDARD_COLORS[mass], alpha=0.2) + return np.trapz(S - S_bg, t) + + @property + def mass_list(self): + """List of the masses for which ValueSeries are contained in the measurement""" + return [self.as_mass(col) for col in self.series_names if self.is_mass(col)] + + def is_mass(self, item): + if re.search("^M[0-9]+$", item): + return True + if item in self.reverse_aliases and self.is_mass(self.reverse_aliases[item][0]): + return True + return False + + def as_mass(self, item): + if re.search("^M[0-9]+$", item): + return item + new_item = self.reverse_aliases[item][0] + if self.is_mass(new_item): + return self.as_mass(new_item) + raise TypeError(f"{self} does not recognize '{item}' as a mass.") + + +class 
MSCalResult(Saveable): + """A class for a mass spec ms_calibration result. + + FIXME: I think that something inheriting directly from Saveable does not belong in + a technique module. + """ + + table_name = "ms_cal_results" + column_attrs = {"name", "mol", "mass", "cal_type", "F"} + + def __init__( + self, + name=None, + mol=None, + mass=None, + cal_type=None, + F=None, + ): + super().__init__() + self.name = name or f"{mol} at {mass}" + self.mol = mol + self.mass = mass + self.cal_type = cal_type + self.F = F + + def __repr__(self): + return ( + f"{self.__class__.__name__}(name={self.name}, mol={self.mol}, " + f"mass={self.mass}, F={self.F})" + ) + + @property + def color(self): + return STANDARD_COLORS[self.mass] + + +class MSCalibration(Calibration): + """Class for mass spec calibrations. TODO: replace with powerful external package""" + + extra_linkers = {"ms_calibration_results": ("ms_cal_results", "ms_cal_result_ids")} + # FIXME: signal_bgs are not saved at present. Should they be a separate table + # of Saveable objects like ms_cal_results or should they be a single json value? + child_attrs = [ + "ms_cal_results", + ] + + def __init__( + self, + name=None, + date=None, + tstamp=None, # FIXME: No need to have both a date and a tstamp? 
+ setup=None, + ms_cal_results=None, + signal_bgs=None, + technique="MS", + measurement=None, + ): + """ + Args: + name (str): Name of the ms_calibration + date (str): Date of the ms_calibration + setup (str): Name of the setup where the ms_calibration is made + ms_cal_results (list of MSCalResult): The mass spec calibrations + measurement (MSMeasurement): The measurement + """ + super().__init__( + name=name or f"EC-MS ms_calibration for {setup} on {date}", + technique=technique, + tstamp=tstamp, + measurement=measurement, + ) + self.date = date + self.setup = setup + self.ms_cal_results = ms_cal_results or [] + self.signal_bgs = signal_bgs or {} + + @property + def ms_cal_result_ids(self): + return [cal.id for cal in self.ms_cal_results] + + @property + def mol_list(self): + return list({cal.mol for cal in self.ms_cal_results}) + + @property + def mass_list(self): + return list({cal.mass for cal in self.ms_cal_results}) + + @property + def name_list(self): + return list({cal.name for cal in self.ms_cal_results}) + + def __contains__(self, mol): + return mol in self.mol_list or mol in self.name_list + + def __iter__(self): + yield from self.ms_cal_results + + def calibrate_series(self, key, measurement=None): + """Return a calibrated series for `key` if possible. + + If key starts with "n_", it is interpreted as a molecule flux. This method then + searches the calibration for a sensitivity factor for that molecule uses it to + divide the relevant mass signal from the measurement. Example acceptable keys: + "n_H2", "n_dot_H2". + If the key does not start with "n_", or the calibration can't find a relevant + sensitivity factor and mass signal, this method returns None. + """ + measurement = measurement or self.measurement + if key.startswith("n_"): # it's a flux! + mol = key.split("_")[-1] + try: + mass, F = self.get_mass_and_F(mol) + except QuantificationError: + # Calibrations just return None when they can't get what's requested. 
+ return + signal_series = measurement[mass] + y = signal_series.data + if mass in measurement.signal_bgs: + # FIXME: How to make this optional to user of MSMeasuremt.grab()? + y = y - measurement.signal_bgs[mass] + n_dot = y / F + return ValueSeries( + name=f"n_dot_{mol}", + unit_name="mol/s", + data=n_dot, + tseries=signal_series.tseries, + ) + + def get_mass_and_F(self, mol): + """Return the mass and sensitivity factor to use for simple quant. of mol""" + cal_list_for_mol = [cal for cal in self if cal.mol == mol or cal.name == mol] + Fs = [cal.F for cal in cal_list_for_mol] + if not Fs: + raise QuantificationError(f"{self} has no sensitivity factor for {mol}") + index = np.argmax(np.array(Fs)) + + the_good_cal = cal_list_for_mol[index] + return the_good_cal.mass, the_good_cal.F + + def get_F(self, mol, mass): + """Return the sensitivity factor for mol at mass""" + cal_list_for_mol_at_mass = [ + cal + for cal in self + if (cal.mol == mol or cal.name == mol) and cal.mass == mass + ] + F_list = [cal.F for cal in cal_list_for_mol_at_mass] + if not F_list: + raise QuantificationError( + f"{self} has no sensitivity factor for {mol} at {mass}" + ) + return np.mean(np.array(F_list)) + + def scaled_to(self, ms_cal_result): + """Return a new ms_calibration w scaled sensitivity factors to match one given""" + F_0 = self.get_F(ms_cal_result.mol, ms_cal_result.mass) + scale_factor = ms_cal_result.F / F_0 + calibration_as_dict = self.as_dict() + new_cal_list = [] + for cal in self.ms_cal_results: + cal = MSCalResult( + name=cal.name, + mass=cal.mass, + mol=cal.mol, + F=cal.F * scale_factor, + cal_type=cal.cal_type + " scaled", + ) + new_cal_list.append(cal) + calibration_as_dict["ms_cal_results"] = [cal.as_dict() for cal in new_cal_list] + calibration_as_dict["name"] = calibration_as_dict["name"] + " scaled" + return self.__class__.from_dict(calibration_as_dict) + + @classmethod + def read(cls, path_to_file): + """Read an MSCalibration from a json-formatted text file""" + 
with open(path_to_file) as f: + obj_as_dict = json.load(f) + # put the MSCalResults (exported as dicts) into objects: + obj_as_dict["ms_cal_results"] = [ + MSCalResult.from_dict(ms_cal_as_dict) + for ms_cal_as_dict in obj_as_dict["ms_cal_results"] + ] + return cls.from_dict(obj_as_dict) + + def export(self, path_to_file=None): + """Export an ECMSCalibration as a json-formatted text file""" + path_to_file = path_to_file or (self.name + ".ix") + self_as_dict = self.as_dict() + # replace the ms_cal_result ids with the dictionaries of the results themselves: + del self_as_dict["ms_cal_result_ids"] + self_as_dict["ms_cal_results"] = [cal.as_dict() for cal in self.ms_cal_results] + with open(path_to_file, "w") as f: + json.dump(self_as_dict, f, indent=4) + + +class MSInlet: + """A class for describing the inlet to the mass spec + + Every MSInlet describes the rate and composition of the gas entering a mass + spectrometer. The default is a Spectro Inlets EC-MS chip. + TODO: Replace with powerful external package. + """ + + def __init__( + self, + *, + l_cap=1e-3, + w_cap=6e-6, + h_cap=6e-6, + gas="He", + T=STANDARD_TEMPERATURE, + p=STANDARD_PRESSURE, + verbose=True, + ): + """Create an MSInlet object given its properties. + + Args: + l_cap (float): capillary length [m]. Defaults to design parameter. + w_cap (float): capillary width [m]. Defaults to design parameter. + h_cap (float): capillary height [m]. Defaults to design parameter. + p (float): system pressure in [Pa] (if to change from that in medium) + T (float): system temperature in [K] (if to change from that in medium) + gas (str): the gas at the start of the inlet. + verbose (bool): whether to print stuff to the terminal + """ + self.verbose = verbose + self.l_cap = l_cap + self.w_cap = w_cap + self.h_cap = h_cap + self.p = p + self.T = T + self.gas = gas # TODO: Gas mixture class. This must be a pure gas now. 
+ + def calc_n_dot_0( + self, gas=None, w_cap=None, h_cap=None, l_cap=None, T=None, p=None + ): + """Calculate the total molecular flux through the capillary in [s^-1] + + Uses Equation 4.10 of Trimarco, 2017. "Real-time detection of sub-monolayer + desorption phenomena during electrochemical reactions: Instrument development + and applications." PhD Thesis, Technical University of Denmark. + + Args: + w_cap (float): capillary width [m], defaults to self.w_cap + h_cap (float): capillary height [m], defaults to self.h_cap + l_cap (float): capillary length [m], defaults to self.l_cap + gas (dict or str): the gas in the chip, defaults to self.gas + T (float): Temperature [K], if to be updated + p (float): pressure [Pa], if to be updated + Returns: + float: the total molecular flux in [s^-1] through the capillary + """ + + if w_cap is None: + w_cap = self.w_cap # capillary width in [m] + if h_cap is None: + h_cap = self.h_cap # capillary height in [m] + if l_cap is None: + l_cap = self.l_cap # effective capillary length in [m] + if T is None: + T = self.T + if p is None: + p = self.p + + pi = np.pi + eta = DYNAMIC_VISCOSITIES[gas] # dynamic viscosity in [Pa*s] + s = MOLECULAR_DIAMETERS[gas] # molecule diameter in [m] + m = MOLAR_MASSES[gas] * 1e-3 / AVOGADROS_CONSTANT # molecule mass in [kg] + + d = ((w_cap * h_cap) / pi) ** 0.5 * 2 + # d = 4.4e-6 #used in Henriksen2009 + a = d / 2 + p_1 = p + lambda_ = d # defining the transitional pressure + # ...from setting mean free path equal to capillary d + p_t = BOLTZMAN_CONSTANT * T / (2 ** 0.5 * pi * s ** 2 * lambda_) + p_2 = 0 + p_m = (p_1 + p_t) / 2 # average pressure in the transitional flow region + v_m = (8 * BOLTZMAN_CONSTANT * T / (pi * m)) ** 0.5 + # a reciprocal velocity used for short-hand: + nu = (m / (BOLTZMAN_CONSTANT * T)) ** 0.5 + + # ... and now, we're ready for the capillary equation. 
+ # (need to turn of black and flake8 for tolerable format) + # fmt: off + # Equation 4.10 of Daniel Trimarco's PhD Thesis: + N_dot = ( # noqa + 1 / (BOLTZMAN_CONSTANT * T) * 1 / l_cap * ( # noqa + (p_t - p_2) * a**3 * 2 * pi / 3 * v_m + (p_1 - p_t) * ( # noqa + a**4 * pi / (8 * eta) * p_m + a**3 * 2 * pi / 3 * v_m * ( # noqa + (1 + 2 * a * nu * p_m / eta) / ( # noqa + 1 + 2.48 * a * nu * p_m / eta # noqa + ) # noqa + ) # noqa + ) # noqa + ) # noqa + ) # noqa + # fmt: on + n_dot = N_dot / AVOGADROS_CONSTANT + return n_dot + + def gas_flux_calibration( + self, + measurement, + mol, + mass, + tspan=None, + tspan_bg=None, + ax=None, + ): + """ + Args: + measurement (MSMeasurement): The measurement with the ms_calibration data + mol (str): The name of the molecule to calibrate + mass (str): The mass to calibrate at + tspan (iter): The timespan to average the signal over. Defaults to all + tspan_bg (iter): Optional timespan at which the signal is at its background. + ax (matplotlib axis): the axis on which to indicate what signal is used + with a thicker line. Defaults to none + + Returns MSCalResult: a ms_calibration result containing the sensitivity factor + for mol at mass + """ + t, S = measurement.grab_signal(mass, tspan=tspan, tspan_bg=tspan_bg) + if ax: + ax.plot(t, S, color=STANDARD_COLORS[mass], linewidth=5) + + n_dot = self.calc_n_dot_0(gas=mol) + F = np.mean(S) / n_dot + return MSCalResult( + name=f"{mol}_{mass}", + mol=mol, + mass=mass, + cal_type="gas_flux_calibration", + F=F, + ) + + +class MSSpectrum(Spectrum): + """Nothing to add to normal spectrum yet. 
+ TODO: Methods for co-plotting ref spectra from a database + """ + + pass diff --git a/src/ixdat/techniques/spectroelectrochemistry.py b/src/ixdat/techniques/spectroelectrochemistry.py new file mode 100644 index 00000000..44245070 --- /dev/null +++ b/src/ixdat/techniques/spectroelectrochemistry.py @@ -0,0 +1,251 @@ +import numpy as np +from scipy.interpolate import interp1d + +from .ec import ECMeasurement +from ..spectra import Spectrum +from ..data_series import Field, ValueSeries +from ..spectra import SpectrumSeries +from ..exporters.sec_exporter import SECExporter +from ..plotters.sec_plotter import SECPlotter + + +class SpectroECMeasurement(ECMeasurement): + + default_plotter = SECPlotter + default_exporter = SECExporter + + def __init__(self, *args, **kwargs): + """Initialize an SEC measurement. All args and kwargs go to ECMeasurement.""" + ECMeasurement.__init__(self, *args, **kwargs) + self._reference_spectrum = None + self.tracked_wavelengths = [] + self.plot_waterfall = self.plotter.plot_waterfall + self.plot_wavelengths = self.plotter.plot_wavelengths + self.plot_wavelengths_vs_potential = self.plotter.plot_wavelengths_vs_potential + self.technique = "S-EC" + + @property + def reference_spectrum(self): + """The spectrum which will by default be used to calculate dOD""" + if not self._reference_spectrum or self._reference_spectrum == "reference": + self._reference_spectrum = Spectrum.from_field(self["reference"]) + return self._reference_spectrum + + def set_reference_spectrum( + self, + spectrum=None, + t_ref=None, + V_ref=None, + ): + """Set the spectrum used as the reference when calculating dOD. + + Args: + spectrum (Spectrum or str): If a Spectrum is given, it becomes the reference + spectrum. The string "reference" can be given to make the reference + spectrum become (via the reference_spectrum property) one that the + measurement was loaded with (evt. for definition of wavelengths). 
+ t_ref (float): The time (with respect to self.tstamp) to use as the + reference spectrum + V_ref (float): The potential to use as the reference spectrum. This will + only work if the potential is monotonically increasing. + """ + if t_ref and not spectrum: + spectrum = self.get_spectrum(t=t_ref) + if V_ref and not spectrum: + spectrum = self.get_spectrum(V=V_ref) + if not spectrum: + raise ValueError("must provide a spectrum, t_ref, or V_ref!") + self._reference_spectrum = spectrum + + @property + def spectra(self): + """The Field that is the spectra of the SEC Measurement""" + return self["spectra"] + + @property + def spectrum_series(self): + """The SpectrumSeries that is the spectra of the SEC Measurement""" + return SpectrumSeries.from_field( + self.spectra, + tstamp=self.tstamp, + name=self.name + " spectra", + ) + + @property + def wavelength(self): + """A DataSeries with the wavelengths for the SEC spectra""" + return self.spectra.axes_series[1] + + @property + def wl(self): + """A numpy array with the wavelengths in [nm] for the SEC spectra""" + return self.wavelength.data + + def calc_dOD(self, V_ref=None, t_ref=None, index_ref=None): + """Calculate the optical density with respect to a reference + + Provide at most one of V_ref, t_ref, or index. If none are provided the default + reference spectrum (self.reference_spectrum) will be used. 
+ + Args: + V_ref (float): The potential at which to get the reference spectrum + t_ref (float): The time at which to get the reference spectrum + index_ref (int): The index of the reference spectrum + Return Field: the delta optical density spanning time and wavelength + """ + counts = self.spectra.data + if V_ref or t_ref: + ref_spec = self.get_spectrum(V=V_ref, t=t_ref, index=index_ref) + else: + ref_spec = self.reference_spectrum + dOD = -np.log10(counts / ref_spec.y) + dOD_series = Field( + name=r"$\Delta$ O.D.", + unit_name="", + axes_series=self.spectra.axes_series, + data=dOD, + ) + return dOD_series + + def get_spectrum(self, V=None, t=None, index=None, name=None): + """Return the Spectrum at a given potential V, time t, or index + + Exactly one of V, t, and index should be given. If V (t) is out of the range of + self.v (self.t), then first or last spectrum will be returned. + + Args: + V (float): The potential at which to get the spectrum. Measurement.v must + be monotonically increasing for this to work. + t (float): The time at which to get the spectrum + index (int): The index of the spectrum + name (str): Optional. name to give the new spectrum if interpolated + + Return Spectrum: The spectrum. The data is (spectrum.x, spectrum.y) + """ + if V and V in self.v: # woohoo, can skip interpolation! + index = int(np.argmax(self.v == V)) + elif t and t in self.t: # woohoo, can skip interpolation! + index = int(np.argmax(self.t == t)) + if index: # then we're done: + return self.spectrum_series[index] + # otherwise, we have to interpolate: + counts = self.spectra.data + end_spectra = (self.spectrum_series[0].y, self.spectrum_series[-1].y) + if V: + counts_interpolater = interp1d( + self.v, counts, axis=0, fill_value=end_spectra, bounds_error=False + ) + # FIXME: This requires that potential and spectra have same tseries! 
+ y = counts_interpolater(V) + name = name or f"{self.spectra.name}_{V}V" + elif t: + t_spec = self.spectra.axes_series[0].t + counts_interpolater = interp1d( + t_spec, counts, axis=0, fill_value=end_spectra, bounds_error=False + ) + y = counts_interpolater(t) + name = name or f"{self.spectra.name}_{t}s" + else: + raise ValueError("Need t or V or index to select a spectrum!") + + field = Field( + data=y, + name=name, + unit_name=self.spectra.unit_name, + axes_series=[self.wavelength], + ) + return Spectrum.from_field(field, tstamp=self.tstamp) + + def get_dOD_spectrum( + self, + V=None, + t=None, + index=None, + V_ref=None, + t_ref=None, + index_ref=None, + ): + """Return the delta optical density Spectrum given a point and reference point. + + Provide exactly one of V, t, and index, and at most one of V_ref, t_ref, and + index_ref. For V and V_ref to work, the potential in the measurement must be + monotonically increasing. + + Args: + V (float): The potential at which to get the spectrum. + t (float): The time at which to get the spectrum + index (int): The index of the spectrum + V_ref (float): The potential at which to get the reference spectrum + t_ref (float): The time at which to get the reference spectrum + index_ref (int): The index of the reference spectrum + Return: + Spectrum: The dOD spectrum. The data is (spectrum.x, spectrum.y) + """ + if V_ref or t_ref or index_ref: + spectrum_ref = self.get_spectrum(V=V_ref, t=t_ref, index=index_ref) + else: + spectrum_ref = self.reference_spectrum + spectrum = self.get_spectrum(V=V, t=t, index=index) + field = Field( + data=-np.log10(spectrum.y / spectrum_ref.y), + name=r"$\Delta$ OD", + unit_name="", + axes_series=[self.wavelength], + ) + return Spectrum.from_field(field) + + def track_wavelength(self, wl, width=10, V_ref=None, t_ref=None, index_ref=None): + """Return and cache a ValueSeries for the dOD for a specific wavelength. 
+ + The caching adds wl_str to the SECMeasurement's data series, where + wl_str = "w" + int(wl) + This is dOD. The raw is also added as wl_str + "_raw". + So, to get the raw counts for a specific wavelength, call this function and + then use __getitem__, as in: sec_meas[wl_str + "_raw"] + If V_ref, t_ref, or index_ref are provided, they specify what to reference dOD + to. Otherwise, dOD is referenced to the SECMeasurement's reference_spectrum. + + Args: + wl (float): The wavelength to track in [nm] + width (float): The width around wl to average. For example, if wl=400 and + width = 20, the spectra will be averaged between 390 and 410 nm to get + the values. Defaults to 10. To interpolate at the exact wavelength + rather than averaging, specify `width=0`. + V_ref (float): The potential at which to get the reference spectrum + t_ref (float): The time at which to get the reference spectrum + index_ref (int): The index of the reference spectrum + Returns ValueSeries: The dOD value of the spectrum at wl. 
+ """ + if V_ref or t_ref or index_ref: + spectrum_ref = self.get_spectrum(V=V_ref, t=t_ref, index=index_ref) + else: + spectrum_ref = self.reference_spectrum + x = self.wl + if width: # averaging + wl_mask = np.logical_and(wl - width / 2 < x, x < wl + width / 2) + counts_ref = np.mean(spectrum_ref.y[wl_mask]) + counts_wl = np.mean(self.spectra.data[:, wl_mask], axis=1) + else: # interpolation + counts_ref = np.interp(wl, spectrum_ref.x, spectrum_ref.y) + counts_wl = [] + for counts_i in self.spectra.data: + c = np.interp(wl, x, counts_i) + counts_wl.append(c) + counts_wl = np.array(counts_wl) + dOD_wl = -np.log10(counts_wl / counts_ref) + raw_name = f"w{int(wl)} raw" + dOD_name = f"w{int(wl)}" + tseries = self.spectra.axes_series[0] + raw_vseries = ValueSeries( + name=raw_name, unit_name="counts", data=counts_wl, tseries=tseries + ) + dOD_vseries = ValueSeries( + name=dOD_name, unit_name="", data=dOD_wl, tseries=tseries + ) + self.replace_series(raw_name, raw_vseries) + # FIXME: better caching. See https://github.com/ixdat/ixdat/pull/11 + self.replace_series(dOD_name, dOD_vseries) + # FIXME: better caching. See https://github.com/ixdat/ixdat/pull/11 + self.tracked_wavelengths.append(dOD_name) # For the exporter. 
+ + return dOD_vseries diff --git a/src/ixdat/units.py b/src/ixdat/units.py index c48c1b20..24994160 100644 --- a/src/ixdat/units.py +++ b/src/ixdat/units.py @@ -8,7 +8,7 @@ class Unit: """TODO: flesh out this class or find an appropriate 3rd-party to use instead""" def __init__(self, name): - self.name = name + self.name = name or "" self.si_unit = None self.si_conversion_factor = None diff --git a/tasks.py b/tasks.py index 5e620b23..9ac54fbc 100644 --- a/tasks.py +++ b/tasks.py @@ -32,7 +32,7 @@ def flake8(context): """ print("# flake8") - return context.run("flake8").return_code + return context.run("flake8 src tests").return_code @task(aliases=["test", "tests"]) @@ -43,7 +43,8 @@ def pytest(context): """ print("# pytest") - return context.run("pytest").return_code + with context.cd(THIS_DIR): + return context.run("pytest tests").return_code @task(aliases=["QA", "qa", "check"]) diff --git a/tests/functional/test_measurements.py b/tests/functional/test_measurements.py index dc10c2b7..6cdc164b 100644 --- a/tests/functional/test_measurements.py +++ b/tests/functional/test_measurements.py @@ -23,7 +23,7 @@ def test_basic_data(self, ec_measurement): ) def test_calibrate_and_append(self, ec_measurement): - """Test that measurement calibration works""" + """Test that measurement ms_calibration works""" ec_measurement.calibrate_RE(RE_vs_RHE=1) assert ec_measurement.v[0] - ec_measurement["raw_potential"].data[0] == approx( ec_measurement.RE_vs_RHE @@ -34,7 +34,7 @@ def test_calibrate_and_append(self, ec_measurement): cv = ec_measurement.as_cv() cvs_1_plus_2 = cv[1] + cv[2] - # Check that the calibration survived all that: + # Check that the ms_calibration survived all that: assert cvs_1_plus_2.RE_vs_RHE == ec_measurement.RE_vs_RHE # Check that the main time variable, that of potential, wasn't corrupted: assert len(cvs_1_plus_2.grab("potential")[0]) == len( @@ -75,7 +75,7 @@ def test_calibration_over_save_load(self, composed_measurement): # Now, try copying the calibrated 
measurement by as_dict() and from_dict(): meas12_copied = Measurement.from_dict(composed_measurement_copy.as_dict()) - # And check if it still has the calibration: + # And check if it still has the ms_calibration: assert meas12_copied.A_el == A_el # And that it can still apply it: assert meas12_copied.grab("potential")[1][0] == approx(