# Palmsens Data Parsing

> Parses .csv files from a Palmsens potentiostat

In [None]:
#| default_exp palmsens

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.all import *

Let's load a typical Palmsens file.

In [None]:
import os

In [None]:
test_dpvs_root = "../test_files/DPVs_palmsens"
os.listdir(test_dpvs_root)

['multi_column.csv', 'single_column.csv']

In [None]:
with open(os.path.join(test_dpvs_root, "multi_column.csv"), encoding="utf-16") as f: # note the utf-16 encoding
    lines = f.read().splitlines()

lines[:10]

['Date and time:,2025-03-29 23:18:59',
 'Notes:',
 ',,,,,,,,,,,,',
 'Differential Pulse Voltammetry [1]: S49,,Differential Pulse Voltammetry [8]: S48,,Differential Pulse Voltammetry [15]: S47,,Differential Pulse Voltammetry [22]: S46,,Differential Pulse Voltammetry [29]: S45,,Differential Pulse Voltammetry [36]: S44,,Differential Pulse Voltammetry [43]: S43',
 'Date and time measurement:,2025-03-29 13:32:13,Date and time measurement:,2025-03-29 14:28:03,Date and time measurement:,2025-03-29 16:28:02,Date and time measurement:,2025-03-29 17:28:20,Date and time measurement:,2025-03-29 19:41:19,Date and time measurement:,2025-03-29 20:35:10,Date and time measurement:,2025-03-29 21:22:52,',
 'V,µA,V,µA,V,µA,V,µA,V,µA,V,µA,V,µA',
 '0,0.019751112,0,0.040179824,0,0.045964596,0,0.04073982,0,0.044447004,0,0.043287808,0,0.05077498',
 '0.01020757,0.019157514,0.01020757,0.039586228,0.01020757,0.0450126,0.01020757,0.039709428,0.01020757,0.042632616,0.01020757,0.041630216,0.01020757,0.050198176',
 '

Palmsens files have a fixed line-by-line format, so we can parse them by line index. Line 0 holds a date and time (presumably of the export). That isn't actually useful to us: the useful information is when each measurement was taken, which appears later in the file.

We don't really use the notes, so lines 1 and 2 are skipped as well. Next is the names line (line 3):

In [None]:
lines[3]

'Differential Pulse Voltammetry [1]: S49,,Differential Pulse Voltammetry [8]: S48,,Differential Pulse Voltammetry [15]: S47,,Differential Pulse Voltammetry [22]: S46,,Differential Pulse Voltammetry [29]: S45,,Differential Pulse Voltammetry [36]: S44,,Differential Pulse Voltammetry [43]: S43'

In [None]:
#| export
def parse_columns(
    ps_third_line: str # the measurement-names line of a palmsens .csv file (`lines[3]`)
) -> list[str]: # one name per measurement, in file order
    "Split the names line into one name per measurement."
    # Each measurement spans two CSV columns (V and µA) but only the first one carries
    # the name, so consecutive names are separated by an empty field, i.e. by ",,".
    return ps_third_line.split(",,")

Now we need to parse the datetimes at which the DPVs were taken.

In [None]:
#| export
from datetime import datetime
from typing import Any

def parse_datetime(ps_datetime: str) -> datetime:
    "Parse a palmsens `YYYY-MM-DD HH:MM:SS` timestamp; raises `ValueError` if the text does not match."
    palmsens_format = "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(ps_datetime, palmsens_format)

In [None]:
assert parse_datetime("2025-03-29 23:18:59") == datetime(2025, 3, 29, 23, 18, 59)
test_fail(lambda: parse_datetime("not a ps line"), exc=ValueError)

In [None]:
lines[4].split("Date and time measurement:,")[1:]

['2025-03-29 13:32:13,',
 '2025-03-29 14:28:03,',
 '2025-03-29 16:28:02,',
 '2025-03-29 17:28:20,',
 '2025-03-29 19:41:19,',
 '2025-03-29 20:35:10,',
 '2025-03-29 21:22:52,']

In [None]:
#| export
def parse_measurement_datetimes(
    ps_fourth_line: str # the "Date and time measurement:" line of a palmsens .csv file (`lines[4]`)
) -> list[datetime]: # one datetime per measurement, in file order
    "Extract the time at which each measurement was taken."
    label = "Date and time measurement:,"
    # Splitting on the label leaves whatever preceded the first label at index 0; drop it.
    # The remaining pieces normally look like "2025-03-29 13:32:13," (note the trailing comma).
    pieces = ps_fourth_line.split(label)[1:]
    # Strip the separator instead of slicing off the last character: if a piece has no
    # trailing comma, `[:-1]` would cut a digit off the seconds and `strptime` would still
    # accept the result (a leading zero is optional for %S), silently giving a wrong time.
    return [parse_datetime(piece.rstrip(",").strip()) for piece in pieces]

In [None]:
test(parse_measurement_datetimes(lines[4]), [datetime(2025, 3, 29, 13, 32, 13),
 datetime(2025, 3, 29, 14, 28, 3),
 datetime(2025, 3, 29, 16, 28, 2),
 datetime(2025, 3, 29, 17, 28, 20),
 datetime(2025, 3, 29, 19, 41, 19),
 datetime(2025, 3, 29, 20, 35, 10),
 datetime(2025, 3, 29, 21, 22, 52)], all_equal)

Next is the data, which comes as pairs of V and µA columns.

In [None]:
lines[5].split(",")

['V', 'µA', 'V', 'µA', 'V', 'µA', 'V', 'µA', 'V', 'µA', 'V', 'µA', 'V', 'µA']

In [None]:
#| export
# make sure the file is alternating V and µA. Otherwise, the file format isn't supported
import numpy as np

def validate_cols(
    ps_fifth_line: str # the units header line of a palmsens .csv file (`lines[5]`)
) -> bool: # True if the file has the expected columns
    "Check that the header consists of complete, alternating `V` / `µA` column pairs."
    cols = ps_fifth_line.split(",")

    # An odd count means some potential column has no matching current column.
    # Without this check "V" or "V,µA,V" would pass, because the µA slice below
    # would simply be shorter (or empty, and `all([])` is True).
    if len(cols) % 2 != 0:
        return False

    potentials_ok = all(col == "V" for col in cols[::2]) # every even element should be "V"
    currents_ok = all(col == "µA" for col in cols[1::2]) # every odd element should be "µA"
    return potentials_ok and currents_ok

In [None]:
assert validate_cols(lines[5])
assert not validate_cols("not a valid col")

In [None]:
ps_data_lines = lines[6:-1] # the last line is always an end character
ps_data_lines[:5] # cols of V1, uA1, V2, uA2, V3, uA3, ...

['0,0.019751112,0,0.040179824,0,0.045964596,0,0.04073982,0,0.044447004,0,0.043287808,0,0.05077498',
 '0.01020757,0.019157514,0.01020757,0.039586228,0.01020757,0.0450126,0.01020757,0.039709428,0.01020757,0.042632616,0.01020757,0.041630216,0.01020757,0.050198176',
 '0.02041514,0.018810318,0.02041514,0.038824632,0.02041514,0.044525404,0.02041514,0.039082228,0.02041514,0.04155182,0.02041514,0.040493424,0.02041514,0.047666992',
 '0.03062271,0.01848552,0.03062271,0.038460632,0.03062271,0.044407804,0.03062271,0.038936632,0.03062271,0.04089662,0.03062271,0.039731828,0.03062271,0.045947796',
 '0.04083028,0.01798712,0.04083028,0.038225432,0.04083028,0.043971008,0.04083028,0.038634232,0.04083028,0.039759828,0.04083028,0.038651032,0.04083028,0.045707']

In [None]:
step1 = np.array([line.split(",") for line in ps_data_lines], dtype=float).T # first, lets transpose it. so now it's rows of V1, uA1, V2, uA2, V3, uA3, ...
step1[:, :5]

array([[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.01975111, 0.01915751, 0.01881032, 0.01848552, 0.01798712],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.04017982, 0.03958623, 0.03882463, 0.03846063, 0.03822543],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.0459646 , 0.0450126 , 0.0445254 , 0.0444078 , 0.04397101],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.04073982, 0.03970943, 0.03908223, 0.03893663, 0.03863423],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.044447  , 0.04263262, 0.04155182, 0.04089662, 0.03975983],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.04328781, 0.04163022, 0.04049342, 0.03973183, 0.03865103],
       [0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
       [0.05077498, 0.05019818, 0.04766699, 0.0459478 , 0.045707  ]])

In [None]:
step2 = step1.reshape(step1.shape[0]//2, 2, -1)
step2[:,:, :5]

array([[[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.01975111, 0.01915751, 0.01881032, 0.01848552, 0.01798712]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.04017982, 0.03958623, 0.03882463, 0.03846063, 0.03822543]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.0459646 , 0.0450126 , 0.0445254 , 0.0444078 , 0.04397101]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.04073982, 0.03970943, 0.03908223, 0.03893663, 0.03863423]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.044447  , 0.04263262, 0.04155182, 0.04089662, 0.03975983]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.04328781, 0.04163022, 0.04049342, 0.03973183, 0.03865103]],

       [[0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028],
        [0.05077498, 0.05019818, 0.04766699, 0.0459478 , 0.045707  ]]])

In [None]:
# and now, if i want to retrieve a specific column's data
col_data = step2[2, :, :5]
{
    "potential": col_data[0],
    "current": col_data[1]*1e-6 # to convert it from uA to A
}

{'potential': array([0.        , 0.01020757, 0.02041514, 0.03062271, 0.04083028]),
 'current': array([4.5964596e-08, 4.5012600e-08, 4.4525404e-08, 4.4407804e-08,
        4.3971008e-08])}

In [None]:
#| export
def organize_column_data(
    ps_data_lines: list[str] # usually lines[6:-1] of a palmsens .csv file
) -> np.ndarray: # numpy array with shape (N, 2, L) where N is the number of columns, the second axis is [potential, current], and L is length of measurement
    "Convert the CSV data rows into one `[potential, current]` pair of series per measurement."
    if not ps_data_lines:
        raise ValueError("No data lines to organize.")

    # One row per line of the file: shape (L, 2N), columns ordered V1, uA1, V2, uA2, ...
    # The float conversion raises ValueError for cells that are not numbers.
    table = np.array([line.split(",") for line in ps_data_lines], dtype=float)
    n_points, n_fields = table.shape

    # With an odd number of fields the reshape below could still succeed (whenever the
    # total element count happens to be divisible) and would then mix values of
    # different measurements, so reject that case explicitly.
    if n_fields % 2 != 0:
        raise ValueError(f"Expected an even number of data columns (V, µA pairs), got {n_fields}")

    # Transposing gives rows V1, uA1, V2, uA2, ...; consecutive row pairs are then
    # grouped into one (2, L) block per measurement.
    return table.T.reshape(n_fields // 2, 2, n_points)

def retrieve_data(
    col_index: int, # index of the column
    organized_data: np.ndarray # output of `organize_column_data`
) -> dict[str, np.ndarray]: # a dict with potential (in volts) and current (in A)
    "Pick one measurement out of the organized array and convert its current from µA to A."
    measurement = organized_data[col_index] # row 0 is the potential, row 1 the current
    microamps_to_amps = 1e-6
    result = {}
    result["potential"] = measurement[0]
    result["current"] = measurement[1] * microamps_to_amps
    return result

Putting it all together,

In [None]:
#| export

def _is_data_row(line: str, n_fields: int) -> bool:
    "True if `line` has exactly `n_fields` comma-separated fields that all parse as floats."
    fields = line.split(",")
    if len(fields) != n_fields:
        return False
    try:
        for field in fields:
            float(field)
    except ValueError:
        return False
    return True

def parse_palmsens_file(contents: str) -> list[dict[str, Any]]:
    """
    Takes the contents of a Palmsens .csv file (usually encoded in utf-16) and returns a list of measurements.
    For instance,
    ```python
    [
        {
            'name': 'Differential Pulse Voltammetry [1]: S49',
            'timestamp': '2025-03-29T13:32:13',
            'potential': array([0.        , 0.01020757, 0.02041514, 0.03062271, ...]),
            'current': array([1.97511120e-08, 1.91575140e-08, 1.88103180e-08, ...])
        },
        {
            'name': 'Differential Pulse Voltammetry [8]: S48',
            'timestamp': '2025-03-29T14:28:03',
            'potential': array([0.        , 0.01020757, 0.02041514, ... ]),
            'current': array([4.0179824e-08, 3.9586228e-08, 3.8824632e-08, ...])
        }
    ]```

    `timestamp` is an ISO 8601 string, `potential` is in volts and `current` in amperes.

    Raises `ValueError` if the file is too short, if the names, timestamps and data
    columns do not agree in number, if a timestamp is malformed, or if the unit header
    is not made of alternating V / µA columns.
    """
    lines = contents.splitlines()

    # Lines 3, 4 and 5 are the names, measurement datetimes and unit header; data starts at line 6.
    if len(lines) < 7:
        raise ValueError(f"Expected at least 7 lines in a Palmsens file, but got {len(lines)}")

    cols = parse_columns(lines[3])
    timestamps = [ts.isoformat() for ts in parse_measurement_datetimes(lines[4])]

    if len(cols) != len(timestamps):
        raise ValueError(f"Unexpected mismatch between column count ({len(cols)}) and timestamp count ({len(timestamps)})")

    # make sure the data is in the expected format
    if not validate_cols(lines[5]):
        raise ValueError("Expected all measurements to be DPVs or CVs, but columns have unexpected structure.")

    # Palmsens exports normally end with a terminator line, which must not be parsed as data.
    # Dropping the last line unconditionally would silently lose the final data point of a
    # file that lacks the terminator, so it is kept when it is a complete numeric row.
    data_lines = lines[6:]
    if not _is_data_row(data_lines[-1], len(lines[5].split(","))):
        data_lines = data_lines[:-1]

    organized_data = organize_column_data(data_lines)

    if organized_data.shape[0] != len(cols):
        raise ValueError(f"Unexpected mismatch between data column count ({organized_data.shape[0]}) and column count ({len(cols)})")

    # `retrieve_data` returns a dict with the potential and current data of measurement i
    return [
        {"name": name, "timestamp": timestamp} | retrieve_data(i, organized_data)
        for i, (name, timestamp) in enumerate(zip(cols, timestamps))
    ]

Now for a few tests:

In [None]:
# End-to-end check on the multi-measurement export loaded at the top of this notebook.
with open(os.path.join(test_dpvs_root, "multi_column.csv"), encoding="utf-16") as f:
    contents = f.read()

output = parse_palmsens_file(contents)
# name and timestamp come from the names line and the measurement-datetime line
assert output[0]["name"] == "Differential Pulse Voltammetry [1]: S49"
assert output[0]["timestamp"] == "2025-03-29T13:32:13"
assert len(output) == 7 # one entry per measurement in the file
assert output[3]["current"][-1] == 7.4046544e-08 # currents are returned in A (µA * 1e-6)

In [None]:
# End-to-end check on an export that is expected to yield exactly one measurement.
with open(os.path.join(test_dpvs_root, "single_column.csv"), encoding="utf-16") as f:
    contents = f.read()

output = parse_palmsens_file(contents)
assert len(output) == 1
assert output[0]["name"] == "Differential Pulse Voltammetry [1]: 31.25uM - DPV i vs E [1]"
assert output[0]["timestamp"] == "2025-02-11T16:04:12"
assert output[0]["potential"][2] == 0.02041514
# first and last current values, returned in A (µA * 1e-6)
assert output[0]["current"][0] == 3.4395052e-08
assert output[0]["current"][-1] == 7.5359048e-08

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()