Skip to content

Commit

Permalink
update binary to source
Browse files Browse the repository at this point in the history
  • Loading branch information
sebkopf committed Jul 27, 2023
1 parent 1b3d79f commit 53fc1f2
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 245 deletions.
3 changes: 1 addition & 2 deletions R/isoread.R
Original file line number Diff line number Diff line change
Expand Up @@ -683,9 +683,8 @@ read_iso_file <- function(

}

# cleanup any binary and source content depending on debug setting
# cleanup source content depending on debug setting
if (!default(debug)) {
iso_file$binary <- NULL # @FIXME: binary should be renamed to source throughout
iso_file$source <- NULL
iso_file$temp <- NULL
}
Expand Down
26 changes: 13 additions & 13 deletions R/isoread_caf.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ iso_read_caf <- function(ds, options = list()) {
stop("data structure must be a 'dual_inlet' iso_file", call. = FALSE)

# read binary file
ds$binary <- get_ds_file_path(ds) |> read_binary_isodat_file()
ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file()

# process file info
if(ds$read_options$file_info) {
Expand Down Expand Up @@ -44,19 +44,19 @@ iso_read_caf <- function(ds, options = list()) {
extract_caf_raw_voltage_data <- function(ds) {

# locate masses
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot identify measured masses") |>
move_to_C_block_range("CResultData", "CEvalDataIntTransferPart")

# read all masses
masses_re <- re_combine(re_x_000(), re_text_x(), re_unicode("rIntensity"))
masses <-
tibble(
pos = find_next_patterns(ds$binary, masses_re) + masses_re$size,
pos = find_next_patterns(ds$source, masses_re) + masses_re$size,
# capture cup and mass
data = map(.data$pos, function(pos) {
capture <-
ds$binary |>
ds$source |>
move_to_pos(pos) |>
capture_data_till_pattern("cup", "text", re_text_x(), data_bytes_max = 8, move_past_dots = TRUE) |>
move_to_next_pattern(re_unicode("rIntensity "), max_gap = 0L) |>
Expand All @@ -72,32 +72,32 @@ extract_caf_raw_voltage_data <- function(ds) {
)

# locate voltage data
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot locate voltage data") |>
move_to_C_block_range("CDualInletRawData", "CResultData")

# find binary positions for voltage standards and samples
standard_block_start <- find_next_pattern(
ds$binary, re_combine(re_unicode("Standard Block"), re_null(4), re_x_000()))
ds$source, re_combine(re_unicode("Standard Block"), re_null(4), re_x_000()))
sample_block_start <- find_next_pattern(
ds$binary, re_combine(re_unicode("Sample Block"), re_null(4), re_x_000()))
ds$source, re_combine(re_unicode("Sample Block"), re_null(4), re_x_000()))

# safety checks
if (is.null(standard_block_start) || is.null(sample_block_start) ||
standard_block_start > sample_block_start) {
iso_source_file_op_error(ds$binary, "cannot find standard and sample voltage data blocks at expected positions")
iso_source_file_op_error(ds$source, "cannot find standard and sample voltage data blocks at expected positions")
}

# read voltage data
ds$binary <- set_binary_file_error_prefix(ds$binary, "cannot process voltage data")
ds$source <- set_binary_file_error_prefix(ds$source, "cannot process voltage data")

# right before this sequence there is a 4 byte sequence that could be a date, the last block is the # of masses
read_blocks_re <- re_combine(re_null(4), re_block("etx"), re_x_000())
positions <- find_next_patterns(ds$binary, read_blocks_re)
positions <- find_next_patterns(ds$source, read_blocks_re)

# function to capture voltages
capture_voltages <- function(pos) {
bin <- ds$binary |>
bin <- ds$source |>
move_to_pos(pos - 4) |>
capture_n_data("n_masses", "integer", n = 1)

Expand Down Expand Up @@ -132,7 +132,7 @@ extract_caf_raw_voltage_data <- function(ds) {

# safety check
if (any(notok <- is.na(voltages$column))) {
iso_source_file_op_error(ds$binary, glue("inconsistent cup designations: {collapse(voltages$cup[notok], ', ')}"))
iso_source_file_op_error(ds$source, glue("inconsistent cup designations: {collapse(voltages$cup[notok], ', ')}"))
}

# voltages data frame
Expand All @@ -149,7 +149,7 @@ extract_caf_raw_voltage_data <- function(ds) {
extract_caf_vendor_data_table <- function(ds) {

# reset navigation
ds$binary <- reset_binary_file_navigation(ds$binary)
ds$source <- reset_binary_file_navigation(ds$source)

# get data table
extracted_dt <-
Expand Down
26 changes: 13 additions & 13 deletions R/isoread_cf.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ iso_read_cf <- function(ds, options = list()) {
stop("data structure must be a 'continuous_flow' iso_file", call. = FALSE)

# read binary file
ds$binary <- get_ds_file_path(ds) |> read_binary_isodat_file()
ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file()

# process file info
if(ds$read_options$file_info) {
Expand Down Expand Up @@ -53,51 +53,51 @@ iso_read_cf <- function(ds, options = list()) {
# extract voltage data in cf file
extract_cf_raw_voltage_data <- function(ds) {
# move to beginning of intensity information (the larger block coming
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot identify measured masses") |>
# can have data in multiple positions (e.g. if peak jumping) throughout the rest of the binary
move_to_C_block("CRawDataScanStorage", reset_cap = TRUE)

# get trace positions
gas_positions <- ds$binary |>
gas_positions <- ds$source |>
find_next_patterns(re_text_0(), re_text_x(), re_unicode("Trace Data "), re_block("text"), re_null(4), re_block("stx"))

# raw_data
raw_data <- tibble::tibble()

# loop through gas positions
for (gas_pos in gas_positions) {
ds$binary <- ds$binary |>
ds$source <- ds$source |>
move_to_pos(gas_pos) |>
skip_pos(30) |>
capture_data_till_pattern("gas", "text", re_null(4), re_block("stx"))

gas_config <- ds$binary$data$gas
gas_config <- ds$source$data$gas

# data start
data_start_re <- re_combine(
re_block("stx"), re_text_0(), re_block("stx"),
re_direct(".{4}", size = 4, label = ".{4}"))
ds$binary <- ds$binary |> move_to_next_pattern(data_start_re)
data_start <- ds$binary$pos
ds$source <- ds$source |> move_to_next_pattern(data_start_re)
data_start <- ds$source$pos

# find all masses at end of data
data_end_re <- re_combine(
re_direct(".{2}", size = 2, label = ".{2}"), re_block("stx"),
re_text_0(), re_block("stx"), re_null(4))
ds$binary <- ds$binary |> move_to_next_pattern(data_end_re)
data_end <- ds$binary$pos - data_end_re$size
ds$source <- ds$source |> move_to_next_pattern(data_end_re)
data_end <- ds$source$pos - data_end_re$size

mass_re <- re_combine(re_text_x(), re_unicode("Mass "))
mass_positions <- ds$binary |>
mass_positions <- ds$source |>
cap_at_next_pattern(re_unicode("MS/Clock")) |>
find_next_patterns(mass_re)

masses <- c()
for (pos in mass_positions) {
# a bit tricky to capture but this should do the trick reliably
raw_mass <-
ds$binary |> move_to_pos(pos + mass_re$size) |>
ds$source |> move_to_pos(pos + mass_re$size) |>
capture_data_till_pattern("mass", "raw", re_text_x(), ignore_trailing_zeros = FALSE) |>
purrr::pluck("data", "mass")
text_mass <- parse_raw_data(grepRaw("^([0-9]\\x00)+", raw_mass, value = TRUE), type = "text")
Expand All @@ -112,10 +112,10 @@ extract_cf_raw_voltage_data <- function(ds) {
if (n_data_points %% 1 > 0)
stop("number of data points for ", gas_config, " is not an integer (", n_data_points, ")", call. = FALSE)

ds$binary<- ds$binary |>
ds$source<- ds$source |>
move_to_pos(data_start) |>
capture_n_data("voltages", c("float", rep("double", length(masses))), n_data_points)
voltages <- bind_rows(ds$binary$data$voltages |> dplyr::as_tibble() |> rlang::set_names(c("time.s", masses_columns)))
voltages <- bind_rows(ds$source$data$voltages |> dplyr::as_tibble() |> rlang::set_names(c("time.s", masses_columns)))

# check for data
if (nrow(voltages) == 0)
Expand Down
48 changes: 24 additions & 24 deletions R/isoread_did.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ iso_read_did <- function(ds, options = list()) {
stop("data structure must be a 'dual_inlet' iso_file", call. = FALSE)

# read binary file
ds$binary <- get_ds_file_path(ds) |> read_binary_isodat_file()
ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file()

# process file info
if(ds$read_options$file_info) {
Expand Down Expand Up @@ -39,16 +39,16 @@ iso_read_did <- function(ds, options = list()) {
extract_did_raw_voltage_data <- function(ds) {

# mass information
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot identify measured masses") |>
move_to_C_block("CBinary") |>
move_to_next_C_block_range("CTraceInfoEntry", "CPlotRange")

# read all masses
masses_re <- re_combine(re_text_x(), re_unicode("Mass "))
masses_positions <- find_next_patterns(ds$binary, masses_re)
masses_positions <- find_next_patterns(ds$source, masses_re)
masses <- map_chr(masses_positions, function(pos) {
ds$binary |>
ds$source |>
move_to_pos(pos + masses_re$size) |>
capture_data_till_pattern("mass", "text", re_or(re_text_x(), re_block("C-block")),
data_bytes_max = 8, move_past_dots = FALSE) |>
Expand All @@ -59,7 +59,7 @@ extract_did_raw_voltage_data <- function(ds) {
masses_columns <- str_c("v", masses, ".mV")

# locate voltage data
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot locate voltage data") |>
move_to_C_block_range("CDualInletRawData", "CTwoDoublesArrayData") |>
move_to_next_C_block("CIntegrationUnitTransferPart") |>
Expand All @@ -68,14 +68,14 @@ extract_did_raw_voltage_data <- function(ds) {
# find binary positions for voltage standards and samples
voltages <- list()
standard_voltage_start_re <- re_combine(re_unicode("/"), re_text_x(), re_text_x(), re_unicode("Standard "))
standard_positions <- find_next_patterns(ds$binary, standard_voltage_start_re)
standard_positions <- find_next_patterns(ds$source, standard_voltage_start_re)
sample_voltage_start_re <- re_combine(re_unicode("/"), re_text_0(), re_text_x(), re_unicode("Sample "))
sample_positions <- find_next_patterns(ds$binary, sample_voltage_start_re)
sample_positions <- find_next_patterns(ds$source, sample_voltage_start_re)

# function to capture voltages
capture_voltages <- function(pos) {

bin <- ds$binary |>
bin <- ds$source |>
move_to_pos(pos) |>
capture_data_till_pattern("cycle", "text", re_null(4), re_block("stx"), move_past_dots = TRUE) |>
move_to_next_pattern(re_unicode("/"), re_text_0(), re_text_0(), re_null(4), re_block("stx")) |>
Expand Down Expand Up @@ -119,38 +119,38 @@ extract_did_raw_voltage_data <- function(ds) {
extract_did_vendor_data_table <- function(ds) {

# find vendor data table
ds$binary <- ds$binary |>
ds$source <- ds$source |>
set_binary_file_error_prefix("cannot process vendor computed data table") |>
move_to_C_block_range("CDualInletEvaluatedData", "CParsedEvaluationString")

# cap
if (!is.null(pos <- find_next_pattern(ds$binary, re_unicode("Gas Indices")))) {
ds$binary <- ds$binary |> cap_at_pos(pos - 20)
} else iso_source_file_op_error(ds$binary, "cannot find data deliminter 'Gas Indices'")
if (!is.null(pos <- find_next_pattern(ds$source, re_unicode("Gas Indices")))) {
ds$source <- ds$source |> cap_at_pos(pos - 20)
} else iso_source_file_op_error(ds$source, "cannot find data deliminter 'Gas Indices'")

# find data positions
column_header_re <- re_combine(re_block("etx"), re_unicode("/"), re_text_x(), re_block("text"), # Delta or AT%
re_text_x(), re_block("text"), # actual column name
re_null(4), re_block("stx"))
column_data_re <- re_combine(re_unicode("/"), re_text_0(), re_text_x(), re_block("text"), re_null(4),
re_x_000(), re_x_000()) # data comes after this
column_header_positions <- find_next_patterns(ds$binary, column_header_re)
column_data_positions <- find_next_patterns(ds$binary, column_data_re)
column_header_positions <- find_next_patterns(ds$source, column_header_re)
column_data_positions <- find_next_patterns(ds$source, column_data_re)

# safety checks
if (length(column_header_positions) == 0) {
iso_source_file_op_error(ds$binary, "no column headers found")
iso_source_file_op_error(ds$source, "no column headers found")
} else if (length(column_header_positions) != length(column_data_positions)) {
iso_source_file_op_error(ds$binary, sprintf("unequal number of column headers (%d) and data entries (%d) found",
iso_source_file_op_error(ds$source, sprintf("unequal number of column headers (%d) and data entries (%d) found",
length(column_header_positions), length(column_data_positions)))
} else if (!all(column_header_positions < column_data_positions)) {
iso_source_file_op_error(ds$binary, "found column headers not interspersed with data entries")
iso_source_file_op_error(ds$source, "found column headers not interspersed with data entries")
}

# read the data
vendor_dt <- list()
for (i in 1:length(column_header_positions)) {
ds$binary <- ds$binary |>
ds$source <- ds$source |>
move_to_pos(column_header_positions[i] + 10) |> # skip initial <stx>/<fef-x> at the start of header
# capture column type (typically Delta or AT%) # could skip this to speed up
capture_data_till_pattern("type", "text", re_text_x(), move_past_dots = TRUE) |>
Expand All @@ -163,16 +163,16 @@ extract_did_vendor_data_table <- function(ds) {
capture_data_till_pattern("values", "double", re_text_0(), re_block("stx"), sensible = c(-1e10, 1e10))

# safety check
if (length(ds$binary$data$values) != 2 * ds$binary$data$n_values)
glue::glue("inconsistent number of data entries recovered ({length(ds$binary$data$values)}) - ",
"expected {2 * ds$binary$data$n_values} values from {ds$binary$data$n_values} cycles") |>
if (length(ds$source$data$values) != 2 * ds$source$data$n_values)
glue::glue("inconsistent number of data entries recovered ({length(ds$source$data$values)}) - ",
"expected {2 * ds$source$data$n_values} values from {ds$source$data$n_values} cycles") |>
stop(call. = FALSE)

table_column <- list(
list(
cycle = as.integer(ds$binary$data$values[c(TRUE, FALSE)] + 1L),
value = ds$binary$data$values[c(FALSE, TRUE)]
)) |> rlang::set_names(str_replace(ds$binary$data$column, "\\s*$", "")) # remove trailing white spaces in column names
cycle = as.integer(ds$source$data$values[c(TRUE, FALSE)] + 1L),
value = ds$source$data$values[c(FALSE, TRUE)]
)) |> rlang::set_names(str_replace(ds$source$data$column, "\\s*$", "")) # remove trailing white spaces in column names
vendor_dt <- c(vendor_dt, table_column)
}

Expand Down
Loading

0 comments on commit 53fc1f2

Please sign in to comment.