/
read_iris.R
114 lines (109 loc) · 3.69 KB
/
read_iris.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#' @title Read 13C data from IRIS/Wagner Analysen
#'
#' @description Reads composite files with 13C data from IRIS/Wagner Analysen.
#' The composite files start as follows:
#' \preformatted{
#' "Testergebnis"
#' "Nummer","1330"
#' "Datum","10.10.2013"
#' "Testart"}
#'
#' @param filename name of IRIS/Wagner file in composite format. The file is
#' read with \code{encoding = "latin1"}. Ignored when \code{text} is given.
#' @param text alternatively, the file content can be given directly, either
#' as a character vector with one element per line or as a single string
#' with embedded line breaks.
#' @return The object returned by \code{\link{breathtest_data}}, which is
#' called with \code{file_name, patient_id, name, first_name, initials,
#' test_no, dose, study, record_date, device = "Iris", height, weight,
#' substrate}, and a data frame \code{data} with columns \code{time}
#' and \code{dob}. Rows with \code{dob < -10} are removed from \code{data}.
#' @examples
#' filename = btcore_file("IrisMulti.TXT")
#' cat(readLines(filename, n = 10), sep="\n")
#' #
#' iris_data = read_iris(filename)
#' str(iris_data)
#' @export read_iris
read_iris = function(filename = NULL, text = NULL) {
  if (is.null(text)) {
    # file.exists(NULL) is logical(0) and would make `if` fail with an
    # unhelpful "argument is of length zero" error, so check explicitly.
    if (is.null(filename))
      stop("Either filename or text must be given")
    if (!file.exists(filename))
      stop(paste0("file ", filename, " does not exist."))
    text = readLines(filename, encoding = "latin1")
  } else {
    filename = 'from text'
    # Accept the whole file passed as one string: split it into lines.
    if (length(text) == 1 && grepl("\n", text, fixed = TRUE))
      text = strsplit(text, "\r?\n")[[1]]
  }
  # check if this is the right format; an empty input gives an NA header
  header = str_trim(text[1])
  if (is.na(header) || header != "\"Testergebnis\"")
    stop(paste0(header,
      "\nis not from a valid IRIS/Wagner data. First line should be <<Testergebnis>>"
      )
    )
  data_row = which(str_detect(text, "Daten"))
  if (length(data_row) == 0)
    stop("File does not contain data")
  # If several lines contain "Daten", the table starts after the first one.
  data_row = data_row[1]

  record_date = find_pattern(text, "Datum")
  # "%y" reads only two digits, so "10.10.2013" would become year 2020.
  # Use "%Y" for four-digit years and keep "%y" for two-digit years.
  four_digit_year = grepl("^\\d{1,2}\\.\\d{1,2}\\.\\d{4}", record_date)
  date_format = if (four_digit_year) "%d.%m.%Y" else "%d.%m.%y"
  record_date = strptime(record_date, date_format)

  # try if there is a patient number. If not, try identification
  patient_id = try(find_pattern(text, "Patient"), silent = TRUE)
  if (inherits(patient_id, "try-error"))
    patient_id = find_pattern(text, "Identifikation")
  test_no = as.integer(find_pattern(text, "Nummer"))
  substrate = find_pattern(text, "Substrat")
  # The "Geschlecht" entry is required in the file.
  # NOTE(review): the normalised gender is not passed on to
  # breathtest_data() below - confirm whether it should be.
  gender = find_pattern(text, "Geschlecht")
  if (nchar(gender) > 0) {
    gender = str_sub(tolower(gender), 1, 1)
    if (gender != "m")
      gender = "f" # make sure to avoid German names
  }
  dose = as.numeric(find_pattern(text, "Dosis"))
  # workaround for "Groesse" (with umlauts and scharf-s) and utf
  # The value from the file is multiplied by 100 before it is passed on.
  height = as.numeric(find_pattern(text, "Gr.*e.*", required = TRUE)) * 100
  weight = as.numeric(find_pattern(text, "Gewicht.*", required = TRUE))
  test = find_pattern(text, "Abk.*rzung")
  # there are multiple "name" fields; skip the first by ignoring lines 1-14
  name = find_pattern(text[-(1:14)], "Name")
  first_name = find_pattern(text, "Vorname")
  initials = NA
  if (nchar(name) > 0 && nchar(first_name) > 0)
    initials = paste0(str_sub(name, 1, 1),
                      str_sub(first_name, 1, 1))

  # Everything after the "Daten" line is a CSV table; always release the
  # connection, even when parsing fails.
  tc = textConnection(text[-seq_len(data_row)])
  data = tryCatch(utils::read.csv(tc), finally = close(tc))

  # Only time and DOB are used; check for them up front instead of relying
  # on a failed subset.
  required_cols = c("Testzeit..min.", "DOB..o.oo.")
  if (!all(required_cols %in% names(data)))
    stop("invalid data columns in IRIS/Wagner data file")
  data = data[, required_cols]
  names(data) = c("time", "dob")
  # remove too small values
  data = data[data$dob >= -10, ]
  breathtest_data(
    file_name = basename(filename),
    patient_id = patient_id,
    name = name,
    first_name = first_name,
    initials = initials,
    test_no = test_no,
    dose = dose,
    study = test,
    record_date = record_date,
    device = "Iris",
    height = height,
    weight = weight,
    substrate = substrate,
    data = data
  )
}
# Extract the value of a quoted `"key","value"` entry from the lines of an
# IRIS/Wagner file.
#
# bid       character vector to search, one element per line
# pattern   regular expression for the key, i.e. the text between the
#           first pair of double quotes
# required  if TRUE, a missing key raises an error; otherwise "" is returned
#
# Returns the whitespace-trimmed value. Stops when more than one element of
# `bid` matches the key.
find_pattern = function(bid, pattern, required = TRUE) {
  # "<key>", optional whitespace, then "<value>"; the value is group 1
  key_value_regex = paste0('\\"', pattern, '\\",\\s*\\"(.*)\\"')
  captured = str_match(bid, key_value_regex)[, 2]
  # non-matching lines give NA in the capture column
  hits = captured[!is.na(captured)]
  if (length(hits) > 1)
    stop(paste0("No unique <<", pattern, ">> in Iris file"))
  if (length(hits) == 0) {
    if (required)
      stop(paste0("No <<" , pattern, ">> found in Iris file "))
    hits = ""
  }
  str_trim(hits)
}