-
Notifications
You must be signed in to change notification settings - Fork 10
/
sampleCSV.R
56 lines (49 loc) · 1.27 KB
/
sampleCSV.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#
# Should handle a connection, but we'll generalize that later.
# That will illustrate how we can compile down to C code in R.
#
## cat(1:1e8, sep = "\n", file = "sample.csv")
#
# i = runif(10)*1e8
# sampleLines("sample.csv", whichLines = i, header = FALSE)
#
sampleLines <-
#
# Extract a chosen subset of lines from a text file by streaming through it
# once, rather than loading the whole file.
#
#  input       path of the file to read from. Connections are not handled yet.
#  N           number of lines to sample; only used by the default value of
#              whichLines.
#  whichLines  line numbers to extract, counted from the first line after the
#              header (when header is TRUE). Fractional values are truncated
#              toward zero; NA and values below 1 are rejected.
#  totalLines  total number of lines to sample from; only used by the default
#              value of whichLines. The default calls getTotalLines(), which
#              is defined outside this block.
#  header      if TRUE, the first line of the file is read and discarded.
#
# Returns a character vector with one element per entry of whichLines,
# ordered by increasing line number (NOT in the order whichLines was given).
# A line number requested more than once yields the same line repeated.
#
function(input, N,
         whichLines = sample(1:totalLines, N),
         totalLines = getTotalLines(input),
         header = TRUE)
{
    ans = character(length(whichLines))

    # Nothing requested: do not even open the file.
    if(length(whichLines) == 0)
        return(ans)

    # Work with whole line numbers so that the differences computed below
    # are whole numbers of lines to skip.
    lineNums = trunc(whichLines)
    if(any(is.na(lineNums)) || any(lineNums < 1))
        stop("whichLines must contain line numbers >= 1 with no missing values")

    con = file(input, "r")
    # Close (not merely evaluate) the connection however we leave the function.
    on.exit(close(con))

    if(header)
        readLines(con, 1)

    # Reading in increasing order lets us move strictly forward through the
    # connection: each step only needs the distance from the previous line.
    lineNums = sort(lineNums)
    offsets = diff(lineNums)

    ans[1] = readUpTo(con, lineNums[1])
    for(i in seq(along = offsets)) {
        # An offset of 0 means the same line was requested again; the
        # connection has already moved past it, so reuse the previous result.
        ans[i + 1] = if(offsets[i] > 0)
                         readUpTo(con, offsets[i])
                     else
                         ans[i]
    }

    ans
}
readUpTo =
#
# Consume `to` lines from the connection in a single readLines() call and
# return the last of them.
#
#  con  connection to read from.
#  to   number of lines to consume.
#
# Returns the element at position `to` of the lines that were read. If fewer
# than `to` lines were available, that position does not exist and the
# result is NA.
#
function(con, to)
{
    consumed = readLines(con, n = to)
    consumed[to]
}
f.readUpTo =
#
# Loop-based variant of readUpTo(): consumes lines from the connection one at
# a time and returns the to-th one, so only a single line is held at once.
#
#  con  connection to read from.
#  to   number of lines to consume.
#
# Returns the last line consumed. The result is character(0) when `to` is
# less than 1 (nothing is read) and NA_character_ when the connection runs
# out of lines before `to` lines have been consumed.
#
function(con, to)
{
    # Defined up front so there is a value to return when the loop never runs.
    ans = character(0)
    ctr = 1L

    while(ctr <= to) {
        ans = readLines(con, 1)
        if(length(ans) == 0) {
            # End of input: no such line, and no point in looping further.
            ans = NA_character_
            break
        }
        ctr = ctr + 1L
    }

    ans
}