-
Notifications
You must be signed in to change notification settings - Fork 32
/
autoRefresh.R
187 lines (172 loc) · 6.81 KB
/
autoRefresh.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# This file contains other utilities to make this a more useful package
# If you have an Excel file template and use xlsx to fill in some data and write
# it back to a file (which is a general technique to produce Excel-based reports),
# then it works beautifully except that Excel will not refresh formulae and pivot
# tables when you open the file.
#
# The first problem of forcing formula calculation is easy to solve but the pivot
# table issue is not fixable by Apache POI library. It turns out that
# Excel can refresh pivot tables when you open the file if the pivot cache definition
# XML file has the refreshOnLoad flag set to 1.
# See https://stackoverflow.com/questions/11670816/how-to-refresh-pivot-cache-of-excel-2010-using-open-xml#16624292
#
# This method automates the hack. Basically, unzip the excel file, update
# the pivot cache definition files and writes it back. It's really a hack.
# This hack should be temporary when apache-poi eventually allows adding the
# refreshOnLoad attribute for existing pivot tables in the workbook.
# Make Excel refresh all formula when the file is open
# If output is NULL then overwrite the original file
#' @title Force Refresh Pivot Tables and Formulae
#'
#' @description Functions to force formula calculation or refresh of pivot
#' tables when the Excel file is opened.
#'
#' @details
#' \code{forcePivotTableRefresh} forces pivot tables to be refreshed when the Excel file is opened.
#' \code{forceFormulaRefresh} forces formulae to be recalculated when the Excel file is opened.
#'
#' @param file the path of the source file where formulae/pivot table needs to be refreshed
#' @param output the path of the output file. If it is \code{NULL} then the source file will be overwritten
#' @param verbose Whether to make logging more verbose
#'
#' @return Does not return any results
#'
#' @examples
#' # Patch a file where its pivot tables are not recalculated when the file is opened
#' \dontrun{
#' forcePivotTableRefresh("/tmp/file.xlsx")
#' forcePivotTableRefresh("/tmp/file.xlsx", "/tmp/fixed_file.xlsx")
#' }
#' # Patch a file where its formulae are not recalculated when the file is opened
#' \dontrun{
#' forceFormulaRefresh("/tmp/file.xlsx")
#' forceFormulaRefresh("/tmp/file.xlsx", "/tmp/fixed_file.xlsx")
#' }
#'
#'
#' @author Tom Kwong
#'
#' @export
#' @rdname autoRefresh
forceFormulaRefresh <- function(file, output=NULL, verbose=FALSE) {
# redirect output to source location?
if (is.null(output)) {
output <- file
if (verbose) {
cat(sprintf("Overwriting source file at %s\n", file))
}
}
wb <- loadWorkbook(file)
wb$setForceFormulaRecalculation(TRUE)
saveWorkbook(wb, output)
if (verbose) {
cat(sprintf("Successfully patched file to auto calculate formulae. File saved at %s\n", output))
}
}
#' @export
#' @rdname autoRefresh
forcePivotTableRefresh <- function(file, output=NULL, verbose=FALSE) {
if (!file.exists(file)) {
stop("File does not exist ", file)
}
# redirect output to source location?
if (is.null(output)) {
output <- file
if (verbose) {
cat(sprintf("Overwriting source file at %s\n", file))
}
}
# create a temp directory to hold the unzip'ed Excel content
tmpDir <- tempfile()
dir.create(tmpDir)
#cat(sprintf("Temp directory: %s", tmpDir))
# unzip the excel file
unzip(file, exdir = tmpDir)
# find pivot cache definition files & patch them
pivotTablesPatched <- 0
pivotCacheDir <- file.path(tmpDir, "xl", "pivotCache")
if (dir.exists(pivotCacheDir)) {
pivotCacheFiles <- list.files(path = pivotCacheDir)
pivotCacheDefFiles <- pivotCacheFiles[grepl("pivotCacheDefinition", pivotCacheFiles)]
result <- lapply(pivotCacheDefFiles,
function(defFile) {
# Read pivot cache definition file
workingFile <- file.path(pivotCacheDir, defFile)
text <- readLines(workingFile, warn = FALSE)
# Add refreshOnLoad attribute
text <- gsub('refreshedBy=', 'refreshOnLoad="1" refreshedBy=', text)
# Write back to the file now.
# Excel is particular about the file format... cannot use writeLines
# or else it will be a unix-style (newline, has EOL at last line)
text <- paste(text, collapse = '\r\n')
cat(text, file = workingFile)
})
pivotTablesPatched <- length(result)
}
if (pivotTablesPatched > 0) {
oldwd <- getwd()
setwd(tmpDir)
tmpOutputFile <- paste0(tempfile(), ".xlsx")
status <- zip(tmpOutputFile,
files = list.files(tmpDir, recursive = TRUE, all.files = TRUE),
flags = "-r9q")
setwd(oldwd)
if (status != 0) {
stop(sprintf("Unable to create zip file at %s", tmpOutputFile))
}
if (!file.copy(tmpOutputFile, output, overwrite = TRUE)) {
stop(sprintf("Unable to save file to %s", output))
}
if (verbose) {
cat(sprintf("Successfully patched file to auto refresh pivot tables. File saved at %s\n", output))
}
} else {
if (file != output) { # only make a copy if output is different from orig file
file.copy(file, output)
}
warning(sprintf("This excel file has no pivot table %s", file))
if (verbose) {
cat("Nothing's done\n")
}
}
}
# # Unit testing
# # Run this file from the xlsx project home directory.
# input <- "resources/test_template1_stale.xlsx"
#
# # unit test 1. Take source file output.xlsx and patch & save to output2.xlsx
# cat("Unit test 1\n")
# tmp <- "/tmp/temp.xlsx" # intermediate file for two operations below
# output <- "/tmp/test_pivot_refresh_1.xlsx"
# cat(sprintf("output file: %s\n", output))
# if (file.exists(tmp)) { file.remove(tmp) }
# if (file.exists(output)) { file.remove(output) }
# forcePivotTableRefresh(input, tmp)
# forceFormulaRefresh(tmp, output)
#
# # unit test 2. Make a copy of the source file. Then patch in-place.
# # this usage seems more natural
# cat("Unit test 2\n")
# output <- "/tmp/test_pivot_refresh_2.xlsx"
# cat(sprintf("output file: %s\n", output))
# file.copy(input, output, overwrite = TRUE)
# forcePivotTableRefresh(output)
# forceFormulaRefresh(output)
#
# # unit test 3. Non-verbose.
# cat("Unit test 3 (expect no output)\n")
# output <- "/tmp/test_pivot_refresh_3.xlsx"
# cat(sprintf("output file: %s\n", output))
# file.copy(input, output, overwrite = TRUE)
# forcePivotTableRefresh(output, verbose = FALSE)
# forceFormulaRefresh(output, verbose = FALSE)
#
# cat("Unit test completed!\n")
#
#
# # experiment how to patch Excel file
# # lines <- readLines("/tmp/good_pivot_cache_def_file.xml", warn = FALSE)
# # lines <- gsub('refreshedBy=', 'refreshOnLoad="1" refreshedBy=', lines)
# # lines <- paste(lines, collapse = '\r\n')
# # cat(lines, file = "/tmp/test_pivot_cache_def_file.xml", fill = FALSE)
#