# PMAP = 883812K (linux command used : sudo pmap <R PID> | grep total)

In [1]:
library(swat)
library(ggplot2)
library(reshape2)
library(dplyr)

SWAT 1.6.3
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Point the CAS client at the PEM bundle of trusted CA certificates, then
# open the connection to the CAS server on port 5570.
# No username/password arguments are passed here; presumably credentials
# come from an authinfo file or the environment -- confirm.
Sys.setenv(CAS_CLIENT_SSL_CA_LIST = "/opt/sas/viya/config/etc/SASSecurityCertificateFramework/cacerts/trustedcerts.pem")
conn <- CAS('frasepviya35smp.cloud.com', 5570)

NOTE: Connecting to CAS and generating CAS action functions for loaded
      action sets...
NOTE: To generate the functions with signatures (for tab completion), set 
      options(cas.gen.function.sig=TRUE).


In [3]:
# Activate metric tracing and other session parameters:
#   metrics = TRUE      -> each action reports the "real time / cpu time /
#                          memory" NOTE lines seen in the cell outputs
#   timeout = 1800      -> session timeout (presumably in seconds -- confirm)
#   caslib  = 'casuser' -> makes CASUSER the active caslib for this session
cas.sessionProp.setSessOpt(conn, metrics=TRUE, timeout=1800, caslib='casuser')

NOTE: 'CASUSER(viyademo01)' is now the active caslib.
NOTE: Action 'sessionProp.setSessOpt' used (Total process time):
NOTE:       real time               0.000384 seconds
NOTE:       cpu time                0.000372 seconds (96.88%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  280.44K (0.00%)


In [23]:
#' Convert a CAS Table to a R Data Frame (Download)
#'
#' Downloads the in-memory table that is referenced by
#' the CASTable object and stores it as a data.frame
#' in R. This function is used to download datasets from CAS.
#'
#' The rows are requested with one `table.fetch` action. The result entries
#' named `Fetch`, `Fetch1`, `Fetch2`, ... are collected in that order and
#' row-bound into a single object; its row names are reset.
#'
#' @param ct The CASTable object to download.
#' @param obs Number of rows to download, by default 32768
#' @param verbose If `TRUE` (the default), print the size and dimensions of
#'        the downloaded data and the size, length and class of the raw
#'        `casRetrieve` result. Set to `FALSE` to download silently.
#'
#' @return Returns a data.frame object that contains
#'         a copy of the in-memory data.
#' @export
#' @rawRd % Copyright SAS Institute
#'
#' @examples
#' \dontrun{
#' rdf = to.r.data.frame(CASTable)
#' }
to.r.data.frame <- function(ct, obs = 32768, verbose = TRUE) {
  # inherits() always yields a single TRUE/FALSE, so objects carrying several
  # classes get the intended error instead of a "condition has length > 1".
  if (!inherits(ct, "CASTable")) {
    stop("The first parameter must be a CASTable object")
  }

  tp <- gen.table.parm(ct)

  # Columns to fetch: regular plus computed variables, minus empty names.
  fv <- c(tp$vars, tp$computedVars)
  fv <- fv[fv != ""]

  # Drop every XcomputedVars entry that is not also listed in computedVars.
  if (sum(nchar(ct@XcomputedVars)) > 0) {
    hidden <- ct@XcomputedVars[!(ct@XcomputedVars %in% ct@computedVars)]
    fv <- fv[!(fv %in% hidden)]
  }

  # sortby is only passed when the table definition carries an ordering.
  if (length(tp$orderby) > 0) {
    res <- casRetrieve(ct@conn, "table.fetch", table = tp, fetchVars = fv,
                       index = FALSE, from = 1, to = obs, maxRows = 1000,
                       sortby = tp$orderby)
  } else {
    res <- casRetrieve(ct@conn, "table.fetch", table = tp, fetchVars = fv,
                       index = FALSE, from = 1, to = obs, maxRows = 1000)
  }

  # Candidate chunk names: "Fetch", "Fetch1", ..., one per result entry.
  # The suffixes are built from integers so large indices are never written
  # in scientific notation, and the "" suffix keeps paste0() from recycling
  # a zero-length vector into a duplicate "Fetch".
  n_results <- length(res$results)
  suffixes <- c("", as.character(seq_len(max(n_results - 1L, 0L))))
  fetch_keys <- paste0("Fetch", suffixes)
  fetch_keys <- fetch_keys[fetch_keys %in% names(res$results)]

  if (length(fetch_keys) == 0L) {
    stop("The 'table.fetch' action returned no 'Fetch' results")
  }

  # unname() so the chunk names are not handed to rbind() as argument names.
  out <- do.call("rbind", unname(res$results[fetch_keys]))
  rownames(out) <- NULL

  # Diagnostics used to compare the R-side footprint of the final object
  # with that of the raw action result.
  if (verbose) {
    print("Output object size =")
    print(object.size(out))
    print(dim(out))
    print("CASRetrieve Res object size =")
    print(object.size(res))
    print(length(res))
    print(class(res))
  }

  out
}

In [20]:
# Show the class of the downloaded object held in tblR.
class(tblR)

# PMAP = 1149728K (linux command used : sudo pmap <R PID> | grep total)

In [24]:
# Time the download of public.test_data1: the wall clock is printed before
# and after the transfer.
# obs is set far above the table's row count (the recorded run returned
# 100000 rows) so that every row is requested.
Sys.time()
tbl <- defCasTable(conn, tablename="test_data1", caslib = "public")
tblR <- to.r.data.frame(tbl,obs = 1000000000000)
Sys.time()

[1] "2022-01-05 15:14:08 UTC"

NOTE: Executing action 'table.columnInfo'.
NOTE: Action 'table.columnInfo' used (Total process time):
NOTE:       real time               0.000901 seconds
NOTE:       cpu time                0.000840 seconds (93.23%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  842.06K (0.00%)
NOTE: Executing action 'table.fetch'.
NOTE: Action 'table.fetch' used (Total process time):
NOTE:       real time               0.595499 seconds
NOTE:       cpu time                0.595500 seconds (100.00%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  4.63M (0.00%)


[1] "Output object size ="
16074744 bytes
[1] 100000     11
[1] "CASRetrieve Res object size ="
86903184 bytes
[1] 5
[1] "list"


[1] "2022-01-05 15:14:10 UTC"

In [9]:
# Size of the downloaded data as held in R memory.
object.size(tblR)

16074744 bytes

# PMAP = 1289200K  (linux command used : sudo pmap <R PID> | grep total)

In [5]:
# 1 149 724K  (pmap total, presumably measured before running this cell)
# Time the download of public.test_data2: the wall clock is printed before
# and after the transfer; obs is set very high so every row is requested.
Sys.time()
tbl <- defCasTable(conn, tablename="test_data2", caslib = "public")
tblR <- to.r.data.frame(tbl,obs = 1000000000000)
Sys.time()
# object size : 99531240 bytes  (object.size(tblR) after the download)
# 2 385 672 K  (pmap total, presumably measured after running this cell)

[1] "2022-01-05 14:53:52 UTC"

NOTE: Executing action 'table.columnInfo'.
NOTE: Action 'table.columnInfo' used (Total process time):
NOTE:       real time               0.001820 seconds
NOTE:       cpu time                0.001378 seconds (75.71%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  1.18M (0.00%)
NOTE: Executing action 'table.fetch'.
NOTE: Action 'table.fetch' used (Total process time):
NOTE:       real time               10.158848 seconds
NOTE:       cpu time                6.085815 seconds (59.91%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  32.94M (0.03%)


[1] "2022-01-05 14:54:08 UTC"

# PMAP = 2418656K (linux command used : sudo pmap <R PID> | grep total)

In [13]:
# Time the download of public.test_data3: the wall clock is printed before
# and after the transfer; obs is set very high so every row is requested.
Sys.time()
tbl <- defCasTable(conn, tablename="test_data3", caslib = "public")
tblR <- to.r.data.frame(tbl,obs = 1000000000000)
Sys.time()

[1] "2022-01-05 14:19:38 UTC"

NOTE: Executing action 'table.columnInfo'.
NOTE: Action 'table.columnInfo' used (Total process time):
NOTE:       real time               0.001148 seconds
NOTE:       cpu time                0.001048 seconds (91.29%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  1.17M (0.00%)
NOTE: Executing action 'table.fetch'.
NOTE: Action 'table.fetch' used (Total process time):
NOTE:       real time               109.366772 seconds
NOTE:       cpu time                60.876181 seconds (55.66%)
NOTE:       total nodes             1 (16 cores)
NOTE:       total memory            125.75G
NOTE:       memory                  311.06M (0.24%)


[1] "2022-01-05 14:22:10 UTC"

# PMAP = 14769484K (linux command used : sudo pmap <R PID> | grep total)

In [6]:
# Size of the downloaded data as held in R memory.
# NOTE(review): the output recorded for this cell (99531240 bytes) matches
# the test_data2 download; the figure below was presumably noted after the
# test_data3 download -- confirm.
object.size(tblR)
# 3,618,733,968 bytes

99531240 bytes

In [12]:
# End the CAS session. In the recorded run this call failed because the
# server could no longer find the session (see the error output).
cas.session.endSession(conn)

[1] "ERROR: Connection failed. Server returned: Session reconnect failed: Could not find the specified session."


ERROR: Error in swat::errorcheck(sw_connection): An error occurred while sending request.


In [9]:
# pmap : 1 149 732 Kilobytes  (before the download)
# Same timing experiment on public.test_data2, but downloading with
# to.casDataFrame() instead of to.r.data.frame().
Sys.time()
tbl <- defCasTable(conn, tablename="test_data2", caslib = "public")
tblR <- to.casDataFrame(tbl,obs = 1000000000000)
Sys.time()

# pmap : 2 322 608 Kilobytes  (after the download)
# tblR object size = 262 663 752 Kilobytes
# NOTE(review): the unit above is presumably bytes, not Kilobytes -- the pmap
# total grew by only about 1.2 GB during this cell. Confirm.

In [9]:
# pmap : 1 149 732 Kilobytes  (before the download)
# Timing experiment on public.test_data2, downloading with
# to.casDataFrame() instead of to.r.data.frame().
# NOTE(review): this cell appears twice in the notebook with identical
# content and figures -- confirm whether the repeat is intentional.
Sys.time()
tbl <- defCasTable(conn, tablename="test_data2", caslib = "public")
tblR <- to.casDataFrame(tbl,obs = 1000000000000)
Sys.time()

# pmap : 2 322 608 Kilobytes  (after the download)
# tblR object size = 262 663 752 Kilobytes
# NOTE(review): the unit above is presumably bytes, not Kilobytes -- the pmap
# total grew by only about 1.2 GB during this cell. Confirm.

In [8]:
# Size of the to.r.data.frame function object itself (not of any data).
object.size(to.r.data.frame)

196944 bytes

In [7]:
# List the names of all objects currently defined in the global environment.
ls(.GlobalEnv)