#> 8 8 1 8 44 F 2020-07-21 1998-08-22 05:23:24#> 9 9 1 9 51 F 1967-05-25 2003-01-03 22:09:29#> 10 10 1 10 40 M 2044-03-11 1996-04-19 11:10:12
-#> # ... with 20 more rows, and 4 more variables: CONTINUOUS <dbl>,
+#> # … with 20 more rows, and 4 more variables: CONTINUOUS <dbl>,#> # CATEGORICAL <fct>, LOGICAL <lgl>, CHARACTER <chr>diffdf( test_data , test_data)
@@ -157,7 +161,7 @@
Missing Rows
test_data2 <-test_data
-test_data2 <-test_data2[1:(nrow(test_data2) -2),]
+test_data2 <-test_data2[1:(nrow(test_data2) -2),]
diffdf(test_data, test_data2 , suppress_warnings = T)
#> Differences found between the objects!#>
@@ -233,8 +237,8 @@
Different Labels
test_data2 <-test_data
-attr(test_data$ID , "label") <- "This is a interesting label"
-attr(test_data2$ID , "label") <- "what do I type here?"
+attr(test_data$ID , "label") <- "This is a interesting label"
+attr(test_data2$ID , "label") <- "what do I type here?"diffdf(test_data , test_data2 , suppress_warnings = T)
#> Differences found between the objects!#>
@@ -253,7 +257,7 @@
Different Factor Levels
test_data2 <-test_data
-levels(test_data2$CATEGORICAL) <-c(1,2,3)
+levels(test_data2$CATEGORICAL) <-c(1,2,3)
diffdf(test_data , test_data2 , suppress_warnings = T)
#> Differences found between the objects!#>
@@ -301,8 +305,8 @@
Grouping Variables
A key feature of diffdf that enables easier diagnostics is the ability to specify which variables form a unique row, i.e. which rows should be compared against each other based upon a key. By default, if no key is specified, diffdf will use the row numbers as the key; however, in general this isn’t recommended, as it means two identical datasets that are simply sorted differently can lead to incomprehensible error messages because every observation is flagged as different. In diffdf, keys can be specified as character vectors using the keys argument.
test_data2 <-test_data
-test_data2$INTEGER[c(5,2,15)] <-99L
-diffdf( test_data , test_data2 , keys =c("GROUP1" , "GROUP2") , suppress_warnings = T)
+test_data2$INTEGER[c(5,2,15)] <-99L
+diffdf( test_data , test_data2 , keys =c("GROUP1" , "GROUP2") , suppress_warnings = T)
#> Differences found between the objects!#> #> A summary is given below.
@@ -334,7 +338,7 @@
You can use the tolerance argument of diffdf to define how sensitive the comparison should be to decimal place inaccuracies. This is important, as very often floating point numbers will not compare equal due to machine rounding, because they cannot be perfectly represented in binary. By default, tolerance is set to sqrt(.Machine$double.eps).
You can use the tolerance argument of diffdf to define how sensitive the comparison should be to decimal place inaccuracies. This is important, as very often floating point numbers will not compare equal due to machine rounding, because they cannot be perfectly represented in binary. By default, tolerance is set to sqrt(.Machine$double.eps).
+
dsin1 <-data.frame(x =1.1e-06)
+dsin2 <-data.frame(x =1.1e-07)
diffdf(dsin1 , dsin2 , suppress_warnings = T)
#> Differences found between the objects!
@@ -418,8 +422,8 @@
Strictness
By default, the function will note a difference between integer and double columns, and between factor and character columns. It can be useful in some contexts to prevent this from occurring. We can do so with the strict_numeric = FALSE and strict_factor = FALSE arguments.
dsin1 <-data.frame(x =as.integer(c(1,2,3)))
+dsin2 <-data.frame(x =as.numeric(c(1,2,3)))
diffdf(dsin1 , dsin2 , suppress_warnings = T)
#> Differences found between the objects!
@@ -438,8 +442,8 @@
#> NOTE: Variable x in base was casted to numeric#> No issues were found!
-dsin1 <-data.frame(x =as.character(c(1,2,3)), stringsAsFactors =FALSE)
-dsin2 <-data.frame(x =as.factor(c(1,2,3)))
+dsin1 <-data.frame(x =as.character(c(1,2,3)), stringsAsFactors =FALSE)
+dsin2 <-data.frame(x =as.factor(c(1,2,3)))
diffdf(dsin1 , dsin2 , suppress_warnings = T)
#> Differences found between the objects!
@@ -491,9 +495,8 @@