In [1]:
library('base64enc');
library('stringr');
library('dplyr');
library('tidyr');
library('ggplot2');
library('stringr');
library('kimisc');
library(readr)
library(knitr)
library(kableExtra)
library(IRdisplay)
library(hrbrthemes)
library(extrafont)

extrafont::loadfonts()


options(repr.matrix.max.rows=600, repr.matrix.max.cols=200)

extractExceptionName <- function(firstLine){
    ret <- str_match(firstLine, "([^:]+)")
    ret[1]
}
decodeTrace <- function(enc) {
    rawToChar(base64decode(as.character(enc)))
}
convertTestName <- function(t){
    parts <- str_match(t,"(.+)\\.(.+)")
    paste(parts[,2],parts[,3],sep="#")
}


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘kableExtra’


The following object is masked from ‘package:dplyr’:

    group_rows


Registering fonts with R

Adobe Caslon Pro Bold already registered with pdfFonts().

Adobe Caslon Pro already registered with pdfFonts().

DejaVu Sans already registered with pdfFonts().

DejaVu Sans Mono already registered with pdfFonts().

DejaVu Serif already registered with pdfFonts().

Droid Sans Fallback already registered with pdfFonts().

FontAwesome already registered with pdfFonts().

GLYPHICONS Halflings already registered with pdfFonts().

Lato Black already registered with pdfFonts().

Lato already registered with pdfFonts().

Lato Hairline already registered with pdfFonts().

Lato Heavy already registered with pdfFonts().

Lato Light already registered with pdfFonts().



In [2]:
# Read in source data
allReproResults <- read_csv("repro-matches.csv") %>%
    mutate_if(is.numeric, ~ replace(., is.na(.), 0))

# tests that failed persistently in isolated rerun, we could not get them to appear "flaky" (just persistent fails)
skippedTests <- c("ch.qos.logback.core.recovery.ResilientOutputStreamTest#verifyRecuperationAfterFailure", "ch.qos.logback.core.util.FileUtilTest#basicCopyingWorks", "org.springframework.boot.cli.command.CommandRunnerIntegrationTests#debugAddsAutoconfigReport", "org.springframework.boot.cli.DirectorySourcesIntegrationTests#runDirectory", "org.springframework.boot.cli.JarCommandIT#jarCreation", "org.springframework.boot.cli.JarCommandIT#jarCreationWithGrabResolver", "org.springframework.boot.cli.JarCommandIT#jarCreationWithIncludes", "org.springframework.boot.cli.ReproIntegrationTests#grabAntBuilder", "org.springframework.boot.cli.ReproIntegrationTests#securityDependencies", "org.springframework.boot.cli.ReproIntegrationTests#shellDependencies", "org.springframework.boot.cli.SampleIntegrationTests#actuatorSample", "org.springframework.boot.cli.SampleIntegrationTests#beansSample", "org.springframework.boot.cli.SampleIntegrationTests#deviceSample", "org.springframework.boot.cli.SampleIntegrationTests#httpSample", "org.springframework.boot.cli.SampleIntegrationTests#integrationSample", "org.springframework.boot.cli.SampleIntegrationTests#jobWebSample", "org.springframework.boot.cli.SampleIntegrationTests#reactorSample", "org.springframework.boot.cli.SampleIntegrationTests#templateSample", "org.springframework.boot.cli.SampleIntegrationTests#uiSample", "org.springframework.boot.cli.SampleIntegrationTests#webSample", "org.springframework.boot.cli.TestCommandIntegrationTests#integrationAutoConfigEmbeddedTest", "org.springframework.boot.cli.TestCommandIntegrationTests#integrationAutoConfigTest", "org.springframework.boot.gradle.ClassifierTests#classifierInBootExtension", "org.springframework.boot.gradle.ClassifierTests#classifierInBootTask", "org.springframework.boot.gradle.CustomVersionManagementTests#exclusionsAreStillInPlace", "org.springframework.boot.gradle.FlatdirTests#flatdir", "org.springframework.boot.gradle.InstallTests#cleanInstall", "org.springframework.boot.gradle.InstallTests#cleanInstallApp", "org.springframework.boot.gradle.InstallTests#cleanInstallVersionManagement", "org.springframework.boot.gradle.MainClassTests#buildFromRunTask", "org.springframework.boot.gradle.MultiProjectRepackagingTests#repackageWithCommonFileDependency", "org.springframework.boot.gradle.MultiProjectRepackagingTests#repackageWithTransitiveFileDependency", "org.springframework.boot.gradle.NoJarTests#nojar", "org.springframework.boot.gradle.RepackagingTests#repackageWithFileDependency", "org.springframework.boot.gradle.RepackagingTests#repackagingDisabled", "org.springframework.boot.gradle.RepackagingTests#repackagingDisabledWithCustomRepackagedJar", "org.springframework.boot.gradle.RepackagingTests#repackagingDisabledWithCustomRepackagedJarUsingStringJarTaskReference", "org.springframework.boot.gradle.RepackagingTests#repackagingEnabled", "org.springframework.boot.gradle.RepackagingTests#repackagingEnabledWithCustomRepackagedJar", "org.springframework.boot.gradle.RepackagingTests#repackagingEnableWithCustomRepackagedJarUsingStringJarTaskReference", "org.springframework.boot.gradle.SpringLoadedTests#defaultJvmArgsArePreservedWhenLoadedAgentIsConfigured", "org.springframework.boot.gradle.SpringLoadedTests#springLoadedCanBeUsedWithGradle16", "org.springframework.boot.gradle.WarPackagingTests#onlyJettyIsPackackedInWebInfLibProvided", "org.springframework.boot.gradle.WarPackagingTests#onlyTomcatIsPackackedInWebInfLibProvided", "sample.data.gemfire.SampleDataGemFireApplicationTests#testGemstonesApp", "sample.data.redis.SampleRedisApplicationTests#testDefaultSettings", "sample.liquibase.SampleLiquibaseApplicationTests#testDefaultSettings", "sample.parent.consumer.SampleIntegrationParentApplicationTests#testVanillaExchange")

matchedFailures <- read_csv("matched-failures.csv",
    col_types = "ccccccdddddd"
) %>%
    mutate(failingException = extractExceptionName(failureMessage)) %>%
    filter(failureMessage != "SleepyTimeOut(ProbableDeadlock)") %>%
    mutate_if(is.numeric, ~ replace(., is.na(.), 0))

# Join the repro results with the original matched failure results
reproResults <- matchedFailures %>%
    left_join(allReproResults,
        by = c("test" = "test", "failureID" = "traceHash"),
        suffix = c(".matchedFailures", "")
    ) %>%
    mutate(count.FlakeRake = case_when(flakerake == 0 & count.FlakeRake > 0 ~ 0, TRUE ~ count.FlakeRake)) %>% # NAs to 0's
    filter((rerun > 0) & (flakerake > 0 | isolatedRerun > 2 | flakeFlaggerRepl > 2)) # Filter to only the failures we care about: flakerake or isolated rerun reproducing an original rerun failure

reproByFailure <- reproResults %>%
    group_by(slug, test, failureID) %>%
    summarise(
        reproRate.FlakeRake = max(count.FlakeRake / tried.FlakeRake),
        reproRate.baseline = max(count.Rerun / 10000),
        reproRate.IsolatedRerun = max(count.IsolatedRerun) / 10000,
        reproRate.FlakeFlaggerRepl = max(count.flakeFlaggerRepl) / 10000
    ) %>%
    mutate_if(is.numeric, ~ replace_na(., 0) %>%
        replace(., is.infinite(.), 0)) %>%
    ungroup() %>%
    mutate(reproRate.MaxAlternative = ifelse(reproRate.IsolatedRerun > reproRate.FlakeFlaggerRepl, reproRate.IsolatedRerun, reproRate.FlakeFlaggerRepl))

[1m[1mRows: [1m[22m[34m[34m1087[34m[39m [1m[1mColumns: [1m[22m[34m[34m13[34m[39m

[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (7): url, slug, sha, test, sleepLines, traceHash, exemplarFirstLine
[32mdbl[39m (6): count.FlakeRake, tried.FlakeRake, reproAvgTime, count.Rerun, count....


[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set [30m[47m[30m[47m`show_col_types = FALSE`[47m[30m[49m[39m to quiet this message.

“One or more parsing issues, see `problems()` for details”
`summarise()` has grouped output by 'slug', 'test'. You can override using the `.groups` argument.

`mutate_if()` ignored the following grouping variables:
Columns `slug`, `test`



## Generates Table 2: Reproduction rate for each failure

In [3]:
breaks <- c(0,0.1,0.25,0.5,0.75,0.99,1)
bucketedRepro <- reproByFailure %>%
        mutate( #Bucket the repro rates
            reproRate.FlakeRake    =cut_format(reproRate.FlakeRake,     breaks=breaks, format_fun = function(x) paste0(x * 100, "%")),
            reproRate.MaxAlternative=cut_format(reproRate.MaxAlternative, breaks=breaks,  format_fun = function(x) paste0(x * 100, "%"))
            ) %>%
        mutate_if(is.factor, as.character) %>% 
        mutate_at(vars(contains("reproRate")), ~replace(., is.na(.), 0)) %>%
        group_by(reproRate.FlakeRake,reproRate.MaxAlternative) %>% summarise(count=n()) %>%
        bind_rows(group_by(.,reproRate.FlakeRake) %>%
                  summarise(count=sum(count)) %>%
                  mutate(reproRate.MaxAlternative='Total')) %>%
          bind_rows(group_by(.,reproRate.MaxAlternative) %>%
                  summarise(count=sum(count)) %>%
                  mutate(reproRate.FlakeRake='Total'))
bucketedRepro %>% 
pivot_wider(names_from=reproRate.MaxAlternative,values_from=count) %>% relocate(`0`, .before=`(0%, 10%]`) %>%
relocate(`(10%, 25%]`, .before=`(25%, 50%]`) %>%
mutate_if(is.numeric, ~replace(., is.na(.), 0)) %>%
arrange(reproRate.FlakeRake) %>% kable("latex", 
        booktabs=TRUE,escape=TRUE, linesep="", digits=0, format.args = list(big.mark = ",", scientific = FALSE))

`summarise()` has grouped output by 'reproRate.FlakeRake'. You can override using the `.groups` argument.

`mutate_if()` ignored the following grouping variables:
Column `reproRate.FlakeRake`




\begin{tabular}{lrrrrr}
\toprule
reproRate.FlakeRake & 0 & (0\%, 10\%] & (10\%, 25\%] & (25\%, 50\%] & Total\\
\midrule
(0\%, 10\%] & 7 & 13 & 0 & 0 & 20\\
(10\%, 25\%] & 1 & 2 & 0 & 0 & 3\\
(25\%, 50\%] & 2 & 1 & 0 & 0 & 3\\
(50\%, 75\%] & 3 & 5 & 0 & 0 & 8\\
(75\%, 99\%] & 1 & 5 & 0 & 0 & 6\\
(99\%, 100\%] & 35 & 52 & 4 & 2 & 93\\
0 & 25 & 61 & 23 & 5 & 114\\
Total & 74 & 139 & 27 & 7 & 247\\
\bottomrule
\end{tabular}