Skip to content

Commit

Permalink
moved average benchmark results to p90 results (#2152)
Browse files Browse the repository at this point in the history
* moved average benchmark results to p90 results
* increased the measured iterations from 15 to 20 and reduced warmups from 3 to 2 by default
* removed testOrchestrator option from saucelabs config and gradle
* profiling benchmark now prints raw values to the console, to later read them from the log file, but we just assert on the cpu overhead
* Benchmarks in SauceLabs will now run on 2 devices with Android 12, 3 with Android 11, 2 with Android 10
* added collecting the refresh rate of the device
* sdk init duration increase threshold increased to 250 milliseconds
* cpu overhead range for the same operation increased to -2%..2%
* added a test to send profiles to a Sentry project (dogfooding test)
  • Loading branch information
stefanosiano committed Jul 13, 2022
1 parent 3ad96de commit f160e0d
Show file tree
Hide file tree
Showing 18 changed files with 525 additions and 194 deletions.
31 changes: 15 additions & 16 deletions .sauce/sentry-uitest-android-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,25 @@ suites:
# Devices are chosen so that there is a high-end and a low-end device for each api level
- name: "Android 12 (api 31)"
devices:
- id: Google_Pixel_6_Pro_real_us # Google Pixel 6 Pro - api 31 (12)
- id: Google_Pixel_3_12_real_us # Google Pixel 3 - api 31 (12)
testOptions:
useTestOrchestrator: true
- id: Google_Pixel_6_Pro_real_us # Google Pixel 6 Pro - api 31 (12) - high end
- id: Google_Pixel_3_12_real_us # Google Pixel 3 - api 31 (12) - low end

- name: "Android 11 (api 30)"
devices:
- id: OnePlus_9_Pro_real_us # OnePlus 9 Pro - api 30 (11)
- id: Google_Pixel_2_real_us # Google Pixel 2 - api 30 (11)
testOptions:
useTestOrchestrator: true
- id: OnePlus_9_Pro_real_us # OnePlus 9 Pro - api 30 (11) - high end
- id: Google_Pixel_4_real_us # Google Pixel 4 - api 30 (11) - mid end
- id: Google_Pixel_2_real_us # Google Pixel 2 - api 30 (11) - low end

# Commenting for the moment, due to the error "Cannot install test-services-1.4.1.apk on device" on low Android versions
# - name: "Android 5 (api 22)"
# devices:
# - id: Amazon_Kindle_Fire_HD_8_real_us # Amazon Kindle Fire HD 8 - api 22 (5.1.1)
# testOptions:
# useTestOrchestrator: true
- name: "Android 10 (api 29)"
devices:
- id: Google_Pixel_4_XL_real_us1 # Google Pixel 4 XL - api 29 (10)
- id: Nokia_7_1_real_us # Nokia 7.1 - api 29 (10)

# - id: Google_Pixel_4_XL_real_us1 # Google Pixel 4 XL - api 29 (10)
# - id: Motorola_Moto_G_Power_real_us # Motorola Moto G Power (2021) - api 29 (10)
# At the time of writing (July, 4, 2022), the market share per android version is:
# 12.0 = 17.54%, 11.0 = 31.65%, 10.0 = 21.92%
# Using these 3 versions we cover 71.11% of all devices out there. Currently, this is enough for benchmarking scope
# Leaving these devices here in case we change our minds about them
# devices:
# - id: Samsung_Galaxy_S8_plus_real_us # Samsung Galaxy S8+ - api 28 (9)
# - id: LG_G8_ThinQ_real_us # LG G8 ThinQ - api 28 (9)
# - id: OnePlus_5_real_us # OnePlus 5 - api 27 (8.1.0)
Expand All @@ -54,6 +52,7 @@ suites:
# - id: LG_K10_real # LG K10 - api 24 (7.0)
# - id: Samsung_Galaxy_S6_Edge_Plus_real # Samsung Galaxy S6 Edge+ - api 23 (6.0.1)
# - id: Samsung_Tab_E_real_us # Samsung Tab E - api 23 (6.0.1)
# - id: Amazon_Kindle_Fire_HD_8_real_us # Amazon Kindle Fire HD 8 - api 22 (5.1.1)

artifacts:
download:
Expand Down
2 changes: 0 additions & 2 deletions .sauce/sentry-uitest-android-ui.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ suites:
- name: "Android 12 (api 31)"
devices:
- id: Samsung_Galaxy_S22_Ultra_5G_real_us # Samsung Galaxy S22 Ultra 5G - api 31 (12)
testOptions:
useTestOrchestrator: true

# Controls what artifacts to fetch when the suite on Sauce Cloud has finished.
artifacts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ android {
// https://developer.android.com/training/testing/instrumented-tests/androidx-test-libraries/runner#enable-gradle
// This doesn't work on some devices with Android 11+. Clearing package data resets permissions.
// Check the readme for more info.
// Test orchestrator was removed due to issues with SauceLabs
// testInstrumentationRunnerArguments["clearPackageData"] = "true"
}

Expand All @@ -34,10 +35,6 @@ android {
viewBinding = true
}

testOptions {
execution = "ANDROIDX_TEST_ORCHESTRATOR"
}

signingConfigs {
getByName("debug") {
storeFile = rootProject.file("debug.keystore")
Expand All @@ -51,7 +48,6 @@ android {

buildTypes {
getByName("debug") {
isDebuggable = false
isMinifyEnabled = true
signingConfig = signingConfigs.getByName("debug")
proguardFiles(getDefaultProguardFile("proguard-android-optimize.txt"), "benchmark-proguard-rules.pro")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package io.sentry.uitest.android.benchmark

import android.content.Context
import android.view.Choreographer
import androidx.lifecycle.Lifecycle
import androidx.test.core.app.ApplicationProvider
import androidx.test.core.app.launchActivity
import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.runner.AndroidJUnitRunner
import kotlin.test.BeforeTest
Expand All @@ -22,5 +24,8 @@ abstract class BaseBenchmarkTest {
runner.runOnMainSync {
choreographer = Choreographer.getInstance()
}
// We need the refresh rate, but we can get it only from the activity, so we start and destroy one
val benchmarkScenario = launchActivity<BenchmarkActivity>()
benchmarkScenario.moveToState(Lifecycle.State.DESTROYED)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@ class SdkBenchmarkTest : BaseBenchmarkTest() {
it.tracesSampleRate = 1.0
}
}
val simpleSdkResult = BenchmarkOperation.compare(opNoSdk, "No Sdk", opSimpleSdk, "Simple Sdk")
val perfProfilingSdkResult = BenchmarkOperation.compare(opNoSdk2, "No Sdk", opPerfProfilingSdk, "Sdk with perf and profiling")
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
val simpleSdkResults = BenchmarkOperation.compare(opNoSdk, "No Sdk", opSimpleSdk, "Simple Sdk", refreshRate)
val simpleSdkResult = simpleSdkResults.getSummaryResult()
simpleSdkResult.printResults()
val perfProfilingSdkResults = BenchmarkOperation.compare(opNoSdk2, "No Sdk", opPerfProfilingSdk, "Sdk with perf and profiling", refreshRate)
val perfProfilingSdkResult = perfProfilingSdkResults.getSummaryResult()
perfProfilingSdkResult.printResults()

val maxDurationThreshold = TimeUnit.MILLISECONDS.toNanos(100)
val maxDurationThreshold = TimeUnit.MILLISECONDS.toNanos(250)
assertTrue(simpleSdkResult.durationIncreaseNanos in 0..maxDurationThreshold)
assertTrue(simpleSdkResult.cpuTimeIncreaseMillis in 0..100)
assertTrue(perfProfilingSdkResult.durationIncreaseNanos in simpleSdkResult.durationIncreaseNanos..maxDurationThreshold)
assertTrue(perfProfilingSdkResult.cpuTimeIncreaseMillis in simpleSdkResult.cpuTimeIncreaseMillis..100)
assertTrue(perfProfilingSdkResult.durationIncreaseNanos in 0..maxDurationThreshold)
assertTrue(perfProfilingSdkResult.cpuTimeIncreaseMillis in 0..100)
}

private fun getOperation(init: (() -> Unit)? = null) = BenchmarkOperation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
// should be very similar.
val op1 = BenchmarkOperation(choreographer, op = getOperation(runner))
val op2 = BenchmarkOperation(choreographer, op = getOperation(runner))
val comparisonResult = BenchmarkOperation.compare(op1, "Op1", op2, "Op2")
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
val comparisonResults = BenchmarkOperation.compare(op1, "Op1", op2, "Op2", refreshRate)
val comparisonResult = comparisonResults.getSummaryResult()
comparisonResult.printResults()

assertTrue(comparisonResult.durationIncreasePercentage in -1F..1F)
assertTrue(comparisonResult.cpuTimeIncreasePercentage in -1F..1F)
// Currently we just want to assert the cpu overhead
assertTrue(comparisonResult.cpuTimeIncreasePercentage in -2F..2F)
// The fps decrease comparison is skipped, due to approximation: 59.51 and 59.49 fps are considered 60 and 59,
// respectively. Also, if the average fps is 20 or 60, a difference of 1 fps becomes 5% or 1.66% respectively.
assertTrue(comparisonResult.droppedFramesIncreasePercentage in -1F..1F)
}

@Test
Expand Down Expand Up @@ -76,17 +78,20 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
}
}
)
val comparisonResult = BenchmarkOperation.compare(
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
val comparisonResults = BenchmarkOperation.compare(
benchmarkOperationNoTransaction,
"NoTransaction",
benchmarkOperationProfiled,
"ProfiledTransaction"
"ProfiledTransaction",
refreshRate
)
comparisonResults.printAllRuns("Profiling Benchmark")
val comparisonResult = comparisonResults.getSummaryResult()
comparisonResult.printResults()

assertTrue(comparisonResult.durationIncreasePercentage in 0F..5F)
// Currently we just want to assert the cpu overhead
assertTrue(comparisonResult.cpuTimeIncreasePercentage in 0F..5F)
assertTrue(comparisonResult.fpsDecreasePercentage in 0F..5F)
assertTrue(comparisonResult.droppedFramesIncreasePercentage in 0F..5F)
}

/**
Expand All @@ -103,12 +108,10 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
}
// Just swipe the list some times: this is the benchmarked operation
swipeList(2)
// We finish the transaction
// We finish the transaction. We do it on main thread, so there's no need to perform other operations after it
runner.runOnMainSync {
transaction?.finish()
}
// We swipe a last time to measure how finishing the transaction may affect other operations
swipeList(1)

benchmarkScenario.moveToState(Lifecycle.State.DESTROYED)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package io.sentry.uitest.android.benchmark.util

import java.util.concurrent.TimeUnit

/**
 * Raw, per-iteration result of a [BenchmarkOperation] comparison.
 * Holds one value per measured iteration for each metric; use [getSummaryResult] to
 * collapse the lists into a single p90 summary.
 */
internal data class BenchmarkComparisonResult(
    /** Number of measured iterations. */
    val iterations: Int,
    /** Screen refresh rate. */
    val refreshRate: Float,
    /** Number of cpu cores of the device. */
    val cores: Int,
    /** Name of the first compared operation. */
    val op1Name: String,
    /** Name of the second compared operation. */
    val op2Name: String,
    /** Raw cpu time in milliseconds of op1. */
    val op1CpuTime: List<Long>,
    /** Raw cpu time in milliseconds of op2. */
    val op2CpuTime: List<Long>,
    /** Increase of cpu time in milliseconds. */
    val cpuTimeIncreases: List<Long>,
    /** Increase of cpu time in percentage. */
    val cpuTimeIncreasePercentages: List<Double>,
    /** Raw dropped frames of op1. */
    val op1DroppedFrames: List<Double>,
    /** Raw dropped frames of op2. */
    val op2DroppedFrames: List<Double>,
    /** Increase of dropped frames. */
    val droppedFramesIncreases: List<Double>,
    /** Increase of dropped frames in percentage. */
    val droppedFramesIncreasePercentages: List<Double>,
    /** Raw duration in nanoseconds of op1. */
    val op1Duration: List<Long>,
    /** Raw duration in nanoseconds of op2. */
    val op2Duration: List<Long>,
    /** Increase of duration in nanoseconds. If it's low enough, no end user will ever realize it. */
    val durationIncreaseNanos: List<Long>,
    /** Increase of duration in percentage. */
    val durationIncreasePercentage: List<Double>,
    /** Raw fps of op1. */
    val op1Fps: List<Int>,
    /** Raw fps of op2. */
    val op2Fps: List<Int>,
    /** Decrease of fps. */
    val fpsDecreases: List<Int>,
    /** Decrease of fps in percentage. */
    val fpsDecreasePercentages: List<Double>
) {

    /**
     * Prints the raw results of all runs of the comparison.
     * Each printed line is prefixed by [prefix], to allow parsers to easily parse log files to read raw values.
     */
    fun printAllRuns(prefix: String) {
        repeat(iterations) { index ->

            println("$prefix ==================== Iteration $index ====================")

            println("$prefix [$op2Name]: duration=${op2Duration[index]} ns, cpuTime=${op2CpuTime[index]}, fps=${op2Fps[index]}, droppedFrames=${op2DroppedFrames[index]}")
            println("$prefix [$op1Name]: duration=${op1Duration[index]} ns, cpuTime=${op1CpuTime[index]}, fps=${op1Fps[index]}, droppedFrames=${op1DroppedFrames[index]}")
            println(
                "$prefix Duration increase: %.2f%% (%d ns = %d ms)".format(
                    durationIncreasePercentage[index],
                    durationIncreaseNanos[index],
                    TimeUnit.NANOSECONDS.toMillis(durationIncreaseNanos[index])
                )
            )

            println(
                "$prefix CPU time overhead, over $cores cores: %.2f%% (%d ms)".format(
                    cpuTimeIncreasePercentages[index],
                    cpuTimeIncreases[index]
                )
            )

            println("$prefix FPS decrease: %.2f%% (%d fps)".format(fpsDecreasePercentages[index], fpsDecreases[index]))

            // Total frames expected in the measured window, derived from the duration and the display refresh rate.
            val expectedFrames = TimeUnit.NANOSECONDS.toMillis(op2Duration[index]) * refreshRate / 1000
            println(
                "$prefix Frame drop increase, over $expectedFrames total frames, with $refreshRate hz: %.2f%% (%.2f)".format(
                    droppedFramesIncreasePercentages[index],
                    droppedFramesIncreases[index]
                )
            )
        }
    }

    /** Collapses the per-iteration lists into a single [BenchmarkSummaryResult] using the 90th percentile of each metric. */
    fun getSummaryResult() = BenchmarkSummaryResult(
        calculatePercentile(cpuTimeIncreases, 90, 0L),
        calculatePercentile(cpuTimeIncreasePercentages, 90, 0.0),
        calculatePercentile(droppedFramesIncreases, 90, 0.0),
        calculatePercentile(droppedFramesIncreasePercentages, 90, 0.0),
        calculatePercentile(durationIncreaseNanos, 90, 0L),
        calculatePercentile(durationIncreasePercentage, 90, 0.0),
        calculatePercentile(fpsDecreases, 90, 0),
        calculatePercentile(fpsDecreasePercentages, 90, 0.0)
    )

    /**
     * Calculate the [percentile] of the [list]. [percentile] should be in the range 0, 100.
     * Returns [default] when [list] is empty: a typed default avoids the previous unchecked
     * `0 as T` cast, which threw a ClassCastException at runtime for Long/Double lists.
     * Note: the index is computed with integer division (rounds down), matching the original behavior.
     */
    private fun <T : Comparable<T>> calculatePercentile(list: List<T>, percentile: Int, default: T): T {
        if (list.isEmpty()) {
            return default
        }
        val sortedList = list.sorted()
        // Upper bound is list.size - 1: coercing to list.size could produce an out-of-bounds index.
        val percentileIndex = (list.size * percentile / 100 - 1).coerceIn(0, list.size - 1)
        return sortedList[percentileIndex]
    }
}

/** Summary (p90) result of the [BenchmarkOperation] comparison, as produced by [BenchmarkComparisonResult.getSummaryResult]. */
internal data class BenchmarkSummaryResult(
    /**
     * Increase of cpu time in milliseconds.
     * It has no direct impact on performance of the app, but it has on battery usage, as the cpu is 'awaken' longer.
     */
    val cpuTimeIncreaseMillis: Long,
    /** Increase of cpu time in percentage. */
    val cpuTimeIncreasePercentage: Double,
    /**
     * Increase of dropped frames. Very important, as it weights dropped frames based on the time
     * passed between each frame. This is the metric end users can perceive as 'performance' in app usage.
     */
    val droppedFramesIncrease: Double,
    /** Increase of dropped frames in percentage. */
    val droppedFramesIncreasePercentage: Double,
    /** Increase of duration in nanoseconds. If it's low enough, no end user will ever realize it. */
    val durationIncreaseNanos: Long,
    /** Increase of duration in percentage. */
    val durationIncreasePercentage: Double,
    /**
     * Decrease of fps. Not really important, as even if fps are the same, the cpu could be
     * doing more work in the frame window, and it could be hidden by checking average fps only.
     */
    val fpsDecrease: Int,
    /** Decrease of fps in percentage. */
    val fpsDecreasePercentage: Double
) {

    /** Prints the summary results of the comparison. */
    fun printResults() {
        println(
            "Duration increase: %.2f%% (%d ns = %d ms)".format(
                durationIncreasePercentage,
                durationIncreaseNanos,
                TimeUnit.NANOSECONDS.toMillis(durationIncreaseNanos)
            )
        )
        println("CPU time overhead: %.2f%% (%d ms)".format(cpuTimeIncreasePercentage, cpuTimeIncreaseMillis))
        println("FPS decrease: %.2f%% (%d fps)".format(fpsDecreasePercentage, fpsDecrease))
        println("Frame drop increase: %.2f%% (%.2f)".format(droppedFramesIncreasePercentage, droppedFramesIncrease))
    }
}

0 comments on commit f160e0d

Please sign in to comment.