nix | cardano-node: tracing, new vs. old
deepfire committed May 13, 2022
1 parent 8d273ee commit 254f8dd
Showing 8 changed files with 144 additions and 73 deletions.
7 changes: 4 additions & 3 deletions Makefile
@@ -58,10 +58,11 @@ fixed: PROFILE = fixed-${ERA}
 fixed: ARGS += --arg 'autoStartCluster' true
 forge-stress: PROFILE = forge-stress-${ERA}
 forge-stress-plutus: PROFILE = forge-stress-plutus-${ERA}
-forge-stress-newtracing: PROFILE = forge-stress-newtracing-${ERA}
+forge-stress-oldtracing: PROFILE = forge-stress-oldtracing-${ERA}
 quick: PROFILE = quick-${ERA}
-forge-stress forge-stress-plutus forge-stress-newtracing quick: ARGS += --arg 'workbenchDevMode' true
-shell-dev cluster-shell-dev cluster-shell-trace cluster-shell-dev-trace fixed forge-stress forge-stress-plutus forge-stress-newtracing quick: shell
+quick-oldtracing: PROFILE = quick-oldtracing-${ERA}
+forge-stress forge-stress-plutus forge-stress-oldtracing quick quick-oldtracing: ARGS += --arg 'workbenchDevMode' true
+shell-dev cluster-shell-dev cluster-shell-trace cluster-shell-dev-trace fixed forge-stress forge-stress-plutus forge-stress-oldtracing quick quick-oldtracing: shell
 
 test-smoke: smoke ## Build the 'workbench-smoke-test', same as the Hydra job
 smoke:
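
The rename replaces the `forge-stress-newtracing` profile with explicit `-oldtracing` variants. A usage sketch (assuming the Makefile's existing `shell` target wiring; the comments are an interpretation, not part of the commit):

    make forge-stress-oldtracing   # forge-stress profile on the legacy tracing stack
    make quick-oldtracing          # quick profile on the legacy tracing stack
    make forge-stress              # unsuffixed profiles stay as before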
63 changes: 43 additions & 20 deletions nix/nixos/cardano-node-service.nix
@@ -9,27 +9,42 @@ let
   envConfig = cfg.environments.${cfg.environment};
   runtimeDir = if cfg.runtimeDir == null then cfg.stateDir else "/run/${cfg.runtimeDir}";
   mkScript = cfg:
-    let baseConfig = recursiveUpdate (cfg.nodeConfig
-      // (mapAttrs' (era: epoch:
-          nameValuePair "Test${era}HardForkAtEpoch" epoch
-        ) cfg.forceHardForks)
-      // (optionalAttrs cfg.useNewTopology {
-        EnableP2P = true;
-        TargetNumberOfRootPeers = cfg.targetNumberOfRootPeers;
-        TargetNumberOfKnownPeers = cfg.targetNumberOfKnownPeers;
-        TargetNumberOfEstablishedPeers = cfg.targetNumberOfEstablishedPeers;
-        TargetNumberOfActivePeers = cfg.targetNumberOfActivePeers;
-        TestEnableDevelopmentNetworkProtocols = true;
-        MaxConcurrencyBulkSync = 2;
-      })) cfg.extraNodeConfig;
+    let baseConfig =
+          recursiveUpdate
+            (cfg.nodeConfig
+             // (mapAttrs' (era: epoch:
+                 nameValuePair "Test${era}HardForkAtEpoch" epoch
+               ) cfg.forceHardForks)
+             // (optionalAttrs cfg.useNewTopology {
+               EnableP2P = true;
+               TargetNumberOfRootPeers = cfg.targetNumberOfRootPeers;
+               TargetNumberOfKnownPeers = cfg.targetNumberOfKnownPeers;
+               TargetNumberOfEstablishedPeers = cfg.targetNumberOfEstablishedPeers;
+               TargetNumberOfActivePeers = cfg.targetNumberOfActivePeers;
+               TestEnableDevelopmentNetworkProtocols = true;
+               MaxConcurrencyBulkSync = 2;
+             })) cfg.extraNodeConfig;
+        baseInstanceConfig =
+          i:
+          if !cfg.useLegacyTracing
+          then baseConfig //
+               { ## XXX: remove once legacy tracing is dropped
+                 minSeverity = "Critical";
+                 setupScribes = [];
+                 setupBackends = [];
+                 defaultScribes = [];
+                 defaultBackends = [];
+                 options = {};
+               }
+          else baseConfig //
+               (optionalAttrs (baseConfig ? hasEKG) {
+                 hasEKG = baseConfig.hasEKG + i;
+               }) //
+               (optionalAttrs (baseConfig ? hasPrometheus) {
+                 hasPrometheus = map (n: if isInt n then n + i else n) baseConfig.hasPrometheus;
+               });
     in i: let
-    instanceConfig = recursiveUpdate (baseConfig
-      // (optionalAttrs (baseConfig ? hasEKG) {
-        hasEKG = baseConfig.hasEKG + i;
-      })
-      // (optionalAttrs (baseConfig ? hasPrometheus) {
-        hasPrometheus = map (n: if isInt n then n + i else n) baseConfig.hasPrometheus;
-      })) (cfg.extraNodeInstanceConfig i);
+    instanceConfig = recursiveUpdate (baseInstanceConfig i) (cfg.extraNodeInstanceConfig i);
     nodeConfigFile = if (cfg.nodeConfigFile != null) then cfg.nodeConfigFile
       else toFile "config-${toString cfg.nodeId}-${toString i}.json" (toJSON instanceConfig);
     newTopology = {
@@ -425,6 +440,14 @@ in {
       '';
     };
 
+    useLegacyTracing = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Use the legacy tracing, based on iohk-monitoring-framework.
+      '';
+    };
+
     usePeersFromLedgerAfterSlot = mkOption {
       type = types.nullOr types.int;
       default = if cfg.kesKey != null then null
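
With `useLegacyTracing = false`, `baseInstanceConfig` mutes the iohk-monitoring side of the generated per-instance config. A quick check against a generated file (a sketch: the `config-<nodeId>-<i>.json` name comes from `nodeConfigFile` above, but the jq invocation is an assumption):

    jq '{minSeverity, setupScribes, setupBackends, defaultScribes, defaultBackends}' config-0-0.json
    # expected per the XXX-marked block above: "Critical" plus empty lists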
18 changes: 18 additions & 0 deletions nix/workbench/analyse.sh
@@ -206,3 +206,21 @@ analysis_set_filters() {
 
     filters+=(${filter_files[*]/#/--filter })
 }
+
+analysis_classify_traces() {
+    local name=${1:-current}; if test $# != 0; then shift; fi
+    local dir=$(run get "$name")
+
+    progress "analysis" "enumerating namespace from logs of node-0.."
+    local types=($(grep -h '^{' $dir/node-0/stdout* | jq --raw-output '.ns' 2>/dev/null | tr -d ']["' | sort -u))
+
+    progress_ne "analysis" "collecting occurence stats: "
+    for node in $dir/node-*/
+    do for type in ${types[*]}
+       do echo $(grep -hF "\"$type\"" $node/stdout* | wc -l) $type
+       done |
+           sort -nr > $node/log-namespace-occurence-stats.txt
+       echo -n ' '$(basename $node) >&2
+    done
+    echo >&2
+}
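
The same classification can be reproduced by hand against a single node, which helps when sanity-checking the function; a sketch assuming a workbench shell where `run get` resolves run directories:

    dir=$(run get current)
    # enumerate the trace namespace seen in node-0's logs
    grep -h '^{' $dir/node-0/stdout* | jq --raw-output '.ns' | tr -d ']["' | sort -u
    # count occurrences of one namespace in a node's logs
    grep -hF '"Node.ChainDB"' $dir/node-0/stdout* | wc -l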
48 changes: 31 additions & 17 deletions nix/workbench/profiles/node-services.nix
@@ -50,42 +50,56 @@ let
     UseTraceDispatcher = true;
 
     TraceOptionSeverity = [
-      { ns = ""; severity = "Debug"; }
-      { ns = "Node.Resources"; severity = "Debug"; }
-      { ns = "Node.ChainDB"; severity = "Debug"; }
-      # { ns = "Node.Mempool"; severity = "Debug"; }
-      # { ns = "Node.ChainDB.ImmutableDBEvent"; severity = "Warning"; }
+      { ns = ""; severity = "Debug"; }
+      { ns = "Node.ChainDB"; severity = "Info"; }
+      { ns = "Node.AcceptPolicy"; severity = "Info"; }
+      { ns = "Node.DNSResolver"; severity = "Info"; }
+      { ns = "Node.DNSSubscription"; severity = "Info"; }
+      { ns = "Node.DiffusionInit"; severity = "Info"; }
+      { ns = "Node.ErrorPolicy"; severity = "Info"; }
+      { ns = "Node.Forge"; severity = "Info"; }
+      { ns = "Node.IpSubscription"; severity = "Info"; }
+      { ns = "Node.LocalErrorPolicy"; severity = "Info"; }
+      { ns = "Node.Mempool"; severity = "Info"; }
+      { ns = "Node.Resources"; severity = "Info"; }
+
+      { ns = "Node.Mux"; severity = "Silence"; }
+      { ns = "Node.MuxLocal"; severity = "Silence"; }
+      { ns = "Node.TxInbound"; severity = "Silence"; }
+      { ns = "Node.TxSubmission2"; severity = "Silence"; }
+      { ns = "Node.BlockFetchSerialised"; severity = "Silence"; }
+      { ns = "Node.ChainSyncSerialised"; severity = "Silence"; }
     ];
 
     TraceOptionDetail = [
-      { ns = ""; detail = "DNormal"; }
-      { ns = "Node.BlockFetchClient"; detail = "DMinimal"; }
+      { ns = ""; detail = "DNormal"; }
+      { ns = "Node.BlockFetchClient"; detail = "DMinimal"; }
+      { ns = "Node.TxSubmission2";
+        detail = "DMinimal"; }
     ];
 
     TraceOptionBackend = [
       { ns = "";
         backends = [
           "Stdout MachineFormat"
           "EKGBackend"
-          "Forwarder"
+          # "Forwarder"
         ];
       }
-      # { ns = "Node.ChainDB";
-      #   backends = ["Stdout HumanFormatColoured"];
-      # }
     ];
 
     TraceOptionLimiter = [
-      { ns = "Node.ChainDB.OpenEvent";
-        limiterName = "ChainDB open limiter";
-        limiterFrequency = 0.1;
-      }
+      { ns = "Node.ChainDB.AddBlockEvent.AddedBlockToQueue"; limiterFrequency = 2.0; limiterName = "AddedBlockToQueue"; }
+      { ns = "Node.ChainDB.AddBlockEvent.AddedBlockToVolatileDB"; limiterFrequency = 2.0; limiterName = "AddedBlockToVolatileDB"; }
+      { ns = "Node.ChainDB.CopyToImmutableDBEvent.CopiedBlockToImmutableDB"; limiterFrequency = 2.0; limiterName = "CopiedBlockToImmutableDB"; }
+      { ns = "Node.ChainDB.AddBlockEvent.AddBlockValidation.ValidCandidate"; limiterFrequency = 2.0; limiterName = "ValidCandidate"; }
+      { ns = "Node.BlockFetchClient.CompletedBlockFetch"; limiterFrequency = 2.0; limiterName = "CompletedBlockFetch"; }
     ];
 
     TraceOptionForwarder = {
-      mode = "Initiator";
+      mode = "Responder";
       address = {
-        filePath = "/tmp/forwarder-${toString i}.sock";
+        filePath = "forwarder.sock";
       };
     };
   };
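
One way to confirm these options reach the node is to inspect its generated JSON config; a sketch, assuming the workbench writes the options above into the node's config file verbatim (the path here is hypothetical) and that limiterFrequency is in messages per second:

    jq -r '.TraceOptionLimiter[] | "\(.ns) -> \(.limiterFrequency) msg/s"' node-0/config.json
    jq -r '.TraceOptionForwarder.mode' node-0/config.json   # expected: "Responder"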
53 changes: 31 additions & 22 deletions nix/workbench/profiles/variants.jq
@@ -2,7 +2,24 @@
 ## variations of genesis/generator/node axes.
 
 def genesis_profile_variants:
+
+  { scenario: "fixed-loaded"
+  , composition:
+    { n_singular_hosts: 2
+    , n_dense_hosts: 0
+    }
+  , genesis:
+    { utxo: 6000
+    , delegators: 1300
+    , max_block_size: 80000
+    , epoch_length: 600
+    , parameter_k: 3
+    }
+  , node:
+    { shutdown_on_slot_synced: 10
+    }
+  , generator: { tps: 15 }
+  } as $quick_base
+  |
   { scenario: "fixed-loaded"
   , composition:
     { n_singular_hosts: 2
@@ -20,8 +37,8 @@
     }
   , generator: { tps: 15 }
   } as $forge_stress_base
-
-| { genesis:
+  |
+  { genesis:
     { alonzo:
       { maxTxExUnits:
         { exUnitsMem: 12500000
@@ -99,15 +116,15 @@
 
   , $forge_stress_base *
   { name: "forge-stress"
   }
 
-  , $forge_stress_base *
-  { name: "forge-stress-newtracing"
-  , node:
-    { tracing_backend: "trace-dispatcher"
-    }
-  }
+  , $forge_stress_base *
+  { name: "forge-stress-oldtracing"
+  }
 
   , $forge_stress_base *
   $plutus_base *
   { name: "forge-stress-plutus"
@@ -119,23 +136,15 @@
     }
   }
 
-  , { name: "quick"
-  , scenario: "fixed-loaded"
-  , composition:
-    { n_singular_hosts: 2
-    , n_dense_hosts: 0
-    }
-  , genesis:
-    { utxo: 6000
-    , delegators: 1300
-    , max_block_size: 80000
-    , epoch_length: 600
-    , parameter_k: 3
-    }
+  , $quick_base *
+  { name: "quick"
   , node:
-    { shutdown_on_slot_synced: 10
+    { tracing_backend: "trace-dispatcher"
     }
-  , generator: { tps: 15 }
   }
+
+  , $quick_base *
+  { name: "quick-oldtracing"
+  }
 
 ## Chainsync:
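
The `$quick_base * { ... }` pattern relies on jq's `*` operator, which deep-merges objects, so each profile spells out only its deltas against the base. A quick demonstration of the merge semantics:

    jq -cn '{ node: { shutdown_on_slot_synced: 10 } } * { node: { tracing_backend: "trace-dispatcher" } }'
    # => {"node":{"shutdown_on_slot_synced":10,"tracing_backend":"trace-dispatcher"}}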
11 changes: 2 additions & 9 deletions nix/workbench/run.sh
@@ -470,7 +470,8 @@
        --idle )              scenario_override='generic-idle';;
        --scenario | -s )     scenario_override=$2; shift;;
        --no-analysis )       analyse=;;
-       --analysis-can-fail ) analysis_can_fail=t;;
+       --analysis-can-fail | -f )
+                             analysis_can_fail=t;;
        --* ) msg "FATAL: unknown flag '$1'"; usage_run;;
        * ) break;; esac; shift; done
 
@@ -488,14 +489,6 @@
     scenario "$scenario" "$dir"
 
     run compat-meta-fixups "$tag"
-    if test -n "$analyse"
-    then progress "run | analysis" "processing logs of $(with_color white $tag)"
-         analyse std $tag ||
-         if test -n "$analysis_can_fail"
-         then progress "run | analysis" "log processing failed, but --analysis-can-fail prevents failure: $(with_color red $tag)"
-         else fail "analysis failed: $tag"
-              false; fi
-    fi
     ;;
 
     stop )
1 change: 1 addition & 0 deletions nix/workbench/scenario.sh
@@ -100,4 +100,5 @@ scenario_setup_termination() {
 
 scenario_cleanup_termination() {
     kill $__scenario_watcher_pid 2>/dev/null || true
+    trap - EXIT
 }
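
The added `trap - EXIT` restores default EXIT handling once the watcher is killed, so a normal shell exit no longer re-enters termination logic. The overall pattern, sketched with a hypothetical handler name (the actual trap is registered in scenario_setup_termination, not shown in this hunk):

    trap 'scenario_watcher_end' EXIT         # hypothetical: installed at setup time
    # ... scenario runs ...
    kill $__scenario_watcher_pid 2>/dev/null || true
    trap - EXIT                              # cleanup: back to default EXIT behaviour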
16 changes: 14 additions & 2 deletions nix/workbench/wb
@@ -96,6 +96,7 @@ start()
     local verbose=
     local manifest="{}"
     local iterations=1
+    local analyse=yes analysis_can_fail=
 
     local run_args=()
     local run_allocate_args=()
@@ -114,8 +115,8 @@ start()
 
        --idle )                   run_start_args+=($1);;
        --scenario | -s )          run_start_args+=($1 $2); shift;;
-       --no-analysis )            run_start_args+=($1);;
-       --analysis-can-fail )      run_start_args+=($1);;
+       --no-analysis )            analyse=;;
+       --analysis-can-fail | -f ) analysis_can_fail=t;;
 
        --cabal-mode | --cabal )   cabal_mode=t;;
        --supervisor | --backend-supervisor )
@@ -152,6 +153,17 @@ start()
        mkdir -p "$current_run_path"
 
        run ${run_args[@]} 'start' ${run_start_args[@]} $(run current-tag)
+
+       if test -z "$analyse"; then continue; fi
+
+       local tag=$(run current-tag)
+       progress "top-level | analysis" "processing logs of $(with_color white $tag)"
+       analyse std $tag ||
+           if test -n "$analysis_can_fail"
+           then progress "run | analysis" "log processing failed, but --analysis-can-fail prevents failure: $(with_color red $tag)"
+                iterations=$((iterations + 1))
+           else fail "analysis failed: $tag"
+                false; fi
     done
 }

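With analysis now driven from the start() loop, every iteration of a multi-run invocation gets analysed as it completes. An invocation sketch (flag names per the parser above; the overall command shape is an assumption):

    wb start --scenario fixed-loaded -f   # analyse each run; -f tolerates analysis failure
    wb start --no-analysis                # skip log analysis entirely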
