Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ object ViewElement {
formatIn = config.formatIn,
rawInclude = None,
rawExclude = None,
fuzzyInclude = None,
fuzzyExclude = None,
excludeFields = None,
fieldNames = None,
showEmptyFields = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ class ConfigYamlLoaderImpl extends ConfigYamlLoader {
parseOptionalListString(feedFields, "rawInclude")
val rawExcludeValidated =
parseOptionalListString(feedFields, "rawExclude")
val fuzzyIncludeValidated =
parseOptionalListString(feedFields, "fuzzyInclude")
val fuzzyExcludeValidated =
parseOptionalListString(feedFields, "fuzzyExclude")
val excludeFieldsValidated =
parseOptionalListString(
feedFields,
Expand All @@ -218,6 +222,8 @@ class ConfigYamlLoaderImpl extends ConfigYamlLoader {
fieldNamesValidated,
rawIncludeValidated,
rawExcludeValidated,
fuzzyIncludeValidated,
fuzzyExcludeValidated,
excludeFieldsValidated,
showEmptyFieldsValidated
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package ru.d10xa.jsonlogviewer

import ru.d10xa.jsonlogviewer.config.ResolvedConfig

/** Fuzzy filter that searches for patterns across all field values of a parsed
  * log entry using token-based matching.
  *
  * Unlike rawFilter (regex on raw strings) or SQL filters (exact field
  * matching), the fuzzy filter:
  *   - works on the parsed entry (falling back to the raw line when parsing
  *     failed)
  *   - searches across all field values (level, message, stackTrace, etc.)
  *   - uses tokenization to ignore punctuation
  *   - is case-insensitive
  *   - supports partial token matching
  *
  * Example:
  * {{{
  * fuzzyInclude: ["error timeout"]
  * // Will match:
  * //   {"level": "ERROR", "message": "Connection timeout"}
  * //   {"message": "Error: request timeout occurred"}
  * //   {"status": "error", "details": "timeout"}
  * }}}
  *
  * Only attribute values are searched, not attribute names: a key such as
  * `error_code` does not contribute the token "error".
  *
  * Patterns are tokenized once, when the filter is constructed. A pattern that
  * yields no tokens (for example "!" or a single character) has nothing to
  * search for and is ignored; otherwise it would vacuously match every entry,
  * which for fuzzyExclude would drop all output.
  *
  * @param config
  *   Resolved configuration containing fuzzyInclude and fuzzyExclude patterns
  */
class FuzzyFilter(config: ResolvedConfig) {

  /** Token sets of the usable fuzzyInclude patterns, computed once. */
  private val includeTokenSets: List[Set[String]] =
    tokenizePatterns(config.fuzzyInclude)

  /** Token sets of the usable fuzzyExclude patterns, computed once. */
  private val excludeTokenSets: List[Set[String]] =
    tokenizePatterns(config.fuzzyExclude)

  /** Tokenizes every configured pattern and discards those without tokens.
    *
    * @param patterns
    *   Optional list of raw patterns from the configuration
    * @return
    *   One non-empty token set per usable pattern; empty when nothing is
    *   configured
    */
  private def tokenizePatterns(
    patterns: Option[List[String]]
  ): List[Set[String]] =
    patterns
      .getOrElse(Nil)
      .map(FuzzyTokenizer.tokenize)
      .filter(_.nonEmpty)

  /** Collects all values from the parsed log entry into a single searchable
    * string.
    *
    * Includes the standard fields (timestamp, level, message, stackTrace,
    * loggerName, threadName) and the values of otherAttributes.
    *
    * @param parseResult
    *   Parsed log entry
    * @return
    *   Space-separated concatenation of all field values, or the raw line if
    *   the entry could not be parsed
    */
  private def collectAllValues(parseResult: ParseResult): String =
    parseResult.parsed match {
      case None => parseResult.raw // Fallback to raw string if parsing failed
      case Some(parsed) =>
        val standardFields = List(
          parsed.timestamp,
          parsed.level,
          parsed.message,
          parsed.stackTrace,
          parsed.loggerName,
          parsed.threadName
        ).flatten

        (standardFields ++ parsed.otherAttributes.values).mkString(" ")
    }

  /** Token-based fuzzy matching: checks that every pattern token occurs in the
    * text.
    *
    * Uses partial matching: pattern token "timeout" matches text tokens
    * "timeout", "timeouts", "timeout_ms", etc.
    *
    * @param textTokens
    *   Tokens of the log entry text
    * @param patternTokens
    *   Tokens of one search pattern
    * @return
    *   true if each pattern token is a substring of at least one text token
    */
  private def containsAllTokens(
    textTokens: Set[String],
    patternTokens: Set[String]
  ): Boolean =
    patternTokens.forall { patternToken =>
      textTokens.exists(_.contains(patternToken))
    }

  /** Tests whether the parsed log entry passes the fuzzyInclude and
    * fuzzyExclude patterns.
    *
    * Logic:
    *   - fuzzyInclude: at least one pattern must match (OR logic)
    *   - fuzzyExclude: no pattern may match (AND NOT logic)
    *   - if there is no usable fuzzyInclude pattern, the include check passes
    *
    * @param parseResult
    *   Parsed log entry to test
    * @return
    *   true if the entry should be included in the output
    */
  def test(parseResult: ParseResult): Boolean =
    if (includeTokenSets.isEmpty && excludeTokenSets.isEmpty) {
      // Nothing configured: skip collecting and tokenizing the entry entirely.
      true
    } else {
      // Tokenize the entry once and reuse the tokens for every pattern.
      val textTokens = FuzzyTokenizer.tokenize(collectAllValues(parseResult))

      def matches(patternTokens: Set[String]): Boolean =
        containsAllTokens(textTokens, patternTokens)

      val included =
        includeTokenSets.isEmpty || includeTokenSets.exists(matches)
      val excluded = excludeTokenSets.exists(matches)

      included && !excluded
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package ru.d10xa.jsonlogviewer

/** Tokenizer for fuzzy search that handles punctuation, quotes, and special
  * characters in log messages.
  *
  * Rules:
  *   - Splits text into words made of letters, digits, dots and underscores
  *   - Keeps dots and underscores inside words (e.g., john.doe, user_id)
  *   - Removes standalone punctuation
  *   - Converts to lowercase (locale-independent) for case-insensitive
  *     matching
  *   - Filters tokens shorter than 2 characters
  *   - Treats letters and digits of any script as word characters, not only
  *     ASCII
  *
  * Examples:
  * {{{
  * tokenize("User 'john.doe' timeout")        // Set("user", "john.doe", "timeout")
  * tokenize("ERROR: database.query() failed") // Set("error", "database.query", "failed")
  * tokenize("card_number=1234")               // Set("card_number", "1234")
  * }}}
  */
object FuzzyTokenizer {

  /** Matches runs of Unicode letters, combining marks, numbers, underscores
    * and dots. This preserves: user_id, john.doe, 192.168.1.1, etc.
    *
    * Unicode categories are used instead of `\w` because `\w` is ASCII-only by
    * default in Java regular expressions, which would split or drop non-ASCII
    * words. Compiled once rather than on every call.
    */
  private val TokenPattern = """[\p{L}\p{M}\p{N}_.]+""".r

  /** Tokenizes text into a set of searchable words.
    *
    * @param text
    *   Text to tokenize
    * @return
    *   Set of normalized tokens (lowercase, minimum 2 characters, containing
    *   at least one letter or digit)
    */
  def tokenize(text: String): Set[String] =
    TokenPattern
      // Locale.ROOT keeps lowercasing stable regardless of the JVM default
      // locale (e.g. Turkish would map "I" to a dotless "ı").
      .findAllIn(text.toLowerCase(java.util.Locale.ROOT))
      .filter(token => token.length >= 2 && !isOnlyPunctuation(token))
      .toSet

  /** Checks if a token consists only of non-alphanumeric characters.
    *
    * @param token
    *   Token to check
    * @return
    *   true if the token contains no letter or digit (e.g. "..." or "__")
    */
  private def isOnlyPunctuation(token: String): Boolean =
    !token.exists(_.isLetterOrDigit)
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ object LogViewerStream {
val timestampFilter = TimestampFilter()
val parseResultKeys = ParseResultKeys(resolvedConfig)
val logLineFilter = LogLineFilter(resolvedConfig, parseResultKeys)
val fuzzyFilter = new FuzzyFilter(resolvedConfig)

val outputLineFormatter = resolvedConfig.formatOut match {
case Some(Config.FormatOut.Raw) => RawFormatter()
Expand All @@ -142,6 +143,7 @@ object LogViewerStream {
.map(parser.parse)
.filter(logLineFilter.grep)
.filter(logLineFilter.logLineQueryPredicate)
.filter(fuzzyFilter.test)
.through(
timestampFilter.filterTimestampAfter(resolvedConfig.timestampAfter)
)
Expand All @@ -165,6 +167,7 @@ object LogViewerStream {
val timestampFilter = TimestampFilter()
val parseResultKeys = ParseResultKeys(resolvedConfig)
val logLineFilter = LogLineFilter(resolvedConfig, parseResultKeys)
val fuzzyFilter = new FuzzyFilter(resolvedConfig)

val outputLineFormatter = resolvedConfig.formatOut match {
case Some(Config.FormatOut.Raw) => RawFormatter()
Expand All @@ -183,6 +186,7 @@ object LogViewerStream {
.map(csvHeaderParser.parse)
.filter(logLineFilter.grep)
.filter(logLineFilter.logLineQueryPredicate)
.filter(fuzzyFilter.test)
.through(
timestampFilter.filterTimestampAfter(resolvedConfig.timestampAfter)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ final case class ResolvedConfig(
// Feed-specific settings
rawInclude: Option[List[String]],
rawExclude: Option[List[String]],
fuzzyInclude: Option[List[String]],
fuzzyExclude: Option[List[String]],
excludeFields: Option[List[String]],

// Timestamp settings
Expand Down Expand Up @@ -89,6 +91,8 @@ object ConfigResolver {
fieldNames = feedFieldNames,
rawInclude = feed.rawInclude,
rawExclude = feed.rawExclude,
fuzzyInclude = feed.fuzzyInclude,
fuzzyExclude = feed.fuzzyExclude,
excludeFields = feed.excludeFields,
timestampAfter = config.timestamp.after,
timestampBefore = config.timestamp.before,
Expand All @@ -109,6 +113,8 @@ object ConfigResolver {
fieldNames = globalFieldNames,
rawInclude = None,
rawExclude = None,
fuzzyInclude = None,
fuzzyExclude = None,
excludeFields = None,
timestampAfter = config.timestamp.after,
timestampBefore = config.timestamp.before,
Expand All @@ -130,6 +136,8 @@ object ConfigResolver {
fieldNames = config.fieldNames,
rawInclude = None,
rawExclude = None,
fuzzyInclude = None,
fuzzyExclude = None,
excludeFields = None,
timestampAfter = config.timestamp.after,
timestampBefore = config.timestamp.before,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ case class Feed(
fieldNames: Option[FieldNames],
rawInclude: Option[List[String]],
rawExclude: Option[List[String]],
fuzzyInclude: Option[List[String]],
fuzzyExclude: Option[List[String]],
excludeFields: Option[List[String]],
showEmptyFields: Option[Boolean]
)
Loading