[REQUEST] Treat accented characters as regular characters #1119

YodaEmbedding · 2020-05-07T12:23:38Z

Similar to how -i sets case insensitivity, there should be a flag that allows one to filter through accented characters by typing regular (ASCII?) characters. For instance:

Björk
André Rieu
Renée Fleming

By typing bjork or andre, we should filter to only one of the above.

$ rofi -v
Version: 1.5.4-17-gc3caa20d (makepkg)

The text was updated successfully, but these errors were encountered:

DaveDavenport · 2020-05-07T13:05:47Z

Rofi currently uses the GRegex engine for matching and for finding the match position.
I am not sure whether it supports accent-insensitive matching. I agree it would be a nice feature to have.

DaveDavenport · 2020-09-23T17:58:58Z

This is not a trivial one, but I just tried out something random:

Highlighting is now disabled; I am not sure how well this works:

diff --git a/config/config.c b/config/config.c
index 05723e5a..111ec742 100644
--- a/config/config.c
+++ b/config/config.c
@@ -170,5 +170,8 @@ Settings config = {
     .drun_reload_desktop_cache = FALSE,
 
     /** Benchmarks */
-    .benchmark_ui              = FALSE
+    .benchmark_ui              = FALSE,
+
+    /** normalize match */
+    .normalize_match 	       = FALSE
 };
diff --git a/include/settings.h b/include/settings.h
index cecb9c34..c8e9c810 100644
--- a/include/settings.h
+++ b/include/settings.h
@@ -203,6 +203,9 @@ typedef struct
 
     /** Benchmark */
     gboolean       benchmark_ui;
+
+    /** Normalize match */
+    gboolean 	   normalize_match;
 } Settings;
 /** Global Settings structure. */
 extern Settings config;
diff --git a/source/helper.c b/source/helper.c
index a7bfa3e8..dc874cc6 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -183,7 +183,16 @@ static gchar *fuzzy_to_regex ( const char * input )
 // Macro for quickly generating regex for matching.
 static inline GRegex * R ( const char *s, int case_sensitive  )
 {
-    return g_regex_new ( s, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+	if ( config.normalize_match ) {
+		char * norm = g_str_to_ascii ( s, NULL );
+		GRegex *retv = g_regex_new ( norm, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+		g_free ( norm);
+		return retv;
+
+	} else {
+		return g_regex_new ( s, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+
+	}
 }
 
 static rofi_int_matcher * create_regex ( const char *input, int case_sensitive )
@@ -380,6 +389,9 @@ int find_arg_char ( const char * const key, char *val )
 
 PangoAttrList *helper_token_match_get_pango_attr ( RofiHighlightColorStyle th, rofi_int_matcher**tokens, const char *input, PangoAttrList *retv )
 {
+    if ( config.normalize_match ) {
+	return retv;
+    }
     // Do a tokenized match.
     if ( tokens ) {
         for ( int j = 0; tokens[j]; j++ ) {
@@ -387,10 +399,10 @@ PangoAttrList *helper_token_match_get_pango_attr ( RofiHighlightColorStyle th, r
             if ( tokens[j]->invert ) {
                 continue;
             }
-            g_regex_match ( tokens[j]->regex, input, G_REGEX_MATCH_PARTIAL, &gmi );
-            while ( g_match_info_matches ( gmi ) ) {
-                int count = g_match_info_get_match_count ( gmi );
-                for ( int index = ( count > 1 ) ? 1 : 0; index < count; index++ ) {
+	    g_regex_match ( tokens[j]->regex, input, G_REGEX_MATCH_PARTIAL, &gmi );
+	    while ( g_match_info_matches ( gmi ) ) {
+		int count = g_match_info_get_match_count ( gmi );
+		for ( int index = ( count > 1 ) ? 1 : 0; index < count; index++ ) {
                     int start, end;
                     g_match_info_fetch_pos ( gmi, index, &start, &end );
                     if ( th.style & ROFI_HL_BOLD ) {
@@ -454,8 +466,16 @@ int helper_token_match ( rofi_int_matcher* const *tokens, const char *input )
     // Do a tokenized match.
     if ( tokens ) {
         for ( int j = 0; match && tokens[j]; j++ ) {
-            match  = g_regex_match ( tokens[j]->regex, input, 0, NULL );
-            match ^= tokens[j]->invert;
+	    if ( config.normalize_match ) {
+		    char *norm_input = g_str_to_ascii(input, NULL );//g_utf8_normalize(input, -1, G_NORMALIZE_ALL);
+printf("%s\n", norm_input);
+		    match  = g_regex_match ( tokens[j]->regex, norm_input, 0, NULL );
+		    match ^= tokens[j]->invert;
+		    g_free ( norm_input );
+	    } else {
+		    match  = g_regex_match ( tokens[j]->regex, input, 0, NULL );
+		    match ^= tokens[j]->invert;
+	    }
         }
     }
     return match;
diff --git a/source/xrmoptions.c b/source/xrmoptions.c
index aff2ec9b..3516875f 100644
--- a/source/xrmoptions.c
+++ b/source/xrmoptions.c
@@ -231,6 +231,8 @@ static XrmOption xrmOptions[] = {
       "DRUN: build and use a cache with desktop file content.", CONFIG_DEFAULT },
     { xrm_Boolean, "drun-reload-desktop-cache", { .snum  = &config.drun_reload_desktop_cache            }, NULL,
       "DRUN: If enabled, reload the cache with desktop file content.", CONFIG_DEFAULT },
+    { xrm_Boolean, "normalize-match", 		{ .snum  = &config.normalize_match                      }, NULL,
+      "Normalize when matching.", CONFIG_DEFAULT },
 };
 
 /** Dynamic array of extra options */

Added -normalize-match option.

DaveDavenport · 2020-09-23T18:00:42Z

Hmm, this does not work well on characters like €.

DaveDavenport · 2020-09-27T18:06:01Z

I tried to make it a little bit better (no config option yet).

diff --git a/source/helper.c b/source/helper.c
index a7bfa3e8..22bd049f 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -180,10 +180,38 @@ static gchar *fuzzy_to_regex ( const char * input )
     return retv;
 }
 
+static char *reduce_string ( const char *s)
+{
+  char *composed = g_utf8_normalize ( s, -1, G_NORMALIZE_ALL_COMPOSE );
+  char * str = g_malloc0((strlen(composed)+2));
+  char *striter = str;
+  for ( char *iter = composed; iter && *iter; iter = g_utf8_next_char ( iter )) {
+    char buf[6] = {0,};
+    gunichar uc = g_utf8_get_char ( iter );
+    gunichar a,b;
+    int l = 0;
+    if ( g_unichar_decompose ( uc, &a, &b) ) {
+      l = g_unichar_to_utf8 ( a, buf);
+    } else {
+      l = g_unichar_to_utf8 ( uc, buf);
+    }
+    memcpy(striter, buf, l);
+    striter+=l;
+  }
+
+  g_free ( composed );
+  return str;
+}
+
 // Macro for quickly generating regex for matching.
 static inline GRegex * R ( const char *s, int case_sensitive  )
 {
-    return g_regex_new ( s, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+  char *str = reduce_string ( s );
+
+  GRegex *r = g_regex_new ( str, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+
+  g_free ( str );
+  return r;
 }
 
 static rofi_int_matcher * create_regex ( const char *input, int case_sensitive )
@@ -454,8 +482,10 @@ int helper_token_match ( rofi_int_matcher* const *tokens, const char *input )
     // Do a tokenized match.
     if ( tokens ) {
         for ( int j = 0; match && tokens[j]; j++ ) {
-            match  = g_regex_match ( tokens[j]->regex, input, 0, NULL );
+            char *r = reduce_string(input);
+            match  = g_regex_match ( tokens[j]->regex, r, 0, NULL );
             match ^= tokens[j]->invert;
+            g_free(r);
         }
     }
     return match;

DaveDavenport · 2020-09-27T18:18:44Z

diff --git a/config/config.c b/config/config.c
index 05723e5a..111ec742 100644
--- a/config/config.c
+++ b/config/config.c
@@ -170,5 +170,8 @@ Settings config = {
     .drun_reload_desktop_cache = FALSE,
 
     /** Benchmarks */
-    .benchmark_ui              = FALSE
+    .benchmark_ui              = FALSE,
+
+    /** normalize match */
+    .normalize_match 	       = FALSE
 };
diff --git a/include/settings.h b/include/settings.h
index cecb9c34..491036b5 100644
--- a/include/settings.h
+++ b/include/settings.h
@@ -203,6 +203,8 @@ typedef struct
 
     /** Benchmark */
     gboolean       benchmark_ui;
+
+    gboolean       normalize_match;
 } Settings;
 /** Global Settings structure. */
 extern Settings config;
diff --git a/source/helper.c b/source/helper.c
index a7bfa3e8..1e6fc8a9 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -180,10 +180,43 @@ static gchar *fuzzy_to_regex ( const char * input )
     return retv;
 }
 
+static char *reduce_string ( const char *s)
+{
+  char *composed = g_utf8_normalize ( s, -1, G_NORMALIZE_ALL_COMPOSE );
+  char * str = g_malloc0((strlen(composed)+2));
+  char *striter = str;
+  for ( char *iter = composed; iter && *iter; iter = g_utf8_next_char ( iter )) {
+    char buf[6] = {0,};
+    gunichar uc = g_utf8_get_char ( iter );
+    gunichar a,b;
+    int l = 0;
+    if ( g_unichar_decompose ( uc, &a, &b) ) {
+      l = g_unichar_to_utf8 ( a, buf);
+    } else {
+      l = g_unichar_to_utf8 ( uc, buf);
+    }
+    memcpy(striter, buf, l);
+    striter+=l;
+  }
+
+  g_free ( composed );
+  return str;
+}
+
 // Macro for quickly generating regex for matching.
 static inline GRegex * R ( const char *s, int case_sensitive  )
 {
+  if ( config.normalize_match ) {
+    char *str = reduce_string ( s );
+
+    GRegex *r = g_regex_new ( str, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+
+    g_free ( str );
+    return r;
+  } else {
     return g_regex_new ( s, G_REGEX_OPTIMIZE | ( ( case_sensitive ) ? 0 : G_REGEX_CASELESS ), 0, NULL );
+
+  }
 }
 
 static rofi_int_matcher * create_regex ( const char *input, int case_sensitive )
@@ -453,10 +486,19 @@ int helper_token_match ( rofi_int_matcher* const *tokens, const char *input )
     int match = TRUE;
     // Do a tokenized match.
     if ( tokens ) {
+      if ( config.normalize_match ) {
+        char *r = reduce_string(input);
+        for ( int j = 0; match && tokens[j]; j++ ) {
+          match  = g_regex_match ( tokens[j]->regex, r, 0, NULL );
+          match ^= tokens[j]->invert;
+        }
+        g_free(r);
+      } else {
         for ( int j = 0; match && tokens[j]; j++ ) {
-            match  = g_regex_match ( tokens[j]->regex, input, 0, NULL );
-            match ^= tokens[j]->invert;
+          match  = g_regex_match ( tokens[j]->regex, input, 0, NULL );
+          match ^= tokens[j]->invert;
         }
+      }
     }
     return match;
 }
diff --git a/source/xrmoptions.c b/source/xrmoptions.c
index aff2ec9b..a7c8176c 100644
--- a/source/xrmoptions.c
+++ b/source/xrmoptions.c
@@ -231,6 +231,8 @@ static XrmOption xrmOptions[] = {
       "DRUN: build and use a cache with desktop file content.", CONFIG_DEFAULT },
     { xrm_Boolean, "drun-reload-desktop-cache", { .snum  = &config.drun_reload_desktop_cache            }, NULL,
       "DRUN: If enabled, reload the cache with desktop file content.", CONFIG_DEFAULT },
+    { xrm_Boolean, "normalize-match", 		      { .snum  = &config.normalize_match                      }, NULL,
+            "Normalize when matching.", CONFIG_DEFAULT },
 };
 
 /** Dynamic array of extra options */

github-actions · 2020-11-01T00:04:38Z

This issue has been automatically locked since there has not been any recent activity after it was closed. Please open a new issue for related bugs.

YodaEmbedding added the Feature Request label May 7, 2020

DaveDavenport added the Nice To Have label May 7, 2020

DaveDavenport closed this as completed in b4bbce4 Sep 30, 2020

github-actions bot locked as resolved and limited conversation to collaborators Nov 1, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[REQUEST] Treat accented characters as regular characters #1119

[REQUEST] Treat accented characters as regular characters #1119

YodaEmbedding commented May 7, 2020 •

edited

Loading

DaveDavenport commented May 7, 2020

DaveDavenport commented Sep 23, 2020

DaveDavenport commented Sep 23, 2020

DaveDavenport commented Sep 27, 2020 •

edited

Loading

DaveDavenport commented Sep 27, 2020 •

edited

Loading

github-actions bot commented Nov 1, 2020

[REQUEST] Treat accented characters as regular characters #1119

[REQUEST] Treat accented characters as regular characters #1119

Comments

YodaEmbedding commented May 7, 2020 • edited Loading

DaveDavenport commented May 7, 2020

DaveDavenport commented Sep 23, 2020

DaveDavenport commented Sep 23, 2020

DaveDavenport commented Sep 27, 2020 • edited Loading

DaveDavenport commented Sep 27, 2020 • edited Loading

github-actions bot commented Nov 1, 2020

YodaEmbedding commented May 7, 2020 •

edited

Loading

DaveDavenport commented Sep 27, 2020 •

edited

Loading

DaveDavenport commented Sep 27, 2020 •

edited

Loading