Skip to content

Commit

Permalink
refactor: optimizing regex pattern for html meta matching (#4235)
Browse files Browse the repository at this point in the history
#### What type of PR is this?
/kind improvement
/area core
/milestone 2.8.x

#### What this PR does / why we need it:
优化去除 Html Meta 重复标签的正则表达式

see #4234 for more details.

#### Which issue(s) this PR fixes:

Fixes #4234

#### Does this PR introduce a user-facing change?

```release-note
优化去除 Html Meta 重复标签的正则表达式
```
  • Loading branch information
guqing committed Jul 20, 2023
1 parent 6b70296 commit 5eb9b68
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AllArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import org.thymeleaf.context.ITemplateContext;
Expand All @@ -30,7 +29,7 @@
@Component
@AllArgsConstructor
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
static final Pattern META_PATTERN = Pattern.compile("<meta\\s+name=\"(\\w+)\"(.*?)>");
static final Pattern META_PATTERN = Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*");

@Override
public Mono<Void> process(ITemplateContext context, IModel model,
Expand All @@ -49,15 +48,17 @@ public Mono<Void> process(ITemplateContext context, IModel model,
while (matcher.find()) {
String tagLine = matcher.group(0);
String nameAttribute = matcher.group(1);
IText metaTagNode = context.getModelFactory().createText(tagLine);
// create a new text node to replace the original text node
// replace multiple line breaks with one line break
IText metaTagNode = context.getModelFactory()
.createText(tagLine.replaceAll("\\n+", "\n"));
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
text = text.replace(tagLine, "");
}
if (StringUtils.isNotBlank(text)) {
IText otherText = context.getModelFactory()
.createText(text);
otherModel.add(new IndexedModel(i, otherText));
}
// put the rest of the text into the other model
IText otherText = context.getModelFactory()
.createText(text);
otherModel.add(new IndexedModel(i, otherText));
} else {
otherModel.add(new IndexedModel(i, templateEvent));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package run.halo.app.theme.dialect;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.regex.Matcher;
import org.junit.jupiter.api.Test;

/**
 * Verifies that {@link DuplicateMetaTagProcessor#META_PATTERN} finds meta tags and captures
 * the value of their {@code name} attribute in group 1.
 *
 * @author guqing
 * @since 2.8.0
 */
class DuplicateMetaTagProcessorTest {

    @Test
    void extractMetaTag() {
        // name attribute first, self-closing tag
        assertCapturedName("<meta name=\"description\" content=\"a description\"/>",
            "description");

        // content attribute placed before the name attribute
        assertCapturedName("<meta content=\"K1,K2\" name=\"keywords\"/>", "keywords");

        // tag without the closing slash
        assertCapturedName("<meta content=\"K1,K2\" name=\"keywords\">", "keywords");

        // meta tag followed by line breaks and unrelated markup
        String headFragment = """
            <meta content="全局 Head description" name="description" />
            <style>
            .moment .momemt-content pre.notranslate {
            background: #f3f3f3;
            color: #444;
            }
            </style>
            """;
        assertCapturedName(headFragment, "description");
    }

    /**
     * Asserts that the pattern finds a match in {@code html} and that the first capture
     * group equals {@code expectedName}.
     */
    private static void assertCapturedName(String html, String expectedName) {
        Matcher found = DuplicateMetaTagProcessor.META_PATTERN.matcher(html);
        assertThat(found.find()).isTrue();
        assertThat(found.group(1)).isEqualTo(expectedName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ void messageResolverWhenDefaultTheme() {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>zh</div>
Expand All @@ -93,7 +96,10 @@ void messageResolverForEnLanguageWhenDefaultTheme() {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>en</div>
Expand All @@ -114,7 +120,10 @@ void shouldUseDefaultWhenLanguageNotSupport() {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>foo</div>
Expand All @@ -135,7 +144,10 @@ void switchTheme() throws URISyntaxException {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>zh</div>
Expand All @@ -154,7 +166,10 @@ void switchTheme() throws URISyntaxException {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Other theme title</title></head>
<head>
<meta charset="UTF-8">
<title>Other theme title</title>
</head>
<body>
<p>Other 首页</p>
</body>
Expand All @@ -167,7 +182,10 @@ void switchTheme() throws URISyntaxException {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Other theme title</title></head>
<head>
<meta charset="UTF-8">
<title>Other theme title</title>
</head>
<body>
<p>other index</p>
</body>
Expand Down

0 comments on commit 5eb9b68

Please sign in to comment.