-
Notifications
You must be signed in to change notification settings - Fork 292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
请写一个正则,去除掉html标签字符串里的所有属性,并保留src和href两种属性 #24
Comments
|
匹配标签的属性内容: var html = `<div title="test">
<icon v-if="icon" name="info" class="m-message--icon" slot="icon" />
<img src="http://img.123.jpg" title="img"/>
<input checked=true data-value="12" checked/>
<a href="#/test" on-click="jump"></a>
<m-alert title='Alert message'>alert</m-alert>
</div>`
// Attribute names that must survive the cleanup.
var whiteList = [ 'href', 'src' ]
// Outer pass: visit every `<name ...>` span. The lazy `(.*?)` stops at the first `>` and,
// without the `s` flag, `.` never crosses a line break.
// NOTE: the string returned by this replace() call is not assigned anywhere.
html.replace(/<([\w-]+)(.*?)>/g, function (ori, tag, content) {
// Debug trace of the outer match (full match, tag name, attribute text, offset, input).
console.log(1, arguments)
// Inner pass: match `name=value` pairs whose value is made only of word characters,
// hyphens and whitespace, optionally wrapped in quotes. An attribute without `=`
// (for example a bare `checked`) is never matched here, so it is left in place.
return ori.replace(/\s?([\w-]+)=['"]?[\w-\s]+['"]?/g, function (attr, name) {
// Debug trace of the inner match.
console.log(2, arguments)
// Whitelisted attributes are returned verbatim; every other matched pair is erased.
if (whiteList.indexOf(name) > -1) return attr
return ''
})
}) 以上例子还是存在一些属性无法去除,比如没有值的属性 |
正则写不来,但是好像可以直接js获取元素来实现,哈哈哈哈 |
还是要限制一下尖括号吧,不然正文里的也匹配上了 |
来个boss级的终极版,参考引用于vue源码 |
贡献一个测试用例:
双引号里带空格、单引号的 |
// Adapted from the attribute patterns in the Vue template compiler, which is why the expressions look long.
/**
 * Removes every attribute except `href` and `src` from the opening tags of an HTML string.
 *
 * Each attribute is tokenised as a whole (name plus optional quoted or unquoted value) and is
 * kept or dropped by its exact name. Names such as `data-src`, and text such as `src='x'`
 * sitting inside another attribute's quoted value, are therefore not mistaken for a real `src`.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} The markup with only `href`/`src` attributes left on opening tags.
 *   Closing tags and any text that does not look like an opening tag are returned unchanged.
 */
function keepHrefAndSrc(html) {
  // $1 = tag name, $2 = the run of attributes (each with its leading whitespace),
  // $3 = optional whitespace and self-closing slash before `>`.
  // The tag-name group deliberately excludes trailing whitespace so that the separator
  // stays attached to the attribute that follows it (no doubled space in the output).
  const openTag = /<([a-zA-Z_][\w\-\.]*)((?:\s*(?:[^\s"'<>\/=]+)(?:\s*(?:=)\s*(?:"(?:[^"]*)"+|'(?:[^']*)'+|(?:[^\s"'=<>`]+)))?)*)(\s*(?:\/)?\s*)>/g;
  // One attribute: leading whitespace, name ($1), optional `= value`.
  const attribute = /\s*([^\s"'<>\/=]+)(?:\s*(?:=)\s*(?:"(?:[^"]*)"+|'(?:[^']*)'+|(?:[^\s"'=<>`]+)))?/g;

  return html.replace(openTag, (_match, tagName, attrs, tail) => {
    // Walk the attribute run token by token; keep a token only when its name is exactly href or src.
    const kept = attrs.replace(attribute, (attr, name) => (name === 'href' || name === 'src' ? attr : ''));
    return '<' + tagName + kept + tail + '>';
  });
}
keepHrefAndSrc('<div class="xxxx" href="xxxddx" >sdfsdf</div><span name="xxsdf" src="sdfsdf"></span>');// => <div href="xxxddx" >sdfsdf</div><span src="sdfsdf"></span> |
撸了一个,不知道对第七条用例算不算失败的 const list = [
// Fixture strings that run() pushes through a search/replace pair, one per index.
// 0: only a non-whitelisted attribute, unquoted value.
'<a test=adsf>asdf</a>asdfasdf<p>adf</p>',
// 1: unquoted href.
'<a href=adsf>asdf</a>asdfasdf<p>adf</p>',
// 2 and 3: double-quoted href (the two entries are identical).
'<a href="adsf">asdf</a>asdfasdf<p>adf</p>',
'<a href="adsf">asdf</a>asdfasdf<p>adf</p>',
// 4: extra attribute before href.
'<a aa="asdfads" href="adsf">asdf</a>asdfasdf<p>adf</p>',
// 5: extra attribute after href.
'<a href="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
// 6: extra attributes on both sides of href.
'<a aa="asdfads" href="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
// 7: href and src interleaved with other attributes.
'<a aa="asdfads" href="adsf" aa="asdfads" src="adsf" aa="asdfads">asdf</a>asdfasdf<p>adf</p>',
// 8: attributes spread over several lines.
`
<a aa="asdfads"
href="adsf"
aa="asdfads">asdf</a>asdfasdf<p>adf</p>
`,
// 9: an attribute value that itself contains `src='...'` with a space, and the same text repeated as element content.
`
<div value="src='AAA AAA'" src='BBBBBB' href=abcabc>
value="src='AAA AAA'" src='BBBBBB' href=abcabc
</div>`,
// 10: tags closed with `/>`, with and without a space before the slash.
'<script src="adsf"/>asdf<link href="adsf" />asdf<p>adf</p>',
];
// Two candidate search/replace pairs. run() hands `search` and `replace` to String.prototype.replace.
const regs = {
// First attempt: rewrites a tag to `<tagname` followed by a single href/src attribute.
mine: {
// $1 = `<tagname`.
// $2 = a whitespace-led href/src attribute whose value contains no whitespace and no `>`.
// $4 = the opening quote (may be empty); `\4` requires the same text again after the value.
// The `[^>]*` before $2 is greedy, so $2 ends up being the rightmost href/src match in the tag.
// The trailing lookahead leaves the closing `>` outside the match, so it is not consumed.
// A tag with no href/src match at all does not match this pattern and is left untouched.
search: /(<\w+)[^>]*(\s+\b(href|src)=("|'|)[^\s>]*\4)(?:\s)?[^>]*?(?=>)/g,
replace: '$1$2',
},
// Second attempt: keeps up to two href/src attributes per tag.
enhanced: {
// The lookbehind pins the start of the match right after `<tagname`, so the tag name is not part of the match.
// $2 and $6 are two optional href/src attributes (quotes back-referenced via \4 and \8).
// Everything matched up to the closing `>` or `/>` (kept out by the lookahead) is replaced by `$2$6`.
search: /(?<=<\w+\b)([^>]*?(\s+\b(href|src)=("|'|)[^\s>]*\4))?([^>]*?(\s+\b(href|src)=("|'|)[^\s>]*\8))?[^>]*?(?=\/?>)/g,
replace: '$2$6',
}
};
/**
 * Applies a search/replace pair to the fixture strings in `list` and logs each outcome.
 *
 * @param {{search: RegExp, replace: (string|Function)}} reg - Pattern and replacement handed to
 *   `String.prototype.replace`.
 * @param {number} [testIndex=-1] - Negative: process every fixture. Otherwise process only the
 *   fixture at that index and additionally log the raw `exec` match for debugging.
 */
function run(reg, testIndex = -1) {
  const runAll = testIndex < 0;
  for (const [index, item] of list.entries()) {
    if (!runAll && testIndex !== index) continue;

    console.log('▶', index, item);
    if (!runAll) {
      // exec() on a /g or /y regex starts searching at lastIndex. Reset it so a stale
      // position left by earlier use of the same RegExp cannot hide the match.
      reg.search.lastIndex = 0;
      console.log(reg.search.exec(item));
    }
    // replace() with a global regex resets lastIndex itself, so no extra reset is needed here.
    const result = item.replace(reg.search, reg.replace);
    console.log('result:', result, '\n');
  }
}
// Run the lookbehind-based pair; a negative index makes run() process every fixture in the list.
run(regs.enhanced, -1); |
// Criticism welcome.
// Demo: strip every attribute except href/src from a chunk of real-world markup and log the result.
// All bindings are declared with const so the snippet also runs in strict mode / ES modules,
// where assigning to an undeclared name throws a ReferenceError.
;(() => {
  // Sample markup. The template literal is reproduced verbatim, line breaks included.
  const str = `<a afa href="http://fanyi-pro.baidu.com/?hmsr=%E7%99%BE%E5%BA%A6%E7%BF%BB%E8%AF%91&hmpl=%E5%9B%BA%E5%AE%9A%E5%85%A5%E5%8F%A3&hmcu=%E9%A1%B6%E9%83%A8%E6%8C%89%E9%92%AE&hmkw=&hmci=" target="_blank" class="list-name" src="" f="" aefa>人工翻译</a>
<div id="search-box" class="search-box-new line">
<ul class="channel grid">
<li><a log="sc_pos:c_baidu" data-type='baidu' rel="nofollow" href="http://www.baidu.com/s?cl=3&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD">网页</a></li>
<li><a log="sc_pos:c_news" data-type='news' rel="nofollow" href="https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=zhidao">资讯</a></li>
<li><a log="sc_pos:c_video" data-type='video' rel="nofollow" href="https://www.baidu.com/sf/vsearch?pd=video&tn=vsearch&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&rsv_spt=16">视频</a></li>
<li><a log="sc_pos:c_pic" data-type='image' rel="nofollow" href="http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=3&ie=gbk">图片</a></li>
<li><strong>知道</strong></li>
<li><a log="sc_pos:c_doc" data-type='wenku' rel="nofollow" href="http://wenku.baidu.com/search?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&lm=0&od=0">文库</a></li>
<li><a log="sc_pos:c_tieba" data-type='tieba' rel="nofollow" href="http://tieba.baidu.com/f?kw=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=4">贴吧</a></li><li><a log="sc_pos:c_b2b" data-type='b2b' rel="nofollow" href="https://b2b.baidu.com/s?q=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=www">采购</a></li>
<li><a log="sc_pos:c_map" data-type='map' rel="nofollow" href="http://map.baidu.com/m?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=map007">地图</a></li><li><a log="sc_pos:c_more" data-type="more" href="http://www.baidu.com/more/">更多»</a></li>
</ul>
<div class="search-block clearfix">
<div class="search-cont clearfix">
<a class="logo" href="/" title="百度知道"></a>
<form action="/search" name="search-form" method="get" id="search-form-new" class="search-form">
<input class="hdi" id="kw" maxlength="256" tabindex="1" size="46" name="word" value="chrome书签本地文件在什么地方" autocomplete="off" placeholder="" />
<button alog-action="g-search-anwser" type="submit" id="search-btn" hidefocus="true" tabindex="2" class="btn-global">搜索答案</button>
<a href="#" alog-action="g-i-ask" class="i-ask-link" id="ask-btn-new">我要提问</a>
</form>
</div>
</div>
</div>
`;
  // One attribute to delete: a whitespace character, then a name that does not START with
  // `href` or `src` (so names such as `hreflang` or `srcset` are spared as well), then either a
  // non-empty quoted value without angle brackets, an empty quoted value, or a run of
  // `[A-z0-9]` with an optional `=`. The optional `=` is what lets bare attributes such as
  // `afa` match.
  // NOTE: `[A-z]` also spans the punctuation between `Z` and `a` ([ \ ] ^ _ and the backtick).
  // It is kept unchanged on purpose so the output stays identical.
  const reg2 = /(?:\s((?!href|src)[a-z-]+)(="[^"<>]+"|='[^'<>]+'|=""|=''|(=|)[A-z0-9]+))/igm;
  // One tag-like span: `<`, letters, at least one more non-angle-bracket character, then `/>` or `>`.
  const reg_html_one = /(<[A-z]+[^<>]+(\/>|>))/igm;

  // Restrict the attribute removal to text inside tags, so text between tags is never touched.
  const cleaned = str.replace(reg_html_one, (tag) => tag.replace(reg2, ''));
  console.log(cleaned);
})()
//结果正确
//<a href="http://fanyi-pro.baidu.com/?hmsr=%E7%99%BE%E5%BA%A6%E7%BF%BB%E8%AF%91&hmpl=%E5%9B%BA%E5%AE%9A%E5%85%A5%E5%8F%A3&hmcu=%E9%A1%B6%E9%83%A8%E6%8C%89%E9%92%AE&hmkw=&hmci=" src="">人工翻译</a>
<div>
<ul>
<li><a href="http://www.baidu.com/s?cl=3&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD">网页</a></li>
<li><a href="https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=zhidao">资讯</a></li>
<li><a href="https://www.baidu.com/sf/vsearch?pd=video&tn=vsearch&wd=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&rsv_spt=16">视频</a></li>
<li><a href="http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=3&ie=gbk">图片</a></li>
<li><strong>知道</strong></li>
<li><a href="http://wenku.baidu.com/search?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&lm=0&od=0">文库</a></li>
<li><a href="http://tieba.baidu.com/f?kw=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&t=4">贴吧</a></li><li><a href="https://b2b.baidu.com/s?q=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=www">采购</a></li>
<li><a href="http://map.baidu.com/m?word=chrome%CA%E9%C7%A9%B1%BE%B5%D8%CE%C4%BC%FE%D4%DA%CA%B2%C3%B4%B5%D8%B7%BD&fr=map007">地图</a></li><li><a href="http://www.baidu.com/more/">更多»</a></li>
</ul>
<div>
<div>
<a href="/"></a>
<form>
<input />
<button >搜索答案</button>
<a href="#">我要提问</a>
</form>
</div>
</div>
</div>
|
我目前未解决的问题是单双引号嵌套如何确保引号正确 |
|
repost |
脱离浏览器你怎么办 |
答案呢? |
我觉得,首先正则很强大,但不要低估html的容错性(比如非闭合,嵌套错误),而且就算是格式化后的html标签,也有特例,比如 <textarea name="" id="" cols="30" rows="10">
<a value="src='AAA AAA'" src='BBBBBB' href=abcabc wtf=弄啥嘞>
value="src='AAA AAA'" src='BBBBBB' href=abcabc
</textarea> 就连语法高亮都认为 期待答案 |
这题目简单的理解就是,写一个正则表达式,将字符串 '<a class="link" href="#" title="tip">正则</a>' 转化成 '<a href="#">正则</a>'。
当然,真正包含一个网页的html的字符串要比这个复杂。
而且,google里关于这个问题的前三篇文章答案,都存在严重的问题,随便写几个case都是满足不了的。
正则的问题,很多前端人员都停留在如何用正则去判断一个数字是不是手机号,一段字符串是不是邮箱,说实话,这都没用到正则知识体系的十分之一
在一些工程项目难题上,如果正则使用到位,真的是一行正则可以抵1000行代码。
建议有能力的小伙伴,可以玩一下这题。
The text was updated successfully, but these errors were encountered: