Skip to content

Commit

Permalink
adjust haokan url parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Youmoo committed Nov 7, 2020
1 parent b4ccc70 commit 98198af
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
11 changes: 10 additions & 1 deletion extractors/haokan/haokan.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package haokan

import (
"strings"

"github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
Expand All @@ -26,12 +28,19 @@ func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, er
}
title := titles[1]

// On older Haokan pages, the video URL is placed on the video tag
urls := utils.MatchOneOf(html, `<video\s*class="video"\s*src="?(.+?)"?\s*>`)

if urls == nil || len(urls) < 2 {
// fallback: on newer Haokan pages, the video URL is in the JSON data
urls = utils.MatchOneOf(html, `"playurl":"(http.+?)"`)
}

if urls == nil || len(urls) < 2 {
return nil, types.ErrURLParseFailed
}
playurl := urls[1]

playurl := strings.Replace(urls[1], `\/`, `/`, -1)

size, err := request.Size(playurl, url)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions extractors/haokan/haokan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ func TestDownload(t *testing.T) {
{
name: "normal test",
args: test.Args{
URL: "https://haokan.baidu.com/v?vid=12153099833589381848",
Title: "你知道吗,十元人民币背后有人的名字,有十元的抓紧看一下吧",
Size: 7260836,
URL: "https://haokan.baidu.com/v?vid=10057409468467026969",
Title: "听歌学英语小学篇(6):my new pen pal",
Size: 2027354,
},
},
}
Expand Down

0 comments on commit 98198af

Please sign in to comment.