-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* acfun bangumi download Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com> Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com> * Update extractors/acfun/acfun.go Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com> * Update extractors/acfun/acfun.go Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com> Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>
- Loading branch information
Showing
8 changed files
with
293 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
package acfun | ||
|
||
import ( | ||
"fmt" | ||
"net/url" | ||
"regexp" | ||
|
||
"github.com/iawia002/annie/extractors/types" | ||
"github.com/iawia002/annie/parser" | ||
"github.com/iawia002/annie/request" | ||
"github.com/iawia002/annie/utils" | ||
jsoniter "github.com/json-iterator/go" | ||
) | ||
|
||
const (
	// Regular expressions that capture the JSON literal assigned to a
	// page-level JavaScript variable. They are raw strings with the dots
	// escaped so that "." only matches a literal dot in the variable name.
	bangumiDataPattern   = `window\.pageInfo = window\.bangumiData = (.*);`
	qualityConfigPattern = `window\.qualityConfig = (.*);`
	bangumiListPattern   = `window\.bangumiList = (.*);`

	// bangumiHTMLURL is the episode page URL template; its two %d verbs are
	// filled with the bangumi ID and the item ID, in that order.
	bangumiHTMLURL = "https://www.acfun.cn/bangumi/aa%d_36188_%d"
	// bangumiVideoURL is a URL template whose %s verb takes a host name.
	bangumiVideoURL = "https://%s/mediacloud/acfun/acfun_video/hls/"

	// referer is passed as the referer argument of page requests.
	referer = "https://www.acfun.cn"
	host    = "https://www.acfun.cn"
)
|
||
type extractor struct{} | ||
|
||
// New returns a new acfun bangumi extractor | ||
func New() types.Extractor { | ||
return &extractor{} | ||
} | ||
|
||
func (e *extractor) Extract(URL string, option types.Options) ([]*types.Data, error) { | ||
html, err := request.GetByte(URL, referer, nil) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
epDatas := make([]*episodeData, 0) | ||
|
||
if option.Playlist { | ||
list, err := resolvingEpisodes(html) | ||
if err != nil { | ||
return nil, err | ||
} | ||
items := utils.NeedDownloadList(option.Items, option.ItemStart, option.ItemEnd, len(list.Episodes)) | ||
|
||
for _, item := range items { | ||
epDatas = append(epDatas, list.Episodes[item-1]) | ||
} | ||
} else { | ||
bgData, _, err := resolvingData(html) | ||
if err != nil { | ||
return nil, err | ||
} | ||
epDatas = append(epDatas, &bgData.episodeData) | ||
} | ||
|
||
datas := make([]*types.Data, 0) | ||
|
||
wgp := utils.NewWaitGroupPool(option.ThreadNumber) | ||
for _, epData := range epDatas { | ||
t := epData | ||
wgp.Add() | ||
go func() { | ||
defer wgp.Done() | ||
datas = append(datas, extractBangumi(concatURL(t), option)) | ||
}() | ||
} | ||
wgp.Wait() | ||
return datas, nil | ||
} | ||
|
||
func concatURL(epData *episodeData) string { | ||
return fmt.Sprintf(bangumiHTMLURL, epData.BangumiID, epData.ItemID) | ||
} | ||
|
||
func extractBangumi(URL string, option types.Options) *types.Data { | ||
var err error | ||
html, err := request.GetByte(URL, referer, nil) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
|
||
_, vInfo, err := resolvingData(html) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
|
||
streams := make(map[string]*types.Stream) | ||
|
||
for _, stm := range vInfo.AdaptationSet[0].Streams { | ||
m3u8URL, err := url.Parse(stm.URL) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
|
||
urls, err := utils.M3u8URLs(m3u8URL.String()) | ||
if err != nil { | ||
|
||
m3u8URL, err = url.Parse(stm.URL) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
|
||
urls, err = utils.M3u8URLs(stm.BackURL) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
} | ||
|
||
// There is no size information in the m3u8 file and the calculation will take too much time, just ignore it. | ||
parts := make([]*types.Part, 0) | ||
for _, u := range urls { | ||
parts = append(parts, &types.Part{ | ||
URL: u, | ||
Ext: "ts", | ||
}) | ||
} | ||
streams[stm.QualityLabel] = &types.Stream{ | ||
ID: stm.QualityType, | ||
Parts: parts, | ||
Quality: stm.QualityType, | ||
NeedMux: false, | ||
} | ||
} | ||
|
||
doc, err := parser.GetDoc(string(html)) | ||
if err != nil { | ||
return types.EmptyData(URL, err) | ||
} | ||
data := &types.Data{ | ||
Site: "AcFun acfun.cn", | ||
Title: parser.Title(doc), | ||
Type: types.DataTypeVideo, | ||
Streams: streams, | ||
URL: URL, | ||
} | ||
return data | ||
} | ||
|
||
func resolvingData(html []byte) (*bangumiData, *videoInfo, error) { | ||
bgData := &bangumiData{} | ||
vInfo := &videoInfo{} | ||
|
||
pattern, _ := regexp.Compile(bangumiDataPattern) | ||
|
||
groups := pattern.FindSubmatch(html) | ||
err := jsoniter.Unmarshal(groups[1], bgData) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
err = jsoniter.UnmarshalFromString(bgData.CurrentVideoInfo.KsPlayJSON, vInfo) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
return bgData, vInfo, nil | ||
} | ||
|
||
func resolvingEpisodes(html []byte) (*episodeList, error) { | ||
list := &episodeList{} | ||
pattern, _ := regexp.Compile(bangumiListPattern) | ||
|
||
groups := pattern.FindSubmatch(html) | ||
err := jsoniter.Unmarshal(groups[1], list) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return list, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package acfun | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/iawia002/annie/extractors/types" | ||
"github.com/iawia002/annie/test" | ||
) | ||
|
||
func TestDownload(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
args test.Args | ||
}{ | ||
{ | ||
name: "normal test", | ||
args: test.Args{ | ||
URL: "https://www.acfun.cn/bangumi/aa6000686_36188_1704167", | ||
Title: "瑞克和莫蒂 第四季 :第2话 注释版", | ||
}, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
data, err := New().Extract(tt.args.URL, types.Options{}) | ||
test.CheckError(t, err) | ||
test.Check(t, tt.args, data[0]) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package acfun | ||
|
||
// episodeData holds the identifiers and name of a single episode as decoded
// from the page JSON.
type episodeData struct {
	ItemID      int64  `json:"itemId"`      // used with BangumiID to build the episode page URL
	EpisodeName string `json:"episodeName"` // episode name as given by the page
	BangumiID   int64  `json:"bangumiId"`   // ID of the bangumi this episode belongs to
	VideoID     int64  `json:"videoId"`
}
|
||
type bangumiData struct { | ||
episodeData | ||
BangumiTitle string `json:"bangumiTitle"` | ||
CurrentVideoInfo struct { | ||
KsPlayJSON string `json:"ksPlayJson"` | ||
} `json:"currentVideoInfo"` | ||
} | ||
|
||
type videoInfo struct { | ||
AdaptationSet []struct { | ||
Streams streams `json:"representation"` | ||
} `json:"adaptationSet"` | ||
} | ||
|
||
type streams []stream | ||
|
||
type episodeList struct { | ||
Episodes []*episodeData `json:"items"` | ||
} | ||
|
||
// stream describes one representation of a video as decoded from the player
// configuration JSON.
type stream struct {
	ID           int64  `json:"id"`
	BackURL      string `json:"backUrl"` // backup playlist URL, tried when URL fails
	Codecs       string `json:"codecs"`
	URL          string `json:"url"` // primary m3u8 playlist URL
	BitRate      int64  `json:"avgBitrate"`
	QualityType  string `json:"qualityType"`  // used as the stream ID and quality
	QualityLabel string `json:"qualityLabel"` // used as the key in the streams map
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
这里更改为HEAD会导致weiboTV获取内容size不可用,403 Error