Skip to content

Commit

Permalink
Acfun extractor (#838)
Browse files Browse the repository at this point in the history
* acfun bangumi download

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>

* Update extractors/acfun/acfun.go

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>

* Update extractors/acfun/acfun.go

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>
  • Loading branch information
kilosonc and iawia002 committed Dec 15, 2020
1 parent 2263d4c commit abc3c9d
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 24 deletions.
63 changes: 40 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,45 @@

👾 Annie is a fast, simple and clean video downloader built with Go.

* [Installation](#installation)
* [Getting Started](#getting-started)
* [Download a video](#download-a-video)
* [Download anything else](#download-anything-else)
* [Download playlist](#download-playlist)
* [Multiple inputs](#multiple-inputs)
* [Resume a download](#resume-a-download)
* [Cookies](#cookies)
* [Auto retry](#auto-retry)
* [Proxy](#proxy)
* [Multi-Thread](#multi-thread)
* [Short link](#short-link)
* [Use specified Referrer](#use-specified-referrer)
* [Specify the output path and name](#specify-the-output-path-and-name)
* [Debug Mode](#debug-mode)
* [Reuse extracted data](#reuse-extracted-data)
* [Options](#options)
* [Supported Sites](#supported-sites)
* [Known issues](#known-issues)
* [Contributing](#contributing)
* [Authors](#authors)
* [Similar projects](#similar-projects)
* [License](#license)
- [Installation](#installation)
- [Prerequisites](#prerequisites)
- [Install via `go get`](#install-via-go-get)
- [Homebrew (macOS only)](#homebrew-macos-only)
- [Arch Linux](#arch-linux)
- [Void Linux](#void-linux)
- [Scoop on Windows](#scoop-on-windows)
- [Chocolatey on Windows](#chocolatey-on-windows)
- [Getting Started](#getting-started)
- [Download a video](#download-a-video)
- [Download anything else](#download-anything-else)
- [Download playlist](#download-playlist)
- [Multiple inputs](#multiple-inputs)
- [Resume a download](#resume-a-download)
- [Auto retry](#auto-retry)
- [Cookies](#cookies)
- [Proxy](#proxy)
- [Multi-Thread](#multi-thread)
- [Short link](#short-link)
- [bilibili](#bilibili)
- [Use specified Referrer](#use-specified-referrer)
- [Specify the output path and name](#specify-the-output-path-and-name)
- [Debug Mode](#debug-mode)
- [Reuse extracted data](#reuse-extracted-data)
- [Options](#options)
- [Download:](#download)
- [Network:](#network)
- [Playlist:](#playlist)
- [Filesystem:](#filesystem)
- [Subtitle:](#subtitle)
- [Youku:](#youku)
- [aria2:](#aria2)
- [Supported Sites](#supported-sites)
- [Known issues](#known-issues)
- [优酷](#优酷)
- [Contributing](#contributing)
- [Authors](#authors)
- [Similar projects](#similar-projects)
- [License](#license)


## Installation
Expand Down Expand Up @@ -610,6 +626,7 @@ XVIDEOS | <https://xvideos.com> | ✓ | | | |
聯合新聞網 | <https://udn.com> | ✓ | | | |
TikTok | <https://www.tiktok.com> | ✓ | | | |
好看视频 | <https://haokan.baidu.com> | ✓ | | | |
AcFun | <https://www.acfun.cn> | ✓ | | ✓ | |


## Known issues
Expand Down
172 changes: 172 additions & 0 deletions extractors/acfun/acfun.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
package acfun

import (
"fmt"
"net/url"
"regexp"

"github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
jsoniter "github.com/json-iterator/go"
)

// Regular expressions, URL templates and header values used by the AcFun
// bangumi extractor.
const (
	// bangumiDataPattern captures the JSON value assigned to
	// window.bangumiData in the page source.
	bangumiDataPattern = "window.pageInfo = window.bangumiData = (.*);"
	// qualityConfigPattern captures the JSON value assigned to
	// window.qualityConfig. NOTE(review): not referenced in the visible code.
	qualityConfigPattern = "window.qualityConfig = (.*);"
	// bangumiListPattern captures the JSON value assigned to
	// window.bangumiList (the episode list) in the page source.
	bangumiListPattern = "window.bangumiList = (.*);"

	// bangumiHTMLURL is the episode page template; it is filled with the
	// bangumi ID and the item ID, in that order (see concatURL).
	// NOTE(review): the meaning of the fixed "36188" segment is not evident
	// from this file - confirm before changing.
	bangumiHTMLURL = "https://www.acfun.cn/bangumi/aa%d_36188_%d"
	// bangumiVideoURL is an HLS URL template parameterised by host name.
	// NOTE(review): not referenced in the visible code.
	bangumiVideoURL = "https://%s/mediacloud/acfun/acfun_video/hls/"

	// referer is passed as the referer argument of request.GetByte for every
	// page fetch in this package.
	referer = "https://www.acfun.cn"
	// host is the site origin. NOTE(review): not referenced in the visible code.
	host = "https://www.acfun.cn"
)

// extractor is the stateless AcFun implementation returned by New; its
// Extract method resolves bangumi pages into downloadable data.
type extractor struct{}

// New constructs the AcFun bangumi extractor and hands it back behind the
// types.Extractor interface.
func New() types.Extractor {
	return new(extractor)
}

// Extract downloads the AcFun bangumi page at URL and resolves it into one
// types.Data per episode.
//
// When option.Playlist is set, the episode list embedded in the page is
// filtered with utils.NeedDownloadList (option.Items, option.ItemStart,
// option.ItemEnd) and every selected episode is extracted. Otherwise only the
// episode described by the page itself is extracted.
//
// Episodes are resolved concurrently through a wait-group pool sized by
// option.ThreadNumber. The returned slice keeps the order of the selected
// episodes. A failure while resolving a single episode is reported inside the
// corresponding element (see extractBangumi), not through the returned error;
// the error is non-nil only when the initial page cannot be fetched or parsed.
func (e *extractor) Extract(URL string, option types.Options) ([]*types.Data, error) {
	html, err := request.GetByte(URL, referer, nil)
	if err != nil {
		return nil, err
	}

	var epDatas []*episodeData
	if option.Playlist {
		list, err := resolvingEpisodes(html)
		if err != nil {
			return nil, err
		}
		items := utils.NeedDownloadList(option.Items, option.ItemStart, option.ItemEnd, len(list.Episodes))

		epDatas = make([]*episodeData, 0, len(items))
		for _, item := range items {
			// Items are 1-based; skip anything outside the episode list
			// instead of panicking on an out-of-range index.
			if item < 1 || item > len(list.Episodes) {
				continue
			}
			epDatas = append(epDatas, list.Episodes[item-1])
		}
	} else {
		bgData, _, err := resolvingData(html)
		if err != nil {
			return nil, err
		}
		epDatas = append(epDatas, &bgData.episodeData)
	}

	// Each goroutine writes only to its own slot, so no locking is needed and
	// the output order is deterministic. Appending to a shared slice from
	// several goroutines would be a data race.
	datas := make([]*types.Data, len(epDatas))

	wgp := utils.NewWaitGroupPool(option.ThreadNumber)
	for i, epData := range epDatas {
		i, epData := i, epData // per-iteration copies for the closure
		wgp.Add()
		go func() {
			defer wgp.Done()
			datas[i] = extractBangumi(concatURL(epData), option)
		}()
	}
	wgp.Wait()
	return datas, nil
}

// concatURL renders the episode page address for epData by substituting its
// bangumi ID and item ID, in that order, into bangumiHTMLURL.
func concatURL(epData *episodeData) string {
	bangumiID, itemID := epData.BangumiID, epData.ItemID
	return fmt.Sprintf(bangumiHTMLURL, bangumiID, itemID)
}

// extractBangumi fetches the episode page at URL and builds its types.Data.
//
// The streams are taken from the first adaptation set of the decoded video
// info. For every stream the m3u8 playlist at stm.URL is expanded into its
// segment URLs; when that fails, the playlist at stm.BackURL is tried instead.
// The resulting streams are keyed by the stream's quality label.
//
// Errors are never returned directly: any failure is wrapped with
// types.EmptyData so the caller can report it per episode. The option
// parameter is currently unused.
func extractBangumi(URL string, option types.Options) *types.Data {
	html, err := request.GetByte(URL, referer, nil)
	if err != nil {
		return types.EmptyData(URL, err)
	}

	_, vInfo, err := resolvingData(html)
	if err != nil {
		return types.EmptyData(URL, err)
	}

	// Guard against a page without any adaptation set; indexing [0] on an
	// empty slice would panic.
	if len(vInfo.AdaptationSet) == 0 {
		return types.EmptyData(URL, fmt.Errorf("acfun: no adaptation set found for %s", URL))
	}
	representations := vInfo.AdaptationSet[0].Streams

	streams := make(map[string]*types.Stream, len(representations))
	for _, stm := range representations {
		m3u8URL, err := url.Parse(stm.URL)
		if err != nil {
			return types.EmptyData(URL, err)
		}

		urls, err := utils.M3u8URLs(m3u8URL.String())
		if err != nil {
			// The primary playlist failed; fall back to the backup URL.
			urls, err = utils.M3u8URLs(stm.BackURL)
			if err != nil {
				return types.EmptyData(URL, err)
			}
		}

		// There is no size information in the m3u8 file and the calculation
		// will take too much time, just ignore it.
		parts := make([]*types.Part, 0, len(urls))
		for _, u := range urls {
			parts = append(parts, &types.Part{
				URL: u,
				Ext: "ts",
			})
		}
		streams[stm.QualityLabel] = &types.Stream{
			ID:      stm.QualityType,
			Parts:   parts,
			Quality: stm.QualityType,
			NeedMux: false,
		}
	}

	doc, err := parser.GetDoc(string(html))
	if err != nil {
		return types.EmptyData(URL, err)
	}
	return &types.Data{
		Site:    "AcFun acfun.cn",
		Title:   parser.Title(doc),
		Type:    types.DataTypeVideo,
		Streams: streams,
		URL:     URL,
	}
}

// resolvingData extracts the bangumi metadata embedded in an episode page.
//
// It locates the JSON matched by bangumiDataPattern in html and decodes it
// into a bangumiData, then decodes the JSON string stored in
// CurrentVideoInfo.KsPlayJSON into a videoInfo.
//
// An error is returned when the pattern cannot be compiled, when the page
// does not contain the expected assignment, or when either JSON document
// fails to decode. On error both returned pointers are nil.
func resolvingData(html []byte) (*bangumiData, *videoInfo, error) {
	pattern, err := regexp.Compile(bangumiDataPattern)
	if err != nil {
		return nil, nil, err
	}

	// FindSubmatch returns nil when nothing matches; check before indexing.
	groups := pattern.FindSubmatch(html)
	if len(groups) < 2 {
		return nil, nil, fmt.Errorf("acfun: pattern %q not found in page", bangumiDataPattern)
	}

	bgData := &bangumiData{}
	if err := jsoniter.Unmarshal(groups[1], bgData); err != nil {
		return nil, nil, err
	}

	vInfo := &videoInfo{}
	if err := jsoniter.UnmarshalFromString(bgData.CurrentVideoInfo.KsPlayJSON, vInfo); err != nil {
		return nil, nil, err
	}
	return bgData, vInfo, nil
}

// resolvingEpisodes extracts the episode list embedded in a bangumi page.
//
// It locates the JSON matched by bangumiListPattern in html and decodes it
// into an episodeList.
//
// An error is returned when the pattern cannot be compiled, when the page
// does not contain the expected assignment, or when the JSON fails to decode.
func resolvingEpisodes(html []byte) (*episodeList, error) {
	pattern, err := regexp.Compile(bangumiListPattern)
	if err != nil {
		return nil, err
	}

	// FindSubmatch returns nil when nothing matches; check before indexing.
	groups := pattern.FindSubmatch(html)
	if len(groups) < 2 {
		return nil, fmt.Errorf("acfun: pattern %q not found in page", bangumiListPattern)
	}

	list := &episodeList{}
	if err := jsoniter.Unmarshal(groups[1], list); err != nil {
		return nil, err
	}
	return list, nil
}
30 changes: 30 additions & 0 deletions extractors/acfun/acfun_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package acfun

import (
"testing"

"github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)

// TestDownload runs the extractor against a live AcFun bangumi page and
// checks the extracted data against the expected values in test.Args.
func TestDownload(t *testing.T) {
	tests := []struct {
		name string
		args test.Args
	}{
		{
			name: "normal test",
			args: test.Args{
				URL:   "https://www.acfun.cn/bangumi/aa6000686_36188_1704167",
				Title: "瑞克和莫蒂 第四季 :第2话 注释版",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data, err := New().Extract(tt.args.URL, types.Options{})
			test.CheckError(t, err)
			// Fail cleanly instead of panicking on data[0] when nothing
			// was extracted.
			if len(data) == 0 {
				t.Fatalf("Extract(%q) returned no data", tt.args.URL)
			}
			test.Check(t, tt.args, data[0])
		})
	}
}
38 changes: 38 additions & 0 deletions extractors/acfun/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package acfun

// episodeData holds the identifiers of a single bangumi episode as decoded
// from the JSON embedded in the page. BangumiID and ItemID are the two values
// concatURL substitutes into the episode page URL.
type episodeData struct {
	// ItemID is decoded from the "itemId" key.
	ItemID int64 `json:"itemId"`
	// EpisodeName is decoded from the "episodeName" key.
	EpisodeName string `json:"episodeName"`
	// BangumiID is decoded from the "bangumiId" key.
	BangumiID int64 `json:"bangumiId"`
	// VideoID is decoded from the "videoId" key.
	VideoID int64 `json:"videoId"`
}

// bangumiData is the page-level JSON object matched by bangumiDataPattern.
// It embeds the episodeData of the episode the page describes.
type bangumiData struct {
	episodeData
	// BangumiTitle is decoded from the "bangumiTitle" key.
	BangumiTitle string `json:"bangumiTitle"`
	// CurrentVideoInfo carries the playback information of the episode.
	CurrentVideoInfo struct {
		// KsPlayJSON is a JSON document stored as a string; resolvingData
		// decodes it into a videoInfo.
		KsPlayJSON string `json:"ksPlayJson"`
	} `json:"currentVideoInfo"`
}

// videoInfo is the structure decoded from bangumiData's KsPlayJSON string.
type videoInfo struct {
	// AdaptationSet is decoded from the "adaptationSet" array; only its first
	// element is read by extractBangumi.
	AdaptationSet []struct {
		// Streams is decoded from the "representation" key.
		Streams streams `json:"representation"`
	} `json:"adaptationSet"`
}

// streams is the list of stream entries belonging to one adaptation set.
type streams []stream

// episodeList is the JSON object matched by bangumiListPattern.
type episodeList struct {
	// Episodes is decoded from the "items" array.
	Episodes []*episodeData `json:"items"`
}

// stream describes one entry of an adaptation set's "representation" array.
type stream struct {
	// ID is decoded from the "id" key.
	ID int64 `json:"id"`
	// BackURL is decoded from "backUrl"; extractBangumi uses it as the
	// fallback m3u8 playlist when URL cannot be expanded.
	BackURL string `json:"backUrl"`
	// Codecs is decoded from the "codecs" key.
	Codecs string `json:"codecs"`
	// URL is decoded from "url"; extractBangumi treats it as the primary
	// m3u8 playlist address.
	URL string `json:"url"`
	// BitRate is decoded from the "avgBitrate" key.
	BitRate int64 `json:"avgBitrate"`
	// QualityType is decoded from "qualityType"; it is used as the ID and
	// Quality of the resulting types.Stream.
	QualityType string `json:"qualityType"`
	// QualityLabel is decoded from "qualityLabel"; it is the key under which
	// the stream is stored in the resulting stream map.
	QualityLabel string `json:"qualityLabel"`
}
2 changes: 2 additions & 0 deletions extractors/extractors.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"net/url"
"strings"

"github.com/iawia002/annie/extractors/acfun"
"github.com/iawia002/annie/extractors/bcy"
"github.com/iawia002/annie/extractors/bilibili"
"github.com/iawia002/annie/extractors/douyin"
Expand Down Expand Up @@ -72,6 +73,7 @@ func init() {
"udn": udn.New(),
"tiktok": tiktok.New(),
"haokan": haokan.New(),
"acfun": acfun.New(),
}
}

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/cheggaaa/pb v1.0.25
github.com/fatih/color v1.7.0
github.com/go-rod/rod v0.77.1
github.com/json-iterator/go v1.1.10
github.com/kr/pretty v0.1.0
github.com/mattn/go-colorable v0.0.9 // indirect
github.com/robertkrimen/otto v0.0.0-20191219234010-c382bd3c16ff
Expand Down
9 changes: 9 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@ github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/go-rod/rod v0.77.1 h1:e6QK8KyUaLRswDUDHxW526nDPaP9EvRaaiSce9qm55M=
github.com/go-rod/rod v0.77.1/go.mod h1:XEc4dRYDxlKw+SFG3ZpWTZ8k4vosgg5IDUHKYPMzVSI=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
Expand All @@ -28,6 +32,10 @@ github.com/mattn/go-runewidth v0.0.6 h1:V2iyH+aX9C5fsYCpK60U8BYIvmhqxuOL3JZcqc1N
github.com/mattn/go-runewidth v0.0.6/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mihaiav/ytdl v0.6.3-0.20200510100116-5f2bf8b4fec0 h1:2w0EKolK4KUQu+A0Jc7XCBRteLIpobAovG7vAu4eCfQ=
github.com/mihaiav/ytdl v0.6.3-0.20200510100116-5f2bf8b4fec0/go.mod h1:F0WX8szfQ00mhmfla+0xVJp483SBV4VO/ByUaNioNSM=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand All @@ -40,6 +48,7 @@ github.com/rs/zerolog v1.16.0/go.mod h1:9nvC1axdVrAHcu/s9taAVfBuIdTZLVQmKQyvrUjF
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/tidwall/gjson v1.3.2 h1:+7p3qQFaH3fOMXAJSrdZwGKcOO/lYdGS0HqGhPqDdTI=
Expand Down
2 changes: 1 addition & 1 deletion request/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func Headers(url, refer string) (http.Header, error) {
headers := map[string]string{
"Referer": refer,
}
res, err := Request(http.MethodGet, url, nil, headers)
res, err := Request(http.MethodHead, url, nil, headers)

This comment has been minimized.

Copy link
@Sansui233

Sansui233 May 9, 2021

这里更改为HEAD会导致weiboTV获取内容size不可用,403 Error

This comment has been minimized.

Copy link
@shimamura-hougetsu
if err != nil {
return nil, err
}
Expand Down

0 comments on commit abc3c9d

Please sign in to comment.