Skip to content

Commit

Permalink
Weibotv patch (#729)
Browse files Browse the repository at this point in the history
* extractors/weibo: update Weibo TV parse method; fix issue #727

* extractors/weibo: update & add tests

* Update style (gofmt)

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>

Co-authored-by: Xinzhao Xu <z2d@jifangcheng.com>
  • Loading branch information
Half9000 and iawia002 committed Jul 3, 2020
1 parent 82958e7 commit 3798075
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 29 deletions.
109 changes: 83 additions & 26 deletions extractors/weibo/weibo.go
Original file line number Diff line number Diff line change
@@ -1,52 +1,109 @@
package weibo

import (
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	netURL "net/url"
	"strings"
	"time"

	"github.com/iawia002/annie/extractors/types"
	"github.com/iawia002/annie/parser"
	"github.com/iawia002/annie/request"
	"github.com/iawia002/annie/utils"
)

func downloadWeiboTV(url string) ([]*types.Data, error) {
headers := map[string]string{
"Cookie": "SUB=_2AkMsZ8xOf8NxqwJRmP4RzGLqbo5xyQDEieKaOz2VJRMxHRl-yj83qlEotRB6B-fiobWQ5vdEoYw7bCoCdf4KyP8O3Ujq",
// JSON envelope decoded from the Weibo TV component API response.
type (
	// weiboData is the top-level object of the API response.
	weiboData struct {
		Code string   `json:"code"`
		Data playData `json:"data"`
		Msg  string   `json:"msg"`
	}

	// playData wraps the "Component_Play_Playinfo" member of the "data" object.
	playData struct {
		PlayInfo playInfo `json:"Component_Play_Playinfo"`
	}

	// playInfo carries the video title and its stream URLs.
	// NOTE(review): the URLs keys look like quality labels (e.g. "720p") and
	// the values like scheme-less URLs — confirm against a live response.
	playInfo struct {
		Title string            `json:"title"`
		URLs  map[string]string `json:"urls"`
	}
)

// xsrfRequestTimeout bounds the token probe so that a stalled connection
// cannot block the extractor indefinitely.
const xsrfRequestTimeout = 30 * time.Second

// getXSRFToken sends a HEAD request to weibo.com's getversion endpoint and
// returns the value of the XSRF-TOKEN cookie set by the response.
//
// Redirects are not followed, so the cookie is read from the first response
// received. An error is returned when the request cannot be built or sent,
// or when the response carries no non-empty XSRF-TOKEN cookie.
func getXSRFToken() (string, error) {
	client := &http.Client{
		Timeout: xsrfRequestTimeout,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			// Stop at the first response instead of following redirects.
			return http.ErrUseLastResponse
		},
	}

	req, err := http.NewRequest(http.MethodHead, "https://weibo.com/ajax/getversion", nil)
	if err != nil {
		return "", err
	}
	req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")

	res, err := client.Do(req)
	if err != nil {
		return "", err
	}
	// Always release the connection, even though a HEAD response has no body.
	defer res.Body.Close()

	// Walk every Set-Cookie header rather than only the first one, and fail
	// with an error (not an index-out-of-range panic) when the token is absent.
	for _, c := range res.Cookies() {
		if c.Name == "XSRF-TOKEN" && c.Value != "" {
			return c.Value, nil
		}
	}
	return "", errors.New("weibo: XSRF-TOKEN cookie not found in response")
}

func downloadWeiboTV(url string) ([]*types.Data, error) {
APIEndpoint := "https://weibo.com/tv/api/component?page="
urldata, err := netURL.Parse(url)
if err != nil {
return nil, err
}
doc, err := parser.GetDoc(html)
APIURL := APIEndpoint + netURL.QueryEscape(urldata.Path)
token, err := getXSRFToken()
if err != nil {
return nil, err
}
title := strings.TrimSpace(
strings.Replace(doc.Find(".info_txt").First().Text(), "\u200B", " ", -1), // Zero width space.
)
// http://f.us.sinaimg.cn/003Cddn4lx07oCX1hC0001040200hkQk0k010.mp4?label=mp4_hd&template=852x480.20&Expires=1541041515&ssig=%2BYnCmZaToS&KID=unistore,video
// &480=http://f.us.sinaimg.cn/003Cddn4lx07oCX1hC0001040200hkQk0k010.mp4?label=mp4_hd&template=852x480.20&Expires=1541041515&ssig=%2BYnCmZaToS&KID=unistore,video
// &720=http://f.us.sinaimg.cn/004cqzndlx07oCX1kMOQ01040200vyxj0k010.mp4?label=mp4_720p&template=1280x720.20&Expires=1541041515&ssig=Fdasnr1aW6&KID=unistore,video&qType=720
realURLs := utils.MatchOneOf(html, `video-sources="fluency=(.+?)"`)
if realURLs == nil || len(realURLs) < 2 {
return nil, types.ErrURLParseFailed
headers := map[string]string{
"Cookie": "SUB=_2AkMpogLYf8NxqwJRmP0XxG7kbo10ww_EieKf_vMDJRMxHRl-yj_nqm4NtRB6AiIsKFFGRY4-UuGD5B1-Kf9glz3sp7Ii; XSRF-TOKEN=" + token,
"Referer": utils.MatchOneOf(url, `^([^?]+)`)[1],
"content-type": `application/x-www-form-urlencoded`,
"x-xsrf-token": token,
}
oid := utils.MatchOneOf(url, `tv/show/([^?]+)`)[1]
postData := "data=" + netURL.QueryEscape("{\"Component_Play_Playinfo\":{\"oid\":\""+oid+"\"}}")
payload := strings.NewReader(postData)
res, err := request.Request(http.MethodPost, APIURL, payload, headers)

realURL, err := netURL.PathUnescape(realURLs[1])
if err != nil {
return nil, err
}
quality := []string{"480", "720"}
streams := make(map[string]*types.Stream, len(quality))
for _, q := range quality {
urlList := strings.Split(realURL, fmt.Sprintf("&%s=", q))
u := urlList[len(urlList)-1]
if !strings.HasPrefix(u, "http") {
defer res.Body.Close()
var dataReader io.ReadCloser
if res.Header.Get("Content-Encoding") == "gzip" {
dataReader, err = gzip.NewReader(res.Body)
if err != nil {
return nil, err
}
} else {
dataReader = res.Body
}
var data weiboData
if err = json.NewDecoder(dataReader).Decode(&data); err != nil {
return nil, err
}

if data.Data.PlayInfo.URLs == nil || len(data.Data.PlayInfo.URLs) < 3 {
return nil, types.ErrURLParseFailed
}
realURLs := map[string]string{}
for k, v := range data.Data.PlayInfo.URLs {
if strings.HasPrefix(v, "http") {
continue
}
size, err := request.Size(u, url)
realURLs[k] = "https:" + v
}

streams := make(map[string]*types.Stream, len(realURLs))
for q, u := range realURLs {
size, err := request.Size(u, "")
if err != nil {
return nil, err
}
Expand All @@ -65,7 +122,7 @@ func downloadWeiboTV(url string) ([]*types.Data, error) {
return []*types.Data{
{
Site: "微博 weibo.com",
Title: title,
Title: data.Data.PlayInfo.Title,
Type: types.DataTypeVideo,
Streams: streams,
URL: url,
Expand All @@ -83,7 +140,7 @@ func New() types.Extractor {
// Extract is the main function to extract the data.
func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
if !strings.Contains(url, "m.weibo.cn") {
if strings.Contains(url, "weibo.com/tv/v/") {
if strings.Contains(url, "weibo.com/tv/show/") {
return downloadWeiboTV(url)
}
url = strings.Replace(url, "weibo.com", "m.weibo.cn", 1)
Expand Down
12 changes: 9 additions & 3 deletions extractors/weibo/weibo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import (
"github.com/iawia002/annie/test"
)

// TestToken verifies that getXSRFToken succeeds and yields a non-empty token.
// It calls the real getXSRFToken, so it depends on whatever that function does
// at runtime.
func TestToken(t *testing.T) {
	t.Run("XSRF token test", func(t *testing.T) {
		token, err := getXSRFToken()
		if err != nil {
			t.Fatalf("getXSRFToken() error = %v", err)
		}
		if token == "" {
			t.Fatal("getXSRFToken() returned an empty token")
		}
	})
}

func TestDownload(t *testing.T) {
tests := []struct {
name string
Expand All @@ -31,9 +37,9 @@ func TestDownload(t *testing.T) {
{
name: "weibo.com/tv test",
args: test.Args{
URL: "https://weibo.com/tv/v/jGz6llNZ1?fid=1034:4298353237002268",
Title: "做了这么一个屌炸天的视频我也不知道起什么标题好 @DRock-Art @毒液-致命守护者 @漫威影业 #绘画# #blender# #漫威#",
Quality: "720",
URL: "https://weibo.com/tv/show/1034:4298353237002268?from=old_pc_videoshow",
Title: "毒液插图Blender+Photoshop2.5小时工作流",
Quality: "720p",
Size: 7520929,
},
},
Expand Down

0 comments on commit 3798075

Please sign in to comment.