From 3f3e647c5303e216bb980f14352f48e0750b7026 Mon Sep 17 00:00:00 2001 From: asuma <770878450@qq.com> Date: Fri, 19 Apr 2024 15:50:23 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E9=87=8D=E5=86=99iXigua(Toutiao)=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E8=8E=B7=E5=8F=96=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 +++ extractors/ixigua/ixigua.go | 64 +++++++++------------- extractors/ixigua/types.go | 36 ++++++++++++ extractors/xiaohongshu/xiaohongshu_test.go | 2 +- 4 files changed, 70 insertions(+), 40 deletions(-) create mode 100644 extractors/ixigua/types.go diff --git a/README.md b/README.md index 06f11651a..b8f520f0d 100644 --- a/README.md +++ b/README.md @@ -340,6 +340,11 @@ As a text file: $ lux -c cookies.txt "https://www.bilibili.com/video/av20203945" ``` +Another example +``` +$ lux -c "msToken=yoEh0-qLUq4obZ8Sfxsem_CxCo9R3NM6ViTrWaRcM1...; ttwid=1%7C..." "https://m.toutiao.com/is/iYbTfJ79/" +``` + ### Proxy You can set the HTTP/SOCKS5 proxy using environment variables: @@ -655,6 +660,9 @@ $ lux -j "https://www.bilibili.com/video/av20203945" 最好是每次下载都附带登录过的 Cookie 以避免部分 `ccode` 的问题 +### 西瓜&头条 +西瓜&头条视频必须带Cookie才能下载成功,西瓜和头条可共用西瓜视频的Cookie,Cookie的有效期可能较短,下载失败就更新Cookie尝试(2024-04-19) + ## Contributing Lux is an open source project and built on the top of open-source projects. Check out the [Contributing Guide](./CONTRIBUTING.md) to get started. diff --git a/extractors/ixigua/ixigua.go b/extractors/ixigua/ixigua.go index f3b472a3e..793d9c628 100644 --- a/extractors/ixigua/ixigua.go +++ b/extractors/ixigua/ixigua.go @@ -4,11 +4,12 @@ import ( "encoding/base64" "encoding/json" "fmt" + browser "github.com/EDDYCJY/fake-useragent" + "github.com/iawia002/lux/utils" "net/http" "regexp" "strings" - "github.com/itchyny/gojq" "github.com/pkg/errors" "github.com/iawia002/lux/extractors" @@ -40,8 +41,8 @@ func New() extractors.Extractor { // Extract is the main function to extract the data. func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) { headers := map[string]string{ - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:98.0) Gecko/20100101 Firefox/98.0", - "Content-Type": "application/json", + "User-Agent": browser.Chrome(), + "Cookie": option.Cookie, } // ixigua 有三种格式的 URL @@ -66,60 +67,45 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor finalURL = resp.Request.URL.String() } - finalURL = strings.ReplaceAll(finalURL, "https://www.toutiao.com/a", "https://www.ixigua.com/") + finalURL = strings.ReplaceAll(finalURL, "https://www.toutiao.com/video/", "https://www.ixigua.com/") r := regexp.MustCompile(`(ixigua.com/)(\w+)?`) id := r.FindSubmatch([]byte(finalURL))[2] - url2 := fmt.Sprintf("https://www.ixigua.com/api/public/videov2/brief/details?group_id=%s", string(id)) + //url2 := fmt.Sprintf("https://www.ixigua.com/api/public/videov2/brief/details?group_id=%s", string(id)) + url2 := fmt.Sprintf("https://www.ixigua.com/%s", string(id)) body, err := request.Get(url2, url, headers) if err != nil { return nil, errors.WithStack(err) } - var m interface{} - err = json.Unmarshal([]byte(body), &m) - if err != nil { - return nil, errors.WithStack(err) + videoListJson := utils.MatchOneOf(body, `window._SSR_HYDRATED_DATA=(\{.*?\})\<\/script\>`) + if videoListJson == nil || len(videoListJson) != 2 { + return nil, errors.WithStack(extractors.ErrBodyParseFailed) } - query, err := gojq.Parse("{title: .data.title} + {qualities: [.data.videoResource.normal.video_list | .[] | {url: .main_url, size: .size, ext: .vtype, quality: .definition}]}") + videoUrl := videoListJson[1] + videoUrl = strings.Replace(videoUrl, ":undefined", ":\"undefined\"", -1) + + // 解析JSON字符串 + var xiguanData xiguanData + err = json.Unmarshal([]byte(videoUrl), &xiguanData) if err != nil { return nil, errors.WithStack(err) } - video := Video{} - - iter := query.Run(m) - for { - v, ok := iter.Next() - if !ok { - break - } - if err, ok := v.(error); ok { - return nil, errors.WithStack(err) - } - - jsonbody, err := json.Marshal(v) - if err != nil { - return nil, errors.WithStack(err) - } - - if err := json.Unmarshal(jsonbody, &video); err != nil { - return nil, errors.WithStack(err) - } - } + title := xiguanData.AnyVideo.GidInformation.PackerData.Video.Title + videoList := xiguanData.AnyVideo.GidInformation.PackerData.Video.VideoResource.Normal.VideoList streams := make(map[string]*extractors.Stream) - for _, quality := range video.Qualities { - streams[quality.Quality] = &extractors.Stream{ - Size: quality.Size, - Quality: quality.Quality, + for _, v := range videoList { + streams[v.Definition] = &extractors.Stream{ + Quality: v.Definition, Parts: []*extractors.Part{ { - URL: base64Decode(quality.URL), - Size: quality.Size, - Ext: quality.Ext, + URL: base64Decode(v.MainUrl), + Size: v.Size, + Ext: v.Vtype, }, }, } @@ -128,7 +114,7 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor return []*extractors.Data{ { Site: "西瓜视频 ixigua.com", - Title: video.Title, + Title: title, Type: extractors.DataTypeVideo, Streams: streams, URL: url, diff --git a/extractors/ixigua/types.go b/extractors/ixigua/types.go new file mode 100644 index 000000000..bdc718342 --- /dev/null +++ b/extractors/ixigua/types.go @@ -0,0 +1,36 @@ +package ixigua + +type xiguanData struct { + AnyVideo struct { + GidInformation struct { + Gid string `json:"gid"` + PackerData struct { + Video struct { + Title string `json:"title"` + PosterUrl string `json:"poster_url"` + VideoResource struct { + Vid string `json:"vid"` + Normal struct { + VideoId string `json:"video_id"` + VideoList map[string]struct { + Definition string `json:"definition"` + Quality string `json:"quality"` + Vtype string `json:"vtype"` + Vwidth int `json:"vwidth"` + Vheight int `json:"vheight"` + Bitrate int64 `json:"bitrate"` + RealBitrate int64 `json:"real_bitrate"` + Fps int `json:"fps"` + CodecType string `json:"codec_type"` + Size int64 `json:"size"` + MainUrl string `json:"main_url"` + BackupUrl1 string `json:"backup_url_1"` + } `json:"video_list"` + } `json:"normal"` + } `json:"videoResource"` + } `json:"video"` + Key string `json:"key"` + } `json:"packerData"` + } `json:"gidInformation"` + } `json:"anyVideo"` +} diff --git a/extractors/xiaohongshu/xiaohongshu_test.go b/extractors/xiaohongshu/xiaohongshu_test.go index ea709673d..d7f8f3895 100644 --- a/extractors/xiaohongshu/xiaohongshu_test.go +++ b/extractors/xiaohongshu/xiaohongshu_test.go @@ -15,7 +15,7 @@ func TestDownload(t *testing.T) { { name: "normal test", args: test.Args{ - URL: "https://www.xiaohongshu.com/explore/64e9f1e50000000003023b3f?m_source=pinpai", + URL: "https://www.xiaohongshu.com/explore/64e9f1e50000000003023b3f", Title: "七星级大厨都不会告诉你的,五花肉的8种做法", Size: 59410194, }, From 4c772e5aa5adcf98bcadd175d78d80b94844bcfc Mon Sep 17 00:00:00 2001 From: Xinzhao Xu Date: Fri, 19 Apr 2024 16:13:17 +0800 Subject: [PATCH 2/2] code format --- README.md | 13 ++++++------- extractors/ixigua/ixigua.go | 15 ++++++--------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index b8f520f0d..8507d8b40 100644 --- a/README.md +++ b/README.md @@ -340,11 +340,6 @@ As a text file: $ lux -c cookies.txt "https://www.bilibili.com/video/av20203945" ``` -Another example -``` -$ lux -c "msToken=yoEh0-qLUq4obZ8Sfxsem_CxCo9R3NM6ViTrWaRcM1...; ttwid=1%7C..." "https://m.toutiao.com/is/iYbTfJ79/" -``` - ### Proxy You can set the HTTP/SOCKS5 proxy using environment variables: @@ -660,8 +655,12 @@ $ lux -j "https://www.bilibili.com/video/av20203945" 最好是每次下载都附带登录过的 Cookie 以避免部分 `ccode` 的问题 -### 西瓜&头条 -西瓜&头条视频必须带Cookie才能下载成功,西瓜和头条可共用西瓜视频的Cookie,Cookie的有效期可能较短,下载失败就更新Cookie尝试(2024-04-19) +### 西瓜/头条视频 +西瓜/头条视频必须带 Cookie 才能下载成功,西瓜和头条可共用西瓜视频的 Cookie,Cookie 的有效期可能较短,下载失败就更新 Cookie 尝试: + +``` +$ lux -c "msToken=yoEh0-qLUq4obZ8Sfxsem_CxCo9R3NM6ViTrWaRcM1...; ttwid=1%7C..." "https://m.toutiao.com/is/iYbTfJ79/" +``` ## Contributing diff --git a/extractors/ixigua/ixigua.go b/extractors/ixigua/ixigua.go index 793d9c628..616255fde 100644 --- a/extractors/ixigua/ixigua.go +++ b/extractors/ixigua/ixigua.go @@ -4,16 +4,16 @@ import ( "encoding/base64" "encoding/json" "fmt" - browser "github.com/EDDYCJY/fake-useragent" - "github.com/iawia002/lux/utils" "net/http" "regexp" "strings" + browser "github.com/EDDYCJY/fake-useragent" "github.com/pkg/errors" "github.com/iawia002/lux/extractors" "github.com/iawia002/lux/request" + "github.com/iawia002/lux/utils" ) func init() { @@ -71,7 +71,6 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor r := regexp.MustCompile(`(ixigua.com/)(\w+)?`) id := r.FindSubmatch([]byte(finalURL))[2] - //url2 := fmt.Sprintf("https://www.ixigua.com/api/public/videov2/brief/details?group_id=%s", string(id)) url2 := fmt.Sprintf("https://www.ixigua.com/%s", string(id)) body, err := request.Get(url2, url, headers) @@ -87,15 +86,13 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor videoUrl := videoListJson[1] videoUrl = strings.Replace(videoUrl, ":undefined", ":\"undefined\"", -1) - // 解析JSON字符串 - var xiguanData xiguanData - err = json.Unmarshal([]byte(videoUrl), &xiguanData) - if err != nil { + var data xiguanData + if err = json.Unmarshal([]byte(videoUrl), &data); err != nil { return nil, errors.WithStack(err) } - title := xiguanData.AnyVideo.GidInformation.PackerData.Video.Title - videoList := xiguanData.AnyVideo.GidInformation.PackerData.Video.VideoResource.Normal.VideoList + title := data.AnyVideo.GidInformation.PackerData.Video.Title + videoList := data.AnyVideo.GidInformation.PackerData.Video.VideoResource.Normal.VideoList streams := make(map[string]*extractors.Stream) for _, v := range videoList {