Skip to content

Commit

Permalink
feat: support video download for xiaohongshu.com
Browse files Browse the repository at this point in the history
  • Loading branch information
tkgfan committed Sep 13, 2023
1 parent de9bd35 commit 08e790d
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 0 deletions.
1 change: 1 addition & 0 deletions app/register.go
Expand Up @@ -35,6 +35,7 @@ import (
_ "github.com/iawia002/lux/extractors/vimeo"
_ "github.com/iawia002/lux/extractors/vk"
_ "github.com/iawia002/lux/extractors/weibo"
_ "github.com/iawia002/lux/extractors/xiaohongshu"
_ "github.com/iawia002/lux/extractors/ximalaya"
_ "github.com/iawia002/lux/extractors/xinpianchang"
_ "github.com/iawia002/lux/extractors/xvideos"
Expand Down
92 changes: 92 additions & 0 deletions extractors/xiaohongshu/xiaohongshu.go
@@ -0,0 +1,92 @@
package xiaohongshu

import (
"encoding/json"
"strconv"
"strings"

"github.com/pkg/errors"

"github.com/iawia002/lux/config"
"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/request"
"github.com/iawia002/lux/utils"
)

func init() {
extractors.Register("xiaohongshu", New())
}

// extractor implements the Extract method for xiaohongshu pages. It carries no
// state.
type extractor struct{}

// New returns a xiaohongshu extractor.
func New() extractors.Extractor {
	return new(extractor)
}

// mp4VideoType is used both as the substring a candidate URL must contain to
// be accepted and as the file extension recorded on each stream part.
const mp4VideoType = "mp4"

// Extract downloads the page at url and builds the download description for
// the video it references.
//
// The page body is searched with utils.MatchOneOf for a `"title"` field and for
// the `"backupUrls"` JSON array. Every URL in that array that contains "mp4"
// and whose size can be determined becomes one single-part stream, keyed by the
// URL's index in the array.
//
// Errors:
//   - the request error, if the page cannot be fetched;
//   - extractors.ErrBodyParseFailed, if the title or the URL list cannot be
//     found or decoded, or if no URL qualifies as an mp4 stream;
//   - the last size-lookup error, if candidate URLs existed but none of them
//     yielded a size.
//
// A failed size lookup for one URL does not discard streams already built from
// other URLs.
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
	html, err := request.Get(url, url, config.FakeHeaders)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// title: expect the full match plus exactly one capture group.
	titles := utils.MatchOneOf(html, `,"title":"(.+?)",`)
	if len(titles) != 2 {
		return nil, errors.WithStack(extractors.ErrBodyParseFailed)
	}
	title := titles[1]

	// video url: the capture group is a JSON array of strings.
	urlsJSON := utils.MatchOneOf(html, `"backupUrls":(\[.+?\])`)
	if len(urlsJSON) != 2 {
		return nil, errors.WithStack(extractors.ErrBodyParseFailed)
	}
	var urls []string
	if err := json.Unmarshal([]byte(urlsJSON[1]), &urls); err != nil {
		return nil, errors.WithStack(extractors.ErrBodyParseFailed)
	}

	// streams
	streams := make(map[string]*extractors.Stream, len(urls))
	// lastSizeErr remembers the most recent size-lookup failure. It is only
	// reported when no stream at all could be built, so that the outcome does
	// not depend on which position in the list happens to fail.
	var lastSizeErr error
	for i, u := range urls {
		if !strings.Contains(u, mp4VideoType) {
			continue
		}
		size, sizeErr := request.Size(u, u)
		if sizeErr != nil {
			// Best effort: skip this URL and keep trying the others.
			lastSizeErr = sizeErr
			continue
		}
		streams[strconv.Itoa(i)] = &extractors.Stream{
			Parts: []*extractors.Part{
				{
					URL:  u,
					Size: size,
					Ext:  mp4VideoType,
				},
			},
			Size: size,
		}
	}
	if len(streams) == 0 {
		if lastSizeErr != nil {
			return nil, errors.WithStack(lastSizeErr)
		}
		return nil, errors.WithStack(extractors.ErrBodyParseFailed)
	}

	return []*extractors.Data{
		{
			Site:    "小红书 xiaohongshu.com",
			Title:   title,
			Type:    extractors.DataTypeVideo,
			Streams: streams,
			URL:     url,
		},
	}, nil
}
32 changes: 32 additions & 0 deletions extractors/xiaohongshu/xiaohongshu_test.go
@@ -0,0 +1,32 @@
package xiaohongshu

import (
"testing"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/test"
)

// TestDownload runs the extractor against a real xiaohongshu URL and compares
// the first returned item with the expected title and size. Extract fetches
// the page itself, so the test needs network access.
func TestDownload(t *testing.T) {
	type testCase struct {
		name string
		args test.Args
	}

	cases := []testCase{
		{
			name: "normal test",
			args: test.Args{
				URL:   "https://www.xiaohongshu.com/explore/64e9f1e50000000003023b3f?m_source=pinpai",
				Title: "七星级大厨都不会告诉你的,五花肉的8种做法",
				Size:  59410194,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := New().Extract(tc.args.URL, extractors.Options{})
			test.CheckError(t, err)
			test.Check(t, tc.args, got[0])
		})
	}
}

0 comments on commit 08e790d

Please sign in to comment.