From 138805fa1b26f7786ee13e089ce0a69ee078f466 Mon Sep 17 00:00:00 2001 From: shavit Date: Thu, 15 Jun 2023 11:51:00 -0600 Subject: [PATCH 1/4] Extract Instagram payload Read the payload of stories and images --- extractors/errors.go | 1 + extractors/instagram/instagram.go | 185 +++++++++++++++--------------- 2 files changed, 96 insertions(+), 90 deletions(-) diff --git a/extractors/errors.go b/extractors/errors.go index 9b57c107c..5c317d131 100644 --- a/extractors/errors.go +++ b/extractors/errors.go @@ -9,4 +9,5 @@ var ( ErrURLParseFailed = errors.New("url parse failed") ErrInvalidRegularExpression = errors.New("invalid regular expression") ErrURLQueryParamsParseFailed = errors.New("url query params parse failed") + ErrBodyParseFailed = errors.New("body parse failed") ) diff --git a/extractors/instagram/instagram.go b/extractors/instagram/instagram.go index 7c7f2902a..6f5fff977 100644 --- a/extractors/instagram/instagram.go +++ b/extractors/instagram/instagram.go @@ -3,15 +3,13 @@ package instagram import ( "encoding/json" netURL "net/url" - "path" "strings" "github.com/pkg/errors" + "golang.org/x/net/html" "github.com/iawia002/lux/extractors" - "github.com/iawia002/lux/parser" "github.com/iawia002/lux/request" - "github.com/iawia002/lux/utils" ) func init() { @@ -32,6 +30,32 @@ type instagram struct { } `json:"shortcode_media"` } +type instagramPayload struct { + ArticleBody string `json:"articleBody"` + Author struct { + Image string `json:"image"` + Name string `json:"name"` + AlternativeName string `json:"alternativeName"` + Url string `json:"url"` + }`json:"author"` + Videos []struct { + UploadData string `json:"string"` + Description string `json:"description"` + Name string `json:"name"` + Caption string `json:"caption"` + Height string `json:"height"` + Width string `json:"width"` + ContentURL string `json:"contentUrl"` + ThumbnailURL string `json:"thumbnailUrl"` + } `json:"video"` + Images []struct { + Caption string `json:"caption"` + Height string `json:"height"` + Width string `json:"width"` + Url string `json:"url"` + } `json:"image"` +} + type extractor struct{} // New returns a instagram extractor. @@ -39,104 +63,67 @@ func New() extractors.Extractor { return &extractor{} } -func extractImageFromPage(html, url string) (map[string]*extractors.Stream, error) { - _, realURLs, err := parser.GetImages(html, "EmbeddedMediaImage", nil) - if err != nil { - return nil, errors.WithStack(err) - } - - urls := make([]*extractors.Part, 0, len(realURLs)) - var totalSize int64 - for _, realURL := range realURLs { - size, err := request.Size(realURL, url) - if err != nil { - return nil, errors.WithStack(err) - } - urlData := &extractors.Part{ - URL: realURL, - Size: size, - Ext: "jpg", - } - urls = append(urls, urlData) - totalSize += size - } - - return map[string]*extractors.Stream{ - "default": { - Parts: urls, - Size: totalSize, - }, - }, nil -} - -func extractFromData(dataString, url string) (map[string]*extractors.Stream, error) { - var data instagram - if err := json.Unmarshal([]byte(dataString), &data); err != nil { - return nil, errors.WithStack(err) - } - - urls := make([]*extractors.Part, 0, len(data.ShortcodeMedia.EdgeSidecar.Edges)) - var totalSize int64 - for _, u := range data.ShortcodeMedia.EdgeSidecar.Edges { - // Image - realURL := u.Node.DisplayURL - ext := "jpg" - if u.Node.IsVideo { - // Video - realURL = u.Node.VideoURL - ext = "mp4" - } - - size, err := request.Size(realURL, url) - if err != nil { - return nil, errors.WithStack(err) - } - urlData := &extractors.Part{ - URL: realURL, - Size: size, - Ext: ext, - } - urls = append(urls, urlData) - totalSize += size - } - - return map[string]*extractors.Stream{ - "default": { - Parts: urls, - Size: totalSize, - }, - }, nil -} - // Extract is the main function to extract the data. func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) { - // Instagram is forcing a login to access the page, so we use the embed page to bypass that. u, err := netURL.Parse(url) if err != nil { return nil, errors.WithStack(err) } - id := u.Path[strings.LastIndex(u.Path, "/")+1:] - u.Path = path.Join(u.Path, "embed") - html, err := request.Get(u.String(), url, nil) + htmlResp, err := request.Get(u.String(), url, nil) if err != nil { return nil, errors.WithStack(err) } - dataStrings := utils.MatchOneOf(html, `window\.__additionalDataLoaded\('graphql',(.*)\);`) - if dataStrings == nil || len(dataStrings) < 2 { - return nil, errors.WithStack(extractors.ErrURLParseFailed) - } - dataString := dataStrings[1] - var streams map[string]*extractors.Stream - if dataString == "" || dataString == "null" { - streams, err = extractImageFromPage(html, url) - } else { - streams, err = extractFromData(dataString, url) - } - if err != nil { - return nil, errors.WithStack(err) - } + reader := strings.NewReader(htmlResp) + htmlRoot, err := html.Parse(reader) + if err != nil { + return nil, errors.WithStack(err) + } + + sNode, err := dfsFindScript(htmlRoot) + if err != nil { + return nil, errors.WithStack(err) + } + + var payload instagramPayload + if err = json.Unmarshal([]byte(sNode.Data), &payload); err != nil { + return nil, errors.WithStack(err) + } + + var parts []*extractors.Part + if len(payload.Videos) > 0 { + for _, it := range payload.Videos { + ext := "" + part := &extractors.Part{ + URL: it.ContentURL, + Size: 0, + Ext: ext, + } + parts = append(parts, part) + } + } else if len(payload.Images) > 0 { + for _, it := range payload.Videos { + ext := "" + part := &extractors.Part{ + URL: it.ContentURL, + Size: 0, + Ext: ext, + } + parts = append(parts, part) + } + } else { + return nil, errors.WithStack(extractors.ErrBodyParseFailed) + } + + streams := map[string]*extractors.Stream{ + "default": { + Parts: parts, + Size: 0, + }, + }; + + id := u.Path[strings.LastIndex(u.Path, "/") + 1:] return []*extractors.Data{ { @@ -148,3 +135,21 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor }, }, nil } + +func dfsFindScript(n *html.Node) (*html.Node, error) { + if n.Type == html.ElementNode && n.Data == "script" { + for _, attr := range n.Attr { + if attr.Key == "type" && attr.Val == "application/ld+json" { + return n.FirstChild, nil + } + } + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + if ret, err := dfsFindScript(c); err == nil { + return ret, nil + } + } + + return nil, errors.WithStack(extractors.ErrBodyParseFailed) +} From b24e50a82dc5b777df4e8bcdf9ef34c74e6a7f73 Mon Sep 17 00:00:00 2001 From: shavit Date: Thu, 15 Jun 2023 12:23:00 -0600 Subject: [PATCH 2/4] Extract images and videos --- extractors/errors.go | 2 +- extractors/instagram/instagram.go | 231 +++++++++++++++++------------- go.mod | 11 +- go.sum | 26 ++++ 4 files changed, 167 insertions(+), 103 deletions(-) diff --git a/extractors/errors.go b/extractors/errors.go index 5c317d131..8759033bb 100644 --- a/extractors/errors.go +++ b/extractors/errors.go @@ -9,5 +9,5 @@ var ( ErrURLParseFailed = errors.New("url parse failed") ErrInvalidRegularExpression = errors.New("invalid regular expression") ErrURLQueryParamsParseFailed = errors.New("url query params parse failed") - ErrBodyParseFailed = errors.New("body parse failed") + ErrBodyParseFailed = errors.New("body parse failed") ) diff --git a/extractors/instagram/instagram.go b/extractors/instagram/instagram.go index 6f5fff977..3477726de 100644 --- a/extractors/instagram/instagram.go +++ b/extractors/instagram/instagram.go @@ -10,50 +10,37 @@ import ( "github.com/iawia002/lux/extractors" "github.com/iawia002/lux/request" + "github.com/iawia002/lux/utils" ) func init() { extractors.Register("instagram", New()) } -type instagram struct { - ShortcodeMedia struct { - EdgeSidecar struct { - Edges []struct { - Node struct { - DisplayURL string `json:"display_url"` - IsVideo bool `json:"is_video"` - VideoURL string `json:"video_url"` - } `json:"node"` - } `json:"edges"` - } `json:"edge_sidecar_to_children"` - } `json:"shortcode_media"` -} - type instagramPayload struct { - ArticleBody string `json:"articleBody"` - Author struct { - Image string `json:"image"` - Name string `json:"name"` - AlternativeName string `json:"alternativeName"` - Url string `json:"url"` - }`json:"author"` - Videos []struct { - UploadData string `json:"string"` - Description string `json:"description"` - Name string `json:"name"` - Caption string `json:"caption"` - Height string `json:"height"` - Width string `json:"width"` - ContentURL string `json:"contentUrl"` - ThumbnailURL string `json:"thumbnailUrl"` - } `json:"video"` - Images []struct { - Caption string `json:"caption"` - Height string `json:"height"` - Width string `json:"width"` - Url string `json:"url"` - } `json:"image"` + ArticleBody string `json:"articleBody"` + Author struct { + Image string `json:"image"` + Name string `json:"name"` + AlternativeName string `json:"alternativeName"` + Url string `json:"url"` + } `json:"author"` + Videos []struct { + UploadData string `json:"string"` + Description string `json:"description"` + Name string `json:"name"` + Caption string `json:"caption"` + Height string `json:"height"` + Width string `json:"width"` + ContentURL string `json:"contentUrl"` + ThumbnailURL string `json:"thumbnailUrl"` + } `json:"video"` + Images []struct { + Caption string `json:"caption"` + Height string `json:"height"` + Width string `json:"width"` + URL string `json:"url"` + } `json:"image"` } type extractor struct{} @@ -75,55 +62,53 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor return nil, errors.WithStack(err) } - reader := strings.NewReader(htmlResp) - htmlRoot, err := html.Parse(reader) - if err != nil { - return nil, errors.WithStack(err) - } - - sNode, err := dfsFindScript(htmlRoot) - if err != nil { - return nil, errors.WithStack(err) - } - - var payload instagramPayload - if err = json.Unmarshal([]byte(sNode.Data), &payload); err != nil { - return nil, errors.WithStack(err) - } - - var parts []*extractors.Part - if len(payload.Videos) > 0 { - for _, it := range payload.Videos { - ext := "" - part := &extractors.Part{ - URL: it.ContentURL, - Size: 0, - Ext: ext, - } - parts = append(parts, part) - } - } else if len(payload.Images) > 0 { - for _, it := range payload.Videos { - ext := "" - part := &extractors.Part{ - URL: it.ContentURL, - Size: 0, - Ext: ext, - } - parts = append(parts, part) - } - } else { - return nil, errors.WithStack(extractors.ErrBodyParseFailed) - } - - streams := map[string]*extractors.Stream{ - "default": { - Parts: parts, - Size: 0, - }, - }; - - id := u.Path[strings.LastIndex(u.Path, "/") + 1:] + reader := strings.NewReader(htmlResp) + htmlRoot, err := html.Parse(reader) + if err != nil { + return nil, errors.WithStack(err) + } + + sNode, err := dfsFindScript(htmlRoot) + if err != nil { + return nil, errors.WithStack(err) + } + + var payload instagramPayload + if err = json.Unmarshal([]byte(sNode.Data), &payload); err != nil { + return nil, errors.WithStack(err) + } + + var totalSize int64 + var parts []*extractors.Part + if len(payload.Videos) > 0 { + videoParts, err := createPartVideos(&payload, url) + if err != nil { + return nil, errors.WithStack(extractors.ErrBodyParseFailed) + } + + parts = append(parts, videoParts...) + } + if len(payload.Images) > 0 { + imageParts, err := createPartImages(&payload, url) + if err != nil { + return nil, errors.WithStack(extractors.ErrBodyParseFailed) + } + + parts = append(parts, imageParts...) + } + + for _, part := range parts { + totalSize += part.Size + } + + streams := map[string]*extractors.Stream{ + "default": { + Parts: parts, + Size: totalSize, + }, + } + + id := u.Path[strings.LastIndex(u.Path, "/")+1:] return []*extractors.Data{ { @@ -137,19 +122,63 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor } func dfsFindScript(n *html.Node) (*html.Node, error) { - if n.Type == html.ElementNode && n.Data == "script" { - for _, attr := range n.Attr { - if attr.Key == "type" && attr.Val == "application/ld+json" { - return n.FirstChild, nil - } - } - } - - for c := n.FirstChild; c != nil; c = c.NextSibling { - if ret, err := dfsFindScript(c); err == nil { - return ret, nil - } - } - - return nil, errors.WithStack(extractors.ErrBodyParseFailed) + if n.Type == html.ElementNode && n.Data == "script" { + for _, attr := range n.Attr { + if attr.Key == "type" && attr.Val == "application/ld+json" { + return n.FirstChild, nil + } + } + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + if ret, err := dfsFindScript(c); err == nil { + return ret, nil + } + } + + return nil, errors.WithStack(extractors.ErrBodyParseFailed) +} + +func createPartVideos(payload *instagramPayload, ref string) (parts []*extractors.Part, err error) { + for _, it := range payload.Videos { + _, ext, err := utils.GetNameAndExt(it.ContentURL) + if err != nil { + return parts, errors.WithStack(err) + } + filesize, err := request.Size(it.ContentURL, ref) + if err != nil { + return parts, errors.WithStack(err) + } + + part := &extractors.Part{ + URL: it.ContentURL, + Size: filesize, + Ext: ext, + } + parts = append(parts, part) + } + + return parts, err +} + +func createPartImages(payload *instagramPayload, ref string) (parts []*extractors.Part, err error) { + for _, it := range payload.Images { + _, ext, err := utils.GetNameAndExt(it.URL) + if err != nil { + return parts, errors.WithStack(err) + } + filesize, err := request.Size(it.URL, ref) + if err != nil { + return parts, errors.WithStack(err) + } + + part := &extractors.Part{ + URL: it.URL, + Size: filesize, + Ext: ext, + } + parts = append(parts, part) + } + + return parts, err } diff --git a/go.mod b/go.mod index 971aa3d61..e8aa2b2b7 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/iawia002/lux go 1.20 require ( + github.com/Azure/azure-sdk-for-go v68.0.0+incompatible github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 github.com/PuerkitoBio/goquery v1.8.0 github.com/cheggaaa/pb/v3 v3.0.8 @@ -16,15 +17,23 @@ require ( github.com/pkg/errors v0.9.1 github.com/robertkrimen/otto v0.0.0-20211024170158-b87d35c0b86f github.com/urfave/cli/v2 v2.6.0 + golang.org/x/net v0.7.0 ) require ( + github.com/Azure/go-autorest v14.2.0+incompatible // indirect + github.com/Azure/go-autorest/autorest v0.11.29 // indirect + github.com/Azure/go-autorest/autorest/adal v0.9.22 // indirect + github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect + github.com/Azure/go-autorest/logger v0.2.1 // indirect + github.com/Azure/go-autorest/tracing v0.6.0 // indirect github.com/VividCortex/ewma v1.2.0 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect github.com/dlclark/regexp2 v1.7.0 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect + github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect github.com/itchyny/timefmt-go v0.1.3 // indirect github.com/kr/text v0.2.0 // indirect @@ -36,8 +45,8 @@ require ( github.com/rivo/uniseg v0.4.3 // indirect github.com/rogpeppe/go-internal v1.9.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + golang.org/x/crypto v0.6.0 // indirect golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf // indirect - golang.org/x/net v0.7.0 // indirect golang.org/x/sys v0.5.0 // indirect golang.org/x/text v0.7.0 // indirect gopkg.in/sourcemap.v1 v1.0.5 // indirect diff --git a/go.sum b/go.sum index ebc3ab0a0..d19773a23 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,20 @@ +github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0hS+6+I79yEDJBqVNcqUzU= +github.com/Azure/azure-sdk-for-go v68.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest v0.11.29 h1:I4+HL/JDvErx2LjyzaVxllw2lRDB5/BT2Bm4g20iqYw= +github.com/Azure/go-autorest/autorest v0.11.29/go.mod h1:ZtEzC4Jy2JDrZLxvWs8LrBWEBycl1hbT1eknI8MtfAs= +github.com/Azure/go-autorest/autorest/adal v0.9.22 h1:/GblQdIudfEM3AWWZ0mrYJQSd7JS4S/Mbzh6F0ov0Xc= +github.com/Azure/go-autorest/autorest/adal v0.9.22/go.mod h1:XuAbAEUv2Tta//+voMI038TrJBqjKam0me7qR+L8Cmk= +github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= +github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= +github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw= +github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU= +github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg= +github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= +github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= +github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 h1:EtZwYyLbkEcIt+B//6sujwRCnHuTEK3qiSypAX5aJeM= github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403/go.mod h1:mM6WvakkX2m+NgMiPCfFFjwfH4KzENC07zeGEqq9U7s= github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= @@ -36,6 +53,9 @@ github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= +github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= +github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg= +github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U= @@ -99,13 +119,18 @@ github.com/urfave/cli/v2 v2.6.0/go.mod h1:oDzoM7pVwz6wHn5ogWgFUU1s4VJayeQS+aEZDq github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.6.0 h1:qfktjS5LUO+fFKeJXZ+ikTRijMmljikvG68fpMMruSc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf h1:oXVg4h2qJDd9htKxb5SCpFBHLipW6hXmL3qpUixS2jw= golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf/go.mod h1:yh0Ynu2b5ZUe3MQfp2nM0ecK7wsgouWTDN0FNeJuIys= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -127,6 +152,7 @@ golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From 745fc12d8d18565bf90d8b37359f43f25bfc2dc2 Mon Sep 17 00:00:00 2001 From: shavit Date: Thu, 29 Jun 2023 21:50:00 -0600 Subject: [PATCH 3/4] Update the file sizes in Instagram tests --- extractors/instagram/instagram_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extractors/instagram/instagram_test.go b/extractors/instagram/instagram_test.go index 0bd4b9bde..7105ed068 100644 --- a/extractors/instagram/instagram_test.go +++ b/extractors/instagram/instagram_test.go @@ -17,7 +17,7 @@ func TestDownload(t *testing.T) { args: test.Args{ URL: "https://www.instagram.com/p/BlIka1ZFCNr", Title: "Instagram BlIka1ZFCNr", - Size: 3003662, + Size: 577298, }, }, { @@ -25,7 +25,7 @@ func TestDownload(t *testing.T) { args: test.Args{ URL: "https://www.instagram.com/p/Bl5oVUyl9Yx", Title: "Instagram Bl5oVUyl9Yx", - Size: 250596, + Size: 101611, }, }, { @@ -33,7 +33,7 @@ func TestDownload(t *testing.T) { args: test.Args{ URL: "https://www.instagram.com/p/Bjyr-gxF4Rb", Title: "Instagram Bjyr-gxF4Rb", - Size: 4599909, + Size: 241466, }, }, } From 28521bb797b7655b889285cf9f2298a878ddcebc Mon Sep 17 00:00:00 2001 From: Xinzhao Xu Date: Thu, 6 Jul 2023 10:16:03 +0800 Subject: [PATCH 4/4] go mod tidy --- go.mod | 9 --------- go.sum | 26 -------------------------- 2 files changed, 35 deletions(-) diff --git a/go.mod b/go.mod index e8aa2b2b7..f28d0b6c6 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/iawia002/lux go 1.20 require ( - github.com/Azure/azure-sdk-for-go v68.0.0+incompatible github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 github.com/PuerkitoBio/goquery v1.8.0 github.com/cheggaaa/pb/v3 v3.0.8 @@ -21,19 +20,12 @@ require ( ) require ( - github.com/Azure/go-autorest v14.2.0+incompatible // indirect - github.com/Azure/go-autorest/autorest v0.11.29 // indirect - github.com/Azure/go-autorest/autorest/adal v0.9.22 // indirect - github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect - github.com/Azure/go-autorest/logger v0.2.1 // indirect - github.com/Azure/go-autorest/tracing v0.6.0 // indirect github.com/VividCortex/ewma v1.2.0 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect github.com/dlclark/regexp2 v1.7.0 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect - github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect github.com/itchyny/timefmt-go v0.1.3 // indirect github.com/kr/text v0.2.0 // indirect @@ -45,7 +37,6 @@ require ( github.com/rivo/uniseg v0.4.3 // indirect github.com/rogpeppe/go-internal v1.9.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - golang.org/x/crypto v0.6.0 // indirect golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf // indirect golang.org/x/sys v0.5.0 // indirect golang.org/x/text v0.7.0 // indirect diff --git a/go.sum b/go.sum index d19773a23..ebc3ab0a0 100644 --- a/go.sum +++ b/go.sum @@ -1,20 +1,3 @@ -github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0hS+6+I79yEDJBqVNcqUzU= -github.com/Azure/azure-sdk-for-go v68.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= -github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= -github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/Azure/go-autorest/autorest v0.11.29 h1:I4+HL/JDvErx2LjyzaVxllw2lRDB5/BT2Bm4g20iqYw= -github.com/Azure/go-autorest/autorest v0.11.29/go.mod h1:ZtEzC4Jy2JDrZLxvWs8LrBWEBycl1hbT1eknI8MtfAs= -github.com/Azure/go-autorest/autorest/adal v0.9.22 h1:/GblQdIudfEM3AWWZ0mrYJQSd7JS4S/Mbzh6F0ov0Xc= -github.com/Azure/go-autorest/autorest/adal v0.9.22/go.mod h1:XuAbAEUv2Tta//+voMI038TrJBqjKam0me7qR+L8Cmk= -github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= -github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= -github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= -github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw= -github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU= -github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg= -github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= -github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= -github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 h1:EtZwYyLbkEcIt+B//6sujwRCnHuTEK3qiSypAX5aJeM= github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403/go.mod h1:mM6WvakkX2m+NgMiPCfFFjwfH4KzENC07zeGEqq9U7s= github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= @@ -53,9 +36,6 @@ github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= -github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= -github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg= -github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U= @@ -119,18 +99,13 @@ github.com/urfave/cli/v2 v2.6.0/go.mod h1:oDzoM7pVwz6wHn5ogWgFUU1s4VJayeQS+aEZDq github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.6.0 h1:qfktjS5LUO+fFKeJXZ+ikTRijMmljikvG68fpMMruSc= -golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf h1:oXVg4h2qJDd9htKxb5SCpFBHLipW6hXmL3qpUixS2jw= golang.org/x/exp v0.0.0-20220518171630-0b5c67f07fdf/go.mod h1:yh0Ynu2b5ZUe3MQfp2nM0ecK7wsgouWTDN0FNeJuIys= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -152,7 +127,6 @@ golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=