Skip to content

Commit

Permalink
Merge pull request #5 from iawia002/youtube
Browse files Browse the repository at this point in the history
extractors/youtube: Add support
  • Loading branch information
iawia002 committed Mar 13, 2018
2 parents 2be21ab + 3f57deb commit 5c56998
Show file tree
Hide file tree
Showing 6 changed files with 274 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ Site | URL | Videos | Images | Playlist
半次元 | <https://bcy.net> | | ✓ | |
pixivision | <https://www.pixivision.net> | | ✓ | |
优酷 | <https://www.youku.com> | ✓ | | |
YouTube | <https://www.youtube.com> | ✓ | | |


## Contributing
Expand Down
80 changes: 80 additions & 0 deletions extractors/youtube.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package extractors

import (
"encoding/json"
"fmt"
"log"
"net/url"
"strings"

"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)

// Data model for the subset of the `ytplayer.config` JSON object that the
// YouTube extractor decodes.
type (
	// args mirrors the "args" object of the player configuration.
	args struct {
		// Title is decoded from the "title" key.
		Title string `json:"title"`
		// Stream is decoded from the "url_encoded_fmt_stream_map" key.
		// Youtube splits it on "," and parses the first entry as a URL
		// query string.
		Stream string `json:"url_encoded_fmt_stream_map"`
	}

	// assets mirrors the "assets" object of the player configuration.
	assets struct {
		// JS is decoded from the "js" key; getSig appends it to
		// https://www.youtube.com to download the player script.
		JS string `json:"js"`
	}

	// youtubeData is the top-level value the player configuration is
	// unmarshalled into.
	youtubeData struct {
		Args   args   `json:"args"`
		Assets assets `json:"assets"`
	}
)

// getSig deciphers the enciphered stream signature sig.
//
// It downloads the player script found at path js on www.youtube.com,
// extracts the transformation tokens from it with getSigTokens and applies
// them to sig with decipherTokens.
func getSig(sig, js string) string {
	playerURL := fmt.Sprintf("https://www.youtube.com%s", js)
	playerScript := request.Get(playerURL)
	tokens := getSigTokens(playerScript)
	return decipherTokens(tokens, sig)
}

// Youtube extracts the first (best quality) stream of the YouTube video
// referenced by uri, passes it to the downloader and returns the collected
// metadata.
//
// uri may be a watch, youtu.be, embed or /v/ link. The function terminates the
// program through log.Fatal when the video ID, the player configuration, the
// stream URL or the stream type cannot be found.
func Youtube(uri string) downloader.VideoData {
	patterns := []string{
		// Video IDs may contain "-", which \w alone does not match.
		`watch\?v=([\w-]+)`,
		`youtu\.be/([^?/]+)`,
		`embed/([^/?]+)`,
		`v/([^/?]+)`,
	}
	vid := utils.MatchOneOf(patterns, uri)
	if len(vid) < 2 {
		log.Fatal("Can't find vid")
	}
	videoURL := fmt.Sprintf(
		"https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999",
		vid[1],
	)
	html := request.Get(videoURL)

	// Guard the capture-group access: an unexpected page (removed video,
	// changed markup) must not end in an index-out-of-range panic.
	playerConfig := utils.Match1(`;ytplayer\.config\s*=\s*({.+?});`, html)
	if len(playerConfig) < 2 {
		log.Fatal("Can't find ytplayer.config")
	}
	var youtube youtubeData
	if err := json.Unmarshal([]byte(playerConfig[1]), &youtube); err != nil {
		log.Fatalf("Can't parse ytplayer.config: %v", err)
	}
	title := youtube.Args.Title

	streams := strings.Split(youtube.Args.Stream, ",")
	// ParseQuery still returns every parameter it could decode, so a decoding
	// error only matters when it cost us the stream URL.
	stream, err := url.ParseQuery(streams[0]) // Best quality
	streamURL := stream.Get("url")
	if streamURL == "" {
		if err != nil {
			log.Fatalf("Can't parse stream data: %v", err)
		}
		log.Fatal("Can't find stream URL")
	}
	quality := stream.Get("quality")
	streamType := utils.Match1(`video/(\w+);`, stream.Get("type"))
	if len(streamType) < 2 {
		log.Fatal("Can't find stream type")
	}
	ext := streamType[1]

	// A stream carries either a ready-to-use signature ("sig") or an
	// enciphered one ("s") that has to be run through the player script.
	// When neither is present there is nothing to decipher or append.
	sig := stream.Get("sig")
	if sig == "" {
		if enciphered := stream.Get("s"); enciphered != "" {
			sig = getSig(enciphered, youtube.Assets.JS)
		}
	}
	realURL := streamURL
	if sig != "" {
		realURL = fmt.Sprintf("%s&signature=%s", streamURL, sig)
	}

	size := request.Size(realURL, uri)
	urlData := downloader.URLData{
		URL:  realURL,
		Size: size,
		Ext:  ext,
	}
	data := downloader.VideoData{
		Site:    "YouTube youtube.com",
		Title:   title,
		Type:    "video",
		URLs:    []downloader.URLData{urlData},
		Size:    size,
		Quality: quality,
	}
	data.Download(uri)
	return data
}
136 changes: 136 additions & 0 deletions extractors/youtube_signature.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package extractors

import (
"fmt"
"log"
"regexp"
"strconv"
"strings"
)

// The algorithm comes from https://github.com/rylio/ytdl, which is also
// released under the MIT License.
// Many thanks.
//
// The constants below are regular-expression fragments that are combined into
// the package-level patterns used to analyse the player script.
const (
// jsvarStr matches an identifier: a letter, "_" or "$" followed by any number
// of letters, digits or underscores.
jsvarStr = `[a-zA-Z_\$][a-zA-Z_0-9]*`
// reverseStr matches the member body `:function(a){a.reverse()}`, with an
// optional `return ` in front of the call.
reverseStr = `:function\(a\)\{` +
`(?:return )?a\.reverse\(\)` +
`\}`
// sliceStr matches the member body `:function(a,b){return a.slice(b)}`.
sliceStr = `:function\(a,b\)\{` +
`return a\.slice\(b\)` +
`\}`
// spliceStr matches the member body `:function(a,b){a.splice(0,b)}`.
spliceStr = `:function\(a,b\)\{` +
`a\.splice\(0,b\)` +
`\}`
// swapStr matches the member body that exchanges a[0] with a[b%a.length]; the
// `%a.length` on the second index and a trailing `;return a` are optional.
swapStr = `:function\(a,b\)\{` +
`var c=a\[0\];a\[0\]=a\[b%a\.length\];a\[b(?:%a\.length)?\]=c(?:;return a)?` +
`\}`
)

// actionsObjRegexp matches a declaration of the form `var NAME={...};` whose
// body consists of one or more members, each an identifier followed by one of
// the reverse, slice, splice or swap function bodies, optionally separated by
// a comma and a newline. Group 1 captures NAME, group 2 captures the body
// between the braces.
var actionsObjRegexp = regexp.MustCompile(fmt.Sprintf(
`var (%s)=\{((?:(?:%s%s|%s%s|%s%s|%s%s),?\n?)+)\};`,
jsvarStr, jsvarStr, reverseStr, jsvarStr, sliceStr, jsvarStr, spliceStr, jsvarStr, swapStr,
))

// actionsFuncRegexp matches a function, optionally named, of the form
// `function(a){a=a.split("");<calls>return a.join("")}`, where <calls> is one
// or more statements shaped like `OBJ.MEMBER(a,N);`, each optionally prefixed
// with `a=`. Group 1 captures the whole sequence of calls.
var actionsFuncRegexp = regexp.MustCompile(fmt.Sprintf(
`function(?: %s)?\(a\)\{`+
`a=a\.split\(""\);\s*`+
`((?:(?:a=)?%s\.%s\(a,\d+\);)+)`+
`return a\.join\(""\)`+
`\}`,
jsvarStr, jsvarStr, jsvarStr,
))

// Per-operation patterns. Each one matches a single member definition that
// sits at the start of a line or directly after a comma, and captures the
// member's name in group 1.
var (
	// reverseRegexp finds the member whose body matches reverseStr.
	reverseRegexp = regexp.MustCompile(
		fmt.Sprintf(`(?m)(?:^|,)(%s)%s`, jsvarStr, reverseStr),
	)
	// sliceRegexp finds the member whose body matches sliceStr.
	sliceRegexp = regexp.MustCompile(
		fmt.Sprintf(`(?m)(?:^|,)(%s)%s`, jsvarStr, sliceStr),
	)
	// spliceRegexp finds the member whose body matches spliceStr.
	spliceRegexp = regexp.MustCompile(
		fmt.Sprintf(`(?m)(?:^|,)(%s)%s`, jsvarStr, spliceStr),
	)
	// swapRegexp finds the member whose body matches swapStr.
	swapRegexp = regexp.MustCompile(
		fmt.Sprintf(`(?m)(?:^|,)(%s)%s`, jsvarStr, swapStr),
	)
)

// getSigTokens derives the list of signature transformation tokens from the
// player script html.
//
// It locates the helper object (actionsObjRegexp) and the function that calls
// its members (actionsFuncRegexp), works out which member name stands for
// which operation, and then translates every call into a token: "r" for
// reverse, and "w", "s" or "p" followed by the call's numeric argument for
// swap, slice and splice. The program is terminated through log.Fatal when
// either construct is missing or the call pattern does not compile.
func getSigTokens(html string) []string {
	// escape prefixes every "$" with a backslash.
	escape := func(s string) string {
		return strings.Replace(s, "$", `\$`, -1)
	}

	objMatch := actionsObjRegexp.FindStringSubmatch(html)
	funcMatch := actionsFuncRegexp.FindStringSubmatch(html)
	if len(objMatch) < 3 || len(funcMatch) < 2 {
		log.Fatal("Error parsing signature tokens")
	}
	objName := escape(objMatch[1])
	objBody := escape(objMatch[2])
	callSequence := escape(funcMatch[1])

	// keyOf returns the escaped name of the member matched by re inside the
	// helper object's body, or "" when there is no such member.
	keyOf := func(re *regexp.Regexp) string {
		if m := re.FindStringSubmatch(objBody); len(m) > 1 {
			return escape(m[1])
		}
		return ""
	}
	reverseKey := keyOf(reverseRegexp)
	sliceKey := keyOf(sliceRegexp)
	spliceKey := keyOf(spliceRegexp)
	swapKey := keyOf(swapRegexp)

	memberNames := strings.Join(
		[]string{reverseKey, sliceKey, spliceKey, swapKey}, "|",
	)
	callRegexp, err := regexp.Compile(
		fmt.Sprintf(`(?:a=)?%s\.(%s)\(a,(\d+)\)`, objName, memberNames),
	)
	if err != nil {
		log.Fatal(err)
	}

	var tokens []string
	for _, call := range callRegexp.FindAllStringSubmatch(callSequence, -1) {
		member, argument := call[1], call[2]
		switch member {
		case swapKey:
			tokens = append(tokens, "w"+argument)
		case reverseKey:
			tokens = append(tokens, "r")
		case sliceKey:
			tokens = append(tokens, "s"+argument)
		case spliceKey:
			tokens = append(tokens, "p"+argument)
		}
	}
	return tokens
}

// reverseStringSlice reverses the elements of s in place.
func reverseStringSlice(s []string) {
	for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 {
		s[i], s[j] = s[j], s[i]
	}
}

// decipherTokens applies the transformation tokens to sig and returns the
// resulting signature.
//
// A token is an operation letter optionally followed by a decimal operand:
//
//	"r"  reverse the signature
//	"wN" swap the first character with the one at index N modulo the length
//	"sN" drop the first N characters
//	"pN" drop the first N characters
//
// Every token is evaluated on its own; an operand never carries over to the
// next token. Operands that exceed the signature length are wrapped ("w") or
// clamped ("s", "p") instead of causing an index-out-of-range panic. Empty
// tokens, unknown operations and negative operands are ignored.
func decipherTokens(tokens []string, sig string) string {
	chars := strings.Split(sig, "")
	for _, tok := range tokens {
		if tok == "" {
			continue
		}
		// Atoi yields 0 for a missing or non-numeric operand, which turns the
		// operation into a no-op, and a saturated value on overflow, which the
		// wrapping/clamping below absorbs; the error adds nothing to that.
		pos, _ := strconv.Atoi(tok[1:])
		if pos < 0 {
			continue
		}
		switch tok[0] {
		case 'r':
			reverseStringSlice(chars)
		case 'w':
			if len(chars) == 0 {
				continue
			}
			// The player script indexes with b % a.length.
			pos %= len(chars)
			chars[0], chars[pos] = chars[pos], chars[0]
		case 's', 'p':
			if pos > len(chars) {
				pos = len(chars)
			}
			chars = chars[pos:]
		}
	}
	return strings.Join(chars, "")
}
41 changes: 41 additions & 0 deletions extractors/youtube_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package extractors

import (
"testing"

"github.com/iawia002/annie/config"
"github.com/iawia002/annie/test"
)

// TestYoutube runs the Youtube extractor against real video URLs and compares
// the returned title, size and quality with known values through test.Check.
//
// The extractor fetches the pages through the request package, so this test
// depends on network access and on the remote videos staying unchanged.
func TestYoutube(t *testing.T) {
	// NOTE(review): presumably stops data.Download from fetching the media
	// itself — confirm against the config package.
	config.InfoOnly = true
	tests := []struct {
		name string
		args test.Args
	}{
		{
			// Subtest names are distinct so that a failure and `go test -run`
			// identify the case unambiguously.
			name: "watch URL",
			args: test.Args{
				URL:     "https://www.youtube.com/watch?v=Gnbch2osEeo",
				Title:   "Multifandom Mashup 2017",
				Size:    60785404,
				Quality: "hd720",
			},
		},
		{
			name: "youtu.be short URL",
			args: test.Args{
				URL:     "https://youtu.be/z8eFzkfto2w",
				Title:   "Circle Of Love | Rudy Mancuso",
				Size:    27183162,
				Quality: "hd720",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := Youtube(tt.args.URL)
			test.Check(t, tt.args, data)
		})
	}
}
4 changes: 4 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ func main() {
extractors.Pixivision(videoURL)
case "youku":
extractors.Youku(videoURL)
case "youtube":
extractors.Youtube(videoURL)
case "youtu": // youtu.be
extractors.Youtube(videoURL)
default:
extractors.Universal(videoURL)
}
Expand Down
13 changes: 12 additions & 1 deletion utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ func Match1(pattern, text string) []string {
return value
}

// MatchOneOf tries the patterns in order against text and returns the result
// of Match1 for the first pattern that yields a non-empty result. It returns
// nil when no pattern does.
func MatchOneOf(patterns []string, text string) []string {
	for i := range patterns {
		if groups := Match1(patterns[i], text); len(groups) != 0 {
			return groups
		}
	}
	return nil
}

// MatchAll return all matching results
func MatchAll(pattern, text string) [][]string {
re := regexp.MustCompile(pattern)
Expand All @@ -37,7 +48,7 @@ func FileSize(filePath string) int64 {
// Domain returns the first capture group of domainPattern matched against url:
// the label (1 to 63 characters of a-z, 0-9 and "-", not starting with "-")
// that immediately precedes one of the listed suffixes.
//
// NOTE(review): this span is a diff rendering; the two consecutive
// `cn|com|net|...` lines look like the pre-change and post-change versions of
// the same line (the second adds `be`) — confirm against the actual file.
// NOTE(review): the result of Match1 is indexed with [1] without a length
// check; verify how Match1 behaves when url does not match.
func Domain(url string) string {
domainPattern := `([a-z0-9][-a-z0-9]{0,62})\.` +
`(com\.cn|com\.hk|` +
`cn|com|net|edu|gov|biz|org|info|pro|name|xxx|xyz|` +
`cn|com|net|edu|gov|biz|org|info|pro|name|xxx|xyz|be|` +
`me|top|cc|tv|tt)`
domain := Match1(domainPattern, url)[1]
return domain
}
Expand Down

0 comments on commit 5c56998

Please sign in to comment.