Skip to content

Commit

Permalink
fix: optimize getting video info by making the watch.html request
Browse files Browse the repository at this point in the history
first

closes #815
  • Loading branch information
fent committed Dec 16, 2020
1 parent 41825b6 commit eb42b9c
Show file tree
Hide file tree
Showing 30 changed files with 49,941 additions and 43,126 deletions.
7 changes: 7 additions & 0 deletions lib/cache.js
Expand Up @@ -28,6 +28,13 @@ module.exports = class Cache extends Map {
} else {
let value = fn();
this.set(key, value);
(async() => {
try {
await value;
} catch (err) {
this.delete(key);
}
})();
return value;
}
}
Expand Down
24 changes: 16 additions & 8 deletions lib/info.js
Expand Up @@ -54,8 +54,8 @@ exports.getBasicInfo = async(id, options) => {
);
};
let info = await pipeline([id, options], validate, retryOptions, [
getWatchJSONPage,
getWatchHTMLPage,
getWatchJSONPage,
getVideoInfoPage,
]);

Expand Down Expand Up @@ -109,13 +109,20 @@ const isNotYetBroadcasted = player_response => {
};


const getHTMLWatchURL = (id, options) => `${VIDEO_URL + id}&hl=${options.lang || 'en'}`;
const getHTMLWatchPageBody = (id, options) => {
const url = getHTMLWatchURL(id, options);
/**
 * Builds the watch page URL for a video.
 *
 * @param {string} id Video id, appended to `VIDEO_URL`.
 * @param {Object} options Reads `options.lang`; falls back to `'en'` when falsy.
 * @returns {string} `VIDEO_URL + id` followed by the `hl` query parameter.
 */
const getWatchHTMLURL = (id, options) => {
  const lang = options.lang || 'en';
  return `${VIDEO_URL + id}&hl=${lang}`;
};

/**
 * Requests the watch page body through `exports.watchPageCache`, keyed by the
 * watch page URL, so the request callback is handed to the cache rather than
 * being invoked directly here.
 *
 * @param {string} id Video id.
 * @param {Object} options Reads `options.lang` and `options.requestOptions`.
 * @returns {*} Whatever `watchPageCache.getOrSet()` returns for that URL
 *   (presumably a promise for the page text; confirm against the cache).
 */
const getWatchHTMLPageBody = (id, options) => {
  const watchURL = getWatchHTMLURL(id, options);
  const fetchBody = () => miniget(watchURL, options.requestOptions).text();
  return exports.watchPageCache.getOrSet(watchURL, fetchBody);
};


const EMBED_URL = 'https://www.youtube.com/embed/';

/**
 * Requests the embed page for a video and returns the result of `.text()`
 * on that request.
 *
 * @param {string} id Video id, appended to `EMBED_URL`.
 * @param {Object} options Reads `options.lang` (falls back to `'en'` when
 *   falsy) and `options.requestOptions`, which is passed to `miniget`.
 * @returns {*} The value returned by `miniget(...).text()`.
 */
const getEmbedPageBody = (id, options) => {
  const lang = options.lang || 'en';
  const request = miniget(`${EMBED_URL + id}?hl=${lang}`, options.requestOptions);
  return request.text();
};


const getHTML5player = body => {
let html5playerRes =
/<script\s+src="([^"]+)"(?:\s+type="text\/javascript")?\s+name="player_ias\/base"\s*>|"jsUrl":"([^"]+)"/
Expand All @@ -126,7 +133,7 @@ const getHTML5player = body => {

const getIdentityToken = (id, options, key, throwIfNotFound) =>
exports.cookieCache.getOrSet(key, async() => {
let page = await getHTMLWatchPageBody(id, options);
let page = await getWatchHTMLPageBody(id, options);
let match = page.match(/(["'])ID_TOKEN\1[:,]\s?"([^"]+)"/);
if (!match && throwIfNotFound) {
throw new UnrecoverableError('Cookie header used in request, but unable to find YouTube identity token');
Expand Down Expand Up @@ -253,7 +260,7 @@ const findPlayerResponse = (source, info) => {
};


const getWatchJSONURL = (id, options) => `${getHTMLWatchURL(id, options)}&pbj=1`;
/**
 * Builds the watch page URL with `&pbj=1` appended.
 *
 * @param {string} id Video id, forwarded to `getWatchHTMLURL`.
 * @param {Object} options Forwarded unchanged to `getWatchHTMLURL`.
 * @returns {string} The watch page URL followed by `&pbj=1`.
 */
const getWatchJSONURL = (id, options) => {
  const watchURL = getWatchHTMLURL(id, options);
  return `${watchURL}&pbj=1`;
};
const getWatchJSONPage = async(id, options) => {
const reqOptions = Object.assign({ headers: {} }, options.requestOptions);
let cookie = reqOptions.headers.Cookie || reqOptions.headers.cookie;
Expand Down Expand Up @@ -290,7 +297,7 @@ const getWatchJSONPage = async(id, options) => {


const getWatchHTMLPage = async(id, options) => {
let body = await getHTMLWatchPageBody(id, options);
let body = await getWatchHTMLPageBody(id, options);
let info = { page: 'watch' };
try {
info.player_response = findJSON('watch.html', 'player_response',
Expand Down Expand Up @@ -359,7 +366,8 @@ exports.getInfo = async(id, options) => {
);
let funcs = [];
if (info.formats.length) {
info.html5player = info.html5player || getHTML5player(await getHTMLWatchPageBody(id, options));
info.html5player = info.html5player ||
getHTML5player(await getWatchHTMLPageBody(id, options)) || getHTML5player(await getEmbedPageBody(id, options));
if (!info.html5player) {
throw Error('Unable to find html5player file');
}
Expand Down
114 changes: 74 additions & 40 deletions test/basic-info-test.js
Expand Up @@ -23,11 +23,10 @@ describe('ytdl.getBasicInfo()', () => {
assert.strictEqual(info.formats.length, expected.formats.length);
});

it('Retrieves just enough metainfo', async() => {
it('Retrieves just enough metainfo without all formats', async() => {
const id = '5qap5aO4i9A';
const expected = require('./files/videos/live-now/expected-info.json');
const scope = nock(id, 'live-now', {
watchHtml: false,
player: false,
dashmpd: false,
m3u8: false,
Expand Down Expand Up @@ -86,8 +85,24 @@ describe('ytdl.getBasicInfo()', () => {
});
});

// Checks that getBasicInfo() on the 'live-now' fixture yields formats and
// titled video details.
describe('From a live video', () => {
it('Returns correct video metainfo', async() => {
const id = '5qap5aO4i9A';
// Set up the mocked requests before calling getBasicInfo(). The player,
// dashmpd and m3u8 entries are passed as `false` (presumably disabling those
// mocks; confirm against the nock helper).
const scope = nock(id, 'live-now', {
player: false,
dashmpd: false,
m3u8: false,
});
let info = await ytdl.getBasicInfo(id);
// Presumably verifies that every mocked request was actually made; confirm
// against the nock helper.
scope.done();
// Only presence is checked here; there is no comparison with an expected-info fixture.
assert.ok(info.formats.length);
assert.ok(info.videoDetails);
assert.ok(info.videoDetails.title);
});
});

describe('From an age restricted video', () => {
it('Returns correct video metainfo with formats', async() => {
it('Returns correct video metainfo', async() => {
const expected = require('./files/videos/age-restricted/expected-info.json');
const id = 'LuZu9N53Vd0';
const scope = nock(id, 'age-restricted');
Expand Down Expand Up @@ -149,7 +164,7 @@ describe('ytdl.getBasicInfo()', () => {
describe('`x-youtube-identity-token` given', () => {
it('Does not make extra request to watch.html page', async() => {
const scope = nock(id, 'regular', {
watchHtml: false,
watchHtml: [true, 500],
player: false,
});
let info = await ytdl.getBasicInfo(id, {
Expand All @@ -168,6 +183,8 @@ describe('ytdl.getBasicInfo()', () => {
it('Retrieves identity-token from watch.html page', async() => {
const scope = nock(id, 'regular', {
watchHtml: [true, 200, body => `${body}\n{"ID_TOKEN":"abcd"}`],
watchJson: false,
get_video_info: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, {
Expand All @@ -182,7 +199,12 @@ describe('ytdl.getBasicInfo()', () => {
describe('Unable to find token', () => {
it('Returns an error', async() => {
const scope = nock(id, 'regular', {
watchHtml: [
[true, 500],
[true, 200],
],
watchJson: false,
get_video_info: false,
player: false,
});
await assert.rejects(ytdl.getBasicInfo(id, {
Expand All @@ -197,11 +219,16 @@ describe('ytdl.getBasicInfo()', () => {
describe('Called from a web browser with cookies in requests', () => {
it('Tries to get identity-token from watch.html page', async() => {
const scope = nock(id, 'regular', {
watchJson: [true, 200, '}]{"reload":"now"}'],
player: false,
});
const scope2 = nock(id, 'regular', {
watchHtml: false,
watchHtml: [
[true, 500],
[true, 500],
[true, 200],
],
watchJson: [
[true, 200, '}]{"reload":"now"}'],
[true, 200],
],
get_video_info: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, {
Expand All @@ -213,7 +240,6 @@ describe('ytdl.getBasicInfo()', () => {
},
});
scope.done();
scope2.done();
assert.ok(info.formats.length);
});
});
Expand All @@ -222,7 +248,7 @@ describe('ytdl.getBasicInfo()', () => {
it('Does not make extra request to watch.html page', async() => {
ytdl.cache.cookie.set('abc=1', 'token!');
const scope = nock(id, 'regular', {
watchHtml: false,
watchHtml: [true, 500],
player: false,
});
let info = await ytdl.getBasicInfo(id, {
Expand All @@ -246,6 +272,8 @@ describe('ytdl.getBasicInfo()', () => {
const scope = nock(id, 'use-backups', {
watchJson: [true, 200, '{"reload":"now"}'],
watchHtml: [true, 200, '<html></html>'],
embed: false,
player: false,
});
let info = await ytdl.getBasicInfo(id);
scope.done();
Expand All @@ -254,15 +282,16 @@ describe('ytdl.getBasicInfo()', () => {
});

describe('Unable to parse watch.json page config', () => {
it('Uses backup watch.html page', async() => {
it('Uses backup', async() => {
const id = 'LuZu9N53Vd0';
const scope = nock(id, 'use-backups', {
watchHtml: [true, 500],
watchJson: [true, 200, '{]}'],
get_video_info: false,
embed: false,
player: false,
});
let info = await ytdl.getBasicInfo(id);
scope.done();
assert.ok(info.html5player);
assert.ok(info.formats.length);
assert.ok(info.formats[0].url);
});
Expand All @@ -272,17 +301,19 @@ describe('ytdl.getBasicInfo()', () => {
it('Retries the request', async() => {
const id = '_HSylqgVYQI';
const scope1 = nock(id, 'regular', {
watchJson: [true, 200, '{"reload":"now"}'],
watchHtml: [
[true, 500],
[true, 500],
],
watchJson: [
[true, 200, '{"reload":"now"}'],
[true, 200],
],
get_video_info: false,
player: false,
});
const scope2 = nock(id, 'regular', {
watchHtml: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, { requestOptions: { maxRetries: 1 } });
scope1.done();
scope2.done();
assert.ok(info.formats.length);
assert.ok(info.formats[0].url);
});
Expand All @@ -291,13 +322,16 @@ describe('ytdl.getBasicInfo()', () => {
it('Uses backup endpoint', async() => {
const id = 'LuZu9N53Vd0';
const scope = nock(id, 'use-backups', {
watchJson: [true, 200, '{"reload":"now"}'],
get_video_info: false,
});
const scope2 = nock(id, 'use-backups', {
watchJson: [true, 200, '{"reload":"now"}'],
watchHtml: false,
get_video_info: false,
watchHtml: [
[true, 500],
[true, 500],
],
watchJson: [
[true, 200, '{"reload":"now"}'],
[true, 200, '{"reload":"now"}'],
],
embed: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, {
requestOptions: {
Expand All @@ -306,8 +340,6 @@ describe('ytdl.getBasicInfo()', () => {
},
});
scope.done();
scope2.done();
assert.ok(info.html5player);
assert.ok(info.formats.length);
});
});
Expand All @@ -317,8 +349,10 @@ describe('ytdl.getBasicInfo()', () => {
it('Uses backup endpoint', async() => {
const id = 'LuZu9N53Vd0';
const scope1 = nock(id, 'use-backups', {
watchHtml: false,
watchJson: [true, 200, '[]'],
get_video_info: false,
embed: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, { requestOptions: { maxRetries: 0 } });
scope1.done();
Expand All @@ -331,17 +365,15 @@ describe('ytdl.getBasicInfo()', () => {
it('Retries the request', async() => {
const id = '_HSylqgVYQI';
const scope1 = nock(id, 'regular', {
watchJson: [true, 502],
player: false,
});
const scope2 = nock(id, 'regular', {
watchJson: [true, 502],
watchHtml: false,
watchHtml: [
[true, 500],
[true, 200],
],
watchJson: false,
player: false,
});
let info = await ytdl.getBasicInfo(id, { requestOptions: { maxRetries: 1 } });
scope1.done();
scope2.done();
assert.ok(info.formats.length);
assert.ok(info.formats[0].url);
});
Expand All @@ -350,12 +382,12 @@ describe('ytdl.getBasicInfo()', () => {
it('Uses the next endpoint as backup', async() => {
const id = 'LuZu9N53Vd0';
const scope = nock(id, 'use-backups', {
watchJson: [true, 502],
get_video_info: false,
watchHtml: [true, 502],
embed: false,
player: false,
});
let info = await ytdl.getBasicInfo(id);
scope.done();
assert.ok(info.html5player);
assert.ok(info.formats.length);
assert.ok(info.formats[0].url);
assert.ok(!info.videoDetails.age_restricted);
Expand Down Expand Up @@ -399,6 +431,8 @@ describe('ytdl.getBasicInfo()', () => {
watchJson: [true, 500],
watchHtml: [true, 500],
get_video_info: [true, 500],
embed: false,
player: false,
});
await assert.rejects(ytdl.getBasicInfo(id, {
requestOptions: { maxRetries: 0 },
Expand Down

0 comments on commit eb42b9c

Please sign in to comment.