Skip to content

Commit

Permalink
Add WIP YoutubeDlCoprocessor stub.
Browse files Browse the repository at this point in the history
  • Loading branch information
chfoo committed Jan 27, 2015
1 parent cdb236b commit 163fb44
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 7 deletions.
24 changes: 22 additions & 2 deletions wpull/builder.py
Expand Up @@ -25,6 +25,7 @@
from wpull.converter import BatchDocumentConverter
from wpull.cookie import DeFactoCookiePolicy, RelaxedMozillaCookieJar
from wpull.coprocessor.proxy import ProxyCoprocessor
from wpull.coprocessor.youtubedl import YoutubeDlCoprocessor
from wpull.database.sqltable import URLTable as SQLURLTable, GenericSQLURLTable
from wpull.database.wrap import URLTableHookWrapper
from wpull.debug import DebugConsoleHandler
Expand Down Expand Up @@ -151,6 +152,7 @@ def __init__(self, args, unit_test=False):
'WebProcessor': WebProcessor,
'WebProcessorFetchParams': WebProcessorFetchParams,
'WebProcessorInstances': WebProcessorInstances,
'YoutubeDlCoprocessor': YoutubeDlCoprocessor,
})
self._url_infos = None
self._ca_certs_file = None
Expand Down Expand Up @@ -791,7 +793,7 @@ def _build_web_processor(self):
url_rewriter=self._factory.get('URLRewriter'),
)

if args.phantomjs or args.proxy_server:
if args.phantomjs or args.youtube_dl or args.proxy_server:
proxy_server, proxy_port, proxy_task = self._build_proxy_server()
application = self._factory['Application']
# XXX: Should we be sticking these into application?
Expand All @@ -805,6 +807,11 @@ def _build_web_processor(self):
else:
phantomjs_coprocessor = None

if args.youtube_dl:
youtube_dl_coprocessor = self._build_youtube_dl_coprocessor(proxy_port)
else:
youtube_dl_coprocessor = None

web_processor_instances = self._factory.new(
'WebProcessorInstances',
fetch_rule=fetch_rule,
Expand All @@ -813,6 +820,7 @@ def _build_web_processor(self):
file_writer=file_writer,
statistics=self._factory['Statistics'],
phantomjs_coprocessor=phantomjs_coprocessor,
youtube_dl_coprocessor=youtube_dl_coprocessor,
)

web_processor_fetch_params = self._factory.new(
Expand Down Expand Up @@ -1190,7 +1198,8 @@ def _build_phantomjs_coprocessor(self, proxy_port):
)

extra_args = [
'--proxy', 'localhost:{0}'.format(proxy_port),
'--proxy',
'{}:{}'.format(self._args.proxy_server_address, proxy_port),
'--ignore-ssl-errors=true'
]

Expand All @@ -1211,6 +1220,17 @@ def _build_phantomjs_coprocessor(self, proxy_port):

return phantomjs_coprocessor

def _build_youtube_dl_coprocessor(self, proxy_port):
    '''Build youtube-dl coprocessor.

    Args:
        proxy_port: Port of the proxy server youtube-dl should use. It is
            paired with the ``proxy_server_address`` option to form the
            proxy address handed to the coprocessor.

    Returns:
        The object the factory creates for ``'YoutubeDlCoprocessor'``.
    '''
    proxy_address = (self._args.proxy_server_address, proxy_port)

    # Go through ``self._factory`` like the other ``_build_*`` methods do.
    return self._factory.new(
        'YoutubeDlCoprocessor',
        self._args.youtube_dl_exe,
        proxy_address,
    )


def _build_proxy_server(self):
'''Build MITM proxy server.'''
proxy_server = self._factory.new(
Expand Down
41 changes: 41 additions & 0 deletions wpull/coprocessor/youtubedl.py
@@ -0,0 +1,41 @@
import gettext
import logging

from trollius import From, Return
import trollius

from wpull.backport.logging import BraceMessage as __
from wpull.driver.process import Process


_logger = logging.getLogger(__name__)
_ = gettext.gettext


class YoutubeDlCoprocessor(object):
    '''youtube-dl coprocessor.

    Runs the youtube-dl executable on the URL of a processed item, with
    youtube-dl told to use the given proxy.

    Args:
        youtube_dl_path (str): Path or name of the youtube-dl executable.
        proxy_address (tuple): ``(host, port)`` of the proxy passed to
            youtube-dl through its ``--proxy`` option.
    '''
    def __init__(self, youtube_dl_path, proxy_address):
        self._youtube_dl_path = youtube_dl_path
        self._proxy_address = proxy_address

    @trollius.coroutine
    def process(self, url_item, request, response, file_writer_session):
        '''Run youtube-dl on the item's URL and wait for it to exit.

        Args:
            url_item: Item whose ``url_info.url`` is handed to youtube-dl.
            request: Unused; accepted for the coprocessor call signature.
            response: Unused; accepted for the coprocessor call signature.
            file_writer_session: Unused; accepted for the coprocessor call
                signature.

        A non-zero exit status is logged as a warning; it is not raised.

        Coroutine.
        '''
        host, port = self._proxy_address
        url = url_item.url_info.url
        youtube_dl_process = Process([
            self._youtube_dl_path,
            '--proxy', 'http://{}:{}'.format(host, port),
            url
        ])

        _logger.info(__(
            _('youtube-dl fetching ‘{url}’.'),
            url=url
        ))

        yield From(youtube_dl_process.start())
        # NOTE(review): assumes wait() yields the exit status, as asyncio
        # subprocesses do -- confirm for the object behind ``.process``.
        return_code = yield From(youtube_dl_process.process.wait())

        if return_code:
            # Do not report success when youtube-dl itself reported failure.
            _logger.warning(__(
                _('youtube-dl exited with code {code} for ‘{url}’.'),
                code=return_code, url=url
            ))
        else:
            _logger.info(__(
                _('youtube-dl fetched ‘{url}’.'),
                url=url
            ))
15 changes: 15 additions & 0 deletions wpull/options.py
Expand Up @@ -201,6 +201,7 @@ def _add_app_args(self):
self._add_accept_args()
self._add_proxy_server_args()
self._add_phantomjs_args()
self._add_youtube_dl_args()

def _add_startup_args(self):
group = self.add_argument_group(_('startup'))
Expand Down Expand Up @@ -1291,6 +1292,20 @@ def _add_phantomjs_args(self):
help=_('always scroll the page to maximum scroll count option'),
)

def _add_youtube_dl_args(self):
    '''Add the youtube-dl options to the parser.

    Registers ``--youtube-dl`` (boolean flag) and ``--youtube-dl-exe``
    (path of the executable, defaulting to ``youtube-dl``).
    '''
    # These options get their own help section rather than being listed
    # under the PhantomJS heading.
    group = self.add_argument_group(_('youtube-dl'))
    group.add_argument(
        '--youtube-dl',
        action='store_true',
        help=_('use youtube-dl for downloading videos'),
    )
    group.add_argument(
        '--youtube-dl-exe',
        metavar='PATH',
        default='youtube-dl',
        help=_('path of youtube-dl executable')
    )

def _post_parse_args(self, args):
if args.warc_file:
self._post_warc_args(args)
Expand Down
23 changes: 18 additions & 5 deletions wpull/processor/web.py
Expand Up @@ -52,6 +52,7 @@
('file_writer', NullWriter()),
('statistics', Statistics()),
('phantomjs_coprocessor', None),
('youtube_dl_coprocessor', None),
]
)
'''WebProcessorInstances
Expand All @@ -63,6 +64,7 @@
file_writer (:class:`.writer.BaseWriter`): The file writer.
phantomjs_coprocessor (:class:`.coprocessor.phantomjs.PhantomJSCoprocessor`): The PhantomJS
coprocessor.
youtube_dl_coprocessor (:class:`.coprocessor.youtubedl.YoutubeDlCoprocessor`): youtube-dl coprocessor.
'''


Expand Down Expand Up @@ -324,11 +326,7 @@ def response_callback(dummy, callback_response):
self._log_response(request, response)
action = self._handle_response(request, response)

phantomjs_coprocessor = self._processor.instances.phantomjs_coprocessor
if phantomjs_coprocessor:
yield From(phantomjs_coprocessor.process(
self._url_item, request, response, self._file_writer_session
))
yield From(self._run_coprocessors(request, response))

response.body.close()

Expand Down Expand Up @@ -466,3 +464,18 @@ def _close_instance_body(self, instance):
'''
if hasattr(instance, 'body'):
instance.body.close()

def _run_coprocessors(self, request, response):
    '''Run each configured coprocessor on the current item.

    The PhantomJS coprocessor runs first, then the youtube-dl one. A
    coprocessor whose slot on the processor instances is falsy is skipped.

    Generator; callers delegate to it with ``yield From(...)``.
    '''
    # Order matters, and each slot is looked up only when its turn comes.
    for slot_name in ('phantomjs_coprocessor', 'youtube_dl_coprocessor'):
        coprocessor = getattr(self._processor.instances, slot_name)

        if not coprocessor:
            continue

        yield From(coprocessor.process(
            self._url_item, request, response, self._file_writer_session
        ))

0 comments on commit 163fb44

Please sign in to comment.