# Test URL Download

In [1]:
%run ./setup-logging.ipynb
%run ./jupyter-test-support.ipynb
%run ./create_container.ipynb

Created [container]
  * app_root
  * blob_root_directory
  * blob_store
  * environment_name
  * event_publisher
  * mongodb_client
  * mongodb_credentials
  * queue_receiver
  * queue_receiver_manager
  * queue_sender
  * rabbitmq_connection
  * rabbitmq_url
  * url_download_handler
  * url_downloaded_handler
  * url_info_store
  * url_store


In [2]:
from rook.wcrawl.event import events

import threading
import _thread
import time

In [3]:
%%cell_testsuite

@cell_test
def test_send_urls(self):
    # Publish one URL-download request per address, in the listed order,
    # through the container's event publisher.
    urls = (
        'https://www.nytimes.com/',
        'https://www.yahoo.com/',
    )
    event_publisher = container.event_publisher()
    for url in urls:
        event_publisher.send_url_download(url)

URL download request: https://www.nytimes.com/
INFO|2020-11-24 22:06:45,838|MainThread|pika.adapters.utils.connection_workflow|Pika version 1.1.0 connecting to ('::1', 5672, 0, 0)
INFO|2020-11-24 22:06:45,839|MainThread|pika.adapters.utils.io_services_utils|Socket connected: <socket.socket fd=1188, family=AddressFamily.AF_INET6, type=SocketKind.SOCK_STREAM, proto=6, laddr=('::1', 61226, 0, 0), raddr=('::1', 5672, 0, 0)>
INFO|2020-11-24 22:06:45,839|MainThread|pika.adapters.utils.connection_workflow|Streaming transport linked up: (<pika.adapters.utils.io_services_utils._AsyncPlaintextTransport object at 0x0000025C8DD57648>, _StreamingProtocolShim: <SelectConnection PROTOCOL transport=<pika.adapters.utils.io_services_utils._AsyncPlaintextTransport object at 0x0000025C8DD57648> params=<URLParameters host=localhost port=5672 virtual_host=/ ssl=False>>).
INFO|2020-11-24 22:06:45,843|MainThread|pika.adapters.utils.connection_workflow|AMQPConnector - reporting success: <SelectConnection OPEN 

## Try receiving
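The utilities below use a hack to interrupt the queue receiver after a delay: a background thread calls `_thread.interrupt_main()`, which raises `KeyboardInterrupt` in the main thread.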

In [4]:
def send_interrupt_after(seconds):
    """Schedule a simulated Ctrl-C for the main thread.

    Starts a daemon ``threading.Timer`` that calls
    ``_thread.interrupt_main()`` once ``seconds`` have elapsed, which
    surfaces as ``KeyboardInterrupt`` in the main thread.

    Args:
        seconds: Delay, in seconds, before the interrupt is sent.

    Returns:
        The started ``threading.Timer``. Call ``cancel()`` on it to withdraw
        an interrupt that has not fired yet.
    """
    timer = threading.Timer(seconds, _thread.interrupt_main)
    # Daemon: a still-pending interrupt must not keep the process alive.
    timer.daemon = True
    timer.start()
    return timer


def run_and_interrupt_after(func, seconds):
    """Run ``func()``, interrupting it if it is still running after ``seconds``.

    A ``KeyboardInterrupt`` raised while ``func`` runs (normally the
    scheduled one) is swallowed; any other exception propagates to the
    caller.

    The scheduled interrupt is withdrawn as soon as ``func`` finishes, so a
    fast (or failing) ``func`` no longer leaves a timer behind that would
    raise ``KeyboardInterrupt`` in unrelated code later on.

    Args:
        func: Zero-argument callable to execute on the calling thread.
        seconds: Maximum time ``func`` may run before it is interrupted.
    """
    timer = send_interrupt_after(seconds)
    try:
        try:
            func()
        finally:
            # Withdraw the interrupt and wait for the timer thread to exit.
            # If the timer fired just as ``func`` returned, the interrupt is
            # expected to surface here, still inside the outer ``try``.
            timer.cancel()
            timer.join()
    except KeyboardInterrupt:
        pass
    
def download_requests_for(seconds):
    """Collect URL-download requests from the queue for a limited time.

    Obtains a queue receiver from the container and receives
    ``events.URL_DOWNLOAD`` messages through ``run_and_interrupt_after``,
    which interrupts the receive call after ``seconds``.

    Args:
        seconds: How long to keep receiving before being interrupted.

    Returns:
        List of the requests passed to the handler, in arrival order.
    """
    receiver = container.queue_receiver()
    collected = []

    def request_handler(request):
        # Record each request handed over by the receiver.
        collected.append(request)

    def listen():
        receiver.receive(events.URL_DOWNLOAD, request_handler)

    run_and_interrupt_after(listen, seconds)
    return collected

In [5]:
%%cell_testsuite

@cell_test
def test_receive(self):
    # Round trip: publish one download request and expect to read exactly
    # that request back from the queue.
    expected_url = 'https://www.bing.com/'

    # Drain whatever is already queued so only the new message is counted.
    download_requests_for(1)

    # Publish a single request.
    container.event_publisher().send_url_download(expected_url)

    received = download_requests_for(1)
    print(received)
    self.assertEqual(1, len(received))
    first_request = received[0]
    self.assertEqual('https://www.bing.com/', first_request['url'])

INFO|2020-11-24 22:07:23,887|MainThread|pika.adapters.utils.connection_workflow|Pika version 1.1.0 connecting to ('::1', 5672, 0, 0)
INFO|2020-11-24 22:07:23,890|MainThread|pika.adapters.utils.io_services_utils|Socket connected: <socket.socket fd=1328, family=AddressFamily.AF_INET6, type=SocketKind.SOCK_STREAM, proto=6, laddr=('::1', 61246, 0, 0), raddr=('::1', 5672, 0, 0)>
INFO|2020-11-24 22:07:23,892|MainThread|pika.adapters.utils.connection_workflow|Streaming transport linked up: (<pika.adapters.utils.io_services_utils._AsyncPlaintextTransport object at 0x0000025C8ED76048>, _StreamingProtocolShim: <SelectConnection PROTOCOL transport=<pika.adapters.utils.io_services_utils._AsyncPlaintextTransport object at 0x0000025C8ED76048> params=<URLParameters host=localhost port=5672 virtual_host=/ ssl=False>>).
INFO|2020-11-24 22:07:23,899|MainThread|pika.adapters.utils.connection_workflow|AMQPConnector - reporting success: <SelectConnection OPEN transport=<pika.adapters.utils.io_services_util