From 53b1f92375000cf799e3b9217bc40b24a8dbb7f4 Mon Sep 17 00:00:00 2001 From: Venu Vardhan Reddy Tekula Date: Wed, 27 May 2020 12:26:13 +0530 Subject: [PATCH] [backend] Add Zulip Backend Signed-off-by: Venu Vardhan Reddy Tekula --- README.md | 10 ++ bin/perceval | 1 + perceval/backends/core/zulip.py | 254 ++++++++++++++++++++++++++++++++ tests/data/zulip/message_page_1 | 63 ++++++++ tests/data/zulip/message_page_2 | 63 ++++++++ tests/test_zulip.py | 238 ++++++++++++++++++++++++++++++ 6 files changed, 629 insertions(+) create mode 100644 perceval/backends/core/zulip.py create mode 100644 tests/data/zulip/message_page_1 create mode 100644 tests/data/zulip/message_page_2 create mode 100644 tests/test_zulip.py diff --git a/README.md b/README.md index ccbd08bf9..7cdb20cac 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ are: supybot Fetch messages from Supybot log files telegram Fetch messages from the Telegram server twitter Fetch tweets from the Twitter Search API + zulip Fetch messages from a Zulip stream optional arguments: -h, --help show this help message and exit @@ -385,6 +386,15 @@ https://gist.github.com/valeriocos/7d4d28f72f53fbce49f1512ba77ef5f6 $ perceval twitter grimoirelab -t 12345678abcdefgh ``` +### Zulip + +Zulip backend needs `BOT_EMAIL_ADDRESS` and `BOT_API_KEY`. `EMAIL_ADDRESS` and `API_KEY` can +be used if the bots are restricted. More information at https://zulipchat.com/api/api-keys. + +``` +$ perceval zulip 'https://example.zulipchat.com/' 'stream' -e 'BOT_EMAIL_ADDRESS' -t 'BOT_API_KEY' +``` + ## Running tests Perceval comes with a comprehensive list of unit tests. 
diff --git a/bin/perceval b/bin/perceval index 98e9344ba..7d1a28035 100755 --- a/bin/perceval +++ b/bin/perceval @@ -79,6 +79,7 @@ are: stackexchange Fetch questions from StackExchange sites supybot Fetch messages from Supybot log files telegram Fetch messages from the Telegram server + zulip Fetch messages from a Zulip stream optional arguments: -h, --help show this help message and exit
diff --git a/perceval/backends/core/zulip.py b/perceval/backends/core/zulip.py
new file mode 100644
index 000000000..cf91b42e8
--- /dev/null
+++ b/perceval/backends/core/zulip.py
@@ -0,0 +1,299 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015-2020 Bitergia
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#     Venu Vardhan Reddy Tekula
+#
+
+
+import json
+import logging
+
+from grimoirelab_toolkit.uris import urijoin
+
+from ...backend import (Backend,
+                        BackendCommand,
+                        BackendCommandArgumentParser)
+from ...client import HttpClient
+
+logger = logging.getLogger(__name__)
+
+CATEGORY_MESSAGE = "message"
+
+DEFAULT_SEARCH_FIELD = 'item_id'
+DEFAULT_OFFSET = 1
+
+# Anchor value used to start fetching from the oldest message
+ANCHOR_OLDEST = 'oldest'
+# Number of messages requested after the anchor on each API call
+MAX_ITEMS = 2
+
+
+class Zulip(Backend):
+    """Zulip Backend.
+
+    This class retrieves the messages sent to a Zulip stream.
+    To access the server, an API token and a bot email are required.
+
+    The origin of the data will be set using this `url` plus the
+    stream from which the data is obtained
+    (i.e: https://example.zulipchat.com/stream).
+
+    :param url: URL of the Zulip chat server
+    :param stream: stream from which the messages are to be fetched
+    :param email: bot email
+    :param api_token: key needed to use the API
+    :param tag: label used to mark the data
+    :param archive: archive to store/retrieve items
+    :param ssl_verify: enable/disable SSL verification
+    """
+
+    version = '0.1.0'
+
+    CATEGORIES = [CATEGORY_MESSAGE]
+
+    def __init__(self, url, stream, email, api_token, tag=None, archive=None, ssl_verify=True):
+        origin = urijoin(url, stream)
+
+        super().__init__(origin, tag=tag, archive=archive, ssl_verify=ssl_verify)
+        self.url = url
+        self.stream = stream
+        self.api_token = api_token
+        self.email = email
+
+        self.client = None
+
+    def search_fields(self, item):
+        """Add search fields to an item.
+
+        It adds the values of `metadata_id` and `stream`.
+
+        :param item: the item to extract the search fields values
+
+        :returns: a dict of search fields
+        """
+        search_fields = {
+            DEFAULT_SEARCH_FIELD: self.metadata_id(item),
+            'stream': self.stream,
+        }
+
+        return search_fields
+
+    def fetch(self, category=CATEGORY_MESSAGE, offset=DEFAULT_OFFSET):
+        """Fetch the messages from the stream.
+
+        This method fetches the messages sent to the Zulip stream.
+
+        :param category: the category of items to fetch
+        :param offset: value passed to `fetch_items` as a backend
+            argument; `fetch_items` does not use it yet and always
+            starts from the oldest message of the stream
+
+        :returns: a generator of messages
+        """
+        kwargs = {"offset": offset}
+        items = super().fetch(category, **kwargs)
+
+        return items
+
+    def fetch_items(self, category, **kwargs):
+        """Fetch the messages.
+
+        Messages are requested page by page, starting from the oldest
+        one. The last message of a page is the anchor of the next
+        request. As the server may send the anchor message again,
+        it is skipped to avoid generating it twice. The process ends
+        when the server reports the newest message was reached or
+        when a page does not bring any new message.
+
+        :param category: the category of items to fetch
+        :param kwargs: backend arguments
+
+        :returns: a generator of items
+        """
+        logger.info("Fetching messages of '%s' stream from %s",
+                    self.stream, self.url)
+
+        anchor = ANCHOR_OLDEST
+        nmessages = 0
+        fetching = True
+
+        while fetching:
+            raw_messages = self.client.get_messages(anchor)
+            next_anchor, found_newest, messages = self.parse_messages(raw_messages)
+
+            # The anchor was already generated with the previous page
+            new_messages = [msg for msg in messages if msg['id'] != anchor]
+
+            for message in new_messages:
+                yield message
+
+            nmessages += len(new_messages)
+            anchor = next_anchor
+
+            # Without new messages the anchor cannot move forward, so
+            # requesting again would never end
+            fetching = not found_newest and bool(new_messages)
+
+        logger.info("Fetch process completed: %s messages fetched", nmessages)
+
+    def parse_messages(self, raw_messages):
+        """Parse a page of messages.
+
+        :param raw_messages: JSON text returned by the API
+
+        :returns: a tuple with the identifier of the last message
+            of the page (`None` when it has no messages), whether
+            the newest message was found and the list of messages
+        """
+        result = json.loads(raw_messages)
+
+        found_newest = result.get('found_newest', False)
+        messages = result.get('messages') or []
+        anchor = messages[-1]['id'] if messages else None
+
+        return anchor, found_newest, messages
+
+    @classmethod
+    def has_archiving(cls):
+        """Returns whether it supports archiving items on the fetch process.
+
+        :returns: this backend supports items archive
+        """
+        return True
+
+    @classmethod
+    def has_resuming(cls):
+        """Returns whether it supports to resume the fetch process.
+
+        :returns: this backend supports items resuming
+        """
+        # NOTE(review): `fetch_items` ignores the `offset` argument,
+        # so a resumed fetch starts again from the oldest message
+        return True
+
+    @staticmethod
+    def metadata_id(item):
+        """Extracts the identifier of a Zulip item."""
+
+        return str(item['id'])
+
+    @staticmethod
+    def metadata_updated_on(item):
+        """Extracts and converts the update time from a Zulip item.
+
+        The timestamp is extracted from 'timestamp' field and
+        converted to a float.
+
+        :param item: item generated by the backend
+
+        :returns: a UNIX timestamp
+        """
+        ts = float(item['timestamp'])
+        return ts
+
+    @staticmethod
+    def metadata_category(item):
+        """Extracts the category from a Zulip item.
+
+        This backend only generates one type of item which is 'message'.
+        """
+        return CATEGORY_MESSAGE
+
+    def _init_client(self, from_archive=False):
+        """Init client"""
+
+        return ZulipClient(self.url, self.stream, self.email, self.api_token,
+                           archive=self.archive, from_archive=from_archive, ssl_verify=self.ssl_verify)
+
+
+class ZulipClient(HttpClient):
+    """Zulip API client.
+
+    Client for fetching information from the Zulip server using its
+    REST API.
+
+    :param url: URL of the Zulip chat server
+    :param stream: stream from which the messages are to be fetched
+    :param email: bot email
+    :param api_token: key needed to use the API
+    :param archive: archive to store/retrieve items
+    :param from_archive: it tells whether to write/read the archive
+    :param ssl_verify: enable/disable SSL verification
+    """
+
+    def __init__(self, url, stream, email, api_token,
+                 archive=None, from_archive=False, ssl_verify=True):
+        self.url = url
+        self.stream = stream
+        self.email = email
+        self.api_token = api_token
+
+        super().__init__(url, archive=archive, from_archive=from_archive, ssl_verify=ssl_verify)
+
+    def get_messages(self, anchor, num_after=MAX_ITEMS):
+        """Fetch a page of messages of the stream.
+
+        :param anchor: message where the page starts; either a
+            message identifier or the value 'oldest'
+        :param num_after: number of messages to retrieve after the anchor
+
+        :returns: the text of the response
+        """
+        params = {
+            'anchor': '{}'.format(anchor),
+            'num_before': '0',
+            'num_after': '{}'.format(num_after),
+            'apply_markdown': 'false',
+            'narrow': json.dumps([{'operator': 'stream', 'operand': '{}'.format(self.stream)}])
+        }
+
+        path = urijoin(self.url, "/api/v1/messages")
+
+        r = self.fetch(path, payload=params, auth=(self.email, self.api_token))
+
+        return r.text
+
+
+class ZulipCommand(BackendCommand):
+    """Class to run Zulip backend from the command line."""
+
+    BACKEND = Zulip
+
+    @classmethod
+    def setup_cmd_parser(cls):
+        """Returns the Zulip argument parser."""
+
+        parser = BackendCommandArgumentParser(cls.BACKEND,
+                                              token_auth=True,
+                                              archive=True,
+                                              ssl_verify=True)
+
+        # Backend token is required
+        action = parser.parser._option_string_actions['--api-token']
+        action.required = True
+
+        # Required arguments
+        parser.parser.add_argument('url', help="Zulip chat URL")
+        parser.parser.add_argument('stream', help='Zulip chat stream name')
+        # The API authenticates the requests with the email and the
+        # token, so the email is required too
+        parser.parser.add_argument('-e', '--email', dest='email',
+                                   required=True,
+                                   help="Zulip bot email")
+
+        return parser
diff --git a/tests/data/zulip/message_page_1 b/tests/data/zulip/message_page_1 new file mode 100644 index 000000000..79daac032 --- /dev/null +++
b/tests/data/zulip/message_page_1 @@ -0,0 +1,63 @@ +{ + "result":"success", + "msg":"", + "messages":[ + { + "avatar_url": "https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4%22,avatar_url%22:%20%22https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4", + "client": "ZulipElectron", + "content": "This is test messgae 2", + "content_type": "text/x-markdown", + "display_recipient": "abcdefghijkl", + "flags": [ + "read", + "historical" + ], + "id": 159310824, + "is_me_message": false, + "reactions": [], + "recipient_id": 303900, + "sender_email": "bot@zulipchat.com", + "sender_full_name": "Bot", + "sender_id": 113001, + "sender_realm_str": "example", + "sender_short_name": "bot", + "stream_id": 187177, + "subject": "Test subject 2", + "submessages": [], + "timestamp": 1551067006, + "topic_links": [], + "type": "stream" + }, + { + "avatar_url": "https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4%22,avatar_url%22:%20%22https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4", + "client": "ZulipElectron", + "content": "This is test messgae 1", + "content_type": "text/x-markdown", + "display_recipient": "abcdefghijkl", + "flags": [ + "read", + "historical" + ], + "id": 159310770, + "is_me_message": false, + "reactions": [], + "recipient_id": 303900, + "sender_email": "bot@zulipchat.com", + "sender_full_name": "Bot", + "sender_id": 113001, + "sender_realm_str": "example", + "sender_short_name": "bot", + "stream_id": 187177, + "subject": "Test subject 1", + "submessages": [], + "timestamp": 1551066955, + "topic_links": [], + "type": "stream" + } + ], + "found_anchor":true, + "found_oldest":false, + "found_newest":true, + "history_limited":false, + "anchor":175270341 +} \ No newline at end of file diff --git a/tests/data/zulip/message_page_2 b/tests/data/zulip/message_page_2 new file mode 100644 
index 000000000..f907368c9 --- /dev/null +++ b/tests/data/zulip/message_page_2 @@ -0,0 +1,63 @@ +{ + "result":"success", + "msg":"", + "messages":[ + { + "avatar_url": "https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4%22,avatar_url%22:%20%22https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4", + "client": "ZulipElectron", + "content": "This is test messgae 1", + "content_type": "text/x-markdown", + "display_recipient": "abcdefghijkl", + "flags": [ + "read", + "historical" + ], + "id": 159310770, + "is_me_message": false, + "reactions": [], + "recipient_id": 303900, + "sender_email": "bot@zulipchat.com", + "sender_full_name": "Bot", + "sender_id": 113001, + "sender_realm_str": "example", + "sender_short_name": "bot", + "stream_id": 187177, + "subject": "Test subject 1", + "submessages": [], + "timestamp": 1551066955, + "topic_links": [], + "type": "stream" + }, + { + "avatar_url": "https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4%22,avatar_url%22:%20%22https://zulip-avatars.s3.amazonaws.com/1000/1cd87892343b6620726b112851f0b66cbda4a68f?x=x&version=4", + "client": "ZulipElectron", + "content": "This is test messgae 2", + "content_type": "text/x-markdown", + "display_recipient": "abcdefghijkl", + "flags": [ + "read", + "historical" + ], + "id": 159310824, + "is_me_message": false, + "reactions": [], + "recipient_id": 303900, + "sender_email": "bot@zulipchat.com", + "sender_full_name": "Bot", + "sender_id": 113001, + "sender_realm_str": "example", + "sender_short_name": "bot", + "stream_id": 187177, + "subject": "Test subject 2", + "submessages": [], + "timestamp": 1551067006, + "topic_links": [], + "type": "stream" + } + ], + "found_anchor":true, + "found_oldest":false, + "found_newest":true, + "history_limited":false, + "anchor":175270341 +} \ No newline at end of file diff --git a/tests/test_zulip.py b/tests/test_zulip.py 
new file mode 100644
index 000000000..5afec4a8d
--- /dev/null
+++ b/tests/test_zulip.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015-2020 Bitergia
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#     Venu Vardhan Reddy Tekula
+#
+
+
+import os
+import unittest
+
+import httpretty
+import pkg_resources
+
+pkg_resources.declare_namespace('perceval.backends')
+
+from perceval.backend import BackendCommandArgumentParser
+from perceval.backends.core.zulip import (Zulip,
+                                          ZulipClient,
+                                          ZulipCommand)
+
+ZULIP_CHAT_URL = 'https://example.zulipchat.com'
+ZULIP_CHAT_API_URL = '/api/v1/messages'
+ZULIP_MESSAGE_URL = ZULIP_CHAT_URL + ZULIP_CHAT_API_URL
+
+email = 'bot@zulipchat.com'
+api_token = 'aaaa'
+AUTH = (email, api_token)
+
+
+def read_file(filename):
+    """Read the content of a file placed next to this module"""
+
+    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), filename), 'rb') as f:
+        content = f.read()
+    return content
+
+
+def setup_http_server():
+    """Setup a mock HTTP server"""
+
+    message_page_1 = read_file('data/zulip/message_page_1')
+    message_page_2 = read_file('data/zulip/message_page_2')
+
+    # Both pages are registered for the same URL; 'test_fetch'
+    # expects the messages in the order of 'message_page_2'
+    httpretty.register_uri(httpretty.GET,
+                           ZULIP_MESSAGE_URL,
+                           body=message_page_1,
+                           status=200)
+
+    httpretty.register_uri(httpretty.GET,
+                           ZULIP_MESSAGE_URL,
+                           body=message_page_2,
+                           status=200)
+
+
+class MockedZulipClient(ZulipClient):
+    """Mocked Zulip client for testing"""
+
+    def __init__(self, url, stream, email, api_token, archive=None,
+                 from_archive=False, ssl_verify=True):
+        super().__init__(url, stream, email, api_token,
+                         archive=archive,
+                         from_archive=from_archive,
+                         ssl_verify=ssl_verify
+                         )
+
+
+class TestZulipBackend(unittest.TestCase):
+    """Zulip backend tests"""
+
+    def test_initialization(self):
+        """Test whether attributes are initialized"""
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa', tag='test')
+        self.assertEqual(backend.origin, 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(backend.url, 'https://example.zulipchat.com/')
+        self.assertEqual(backend.stream, 'abcdefghijkl')
+        self.assertEqual(backend.email, 'bot@zulipchat.com')
+        self.assertEqual(backend.api_token, 'aaaa')
+        self.assertEqual(backend.tag, 'test')
+        self.assertTrue(backend.ssl_verify)
+        self.assertIsNone(backend.client)
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa', tag=None)
+        self.assertEqual(backend.origin, 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(backend.tag, 'https://example.zulipchat.com/abcdefghijkl')
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa', tag='')
+        self.assertEqual(backend.origin, 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(backend.tag, 'https://example.zulipchat.com/abcdefghijkl')
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa', tag='', ssl_verify=False)
+        self.assertEqual(backend.origin, 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(backend.tag, 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertFalse(backend.ssl_verify)
+
+    def test_has_archiving(self):
+        """Test if it returns True when has_archiving is called"""
+
+        self.assertTrue(Zulip.has_archiving())
+
+    def test_has_resuming(self):
+        """Test if it returns True when has_resuming is called"""
+
+        self.assertTrue(Zulip.has_resuming())
+
+    @httpretty.activate
+    def test_fetch(self):
+        """Test whether a list of messages is returned"""
+
+        setup_http_server()
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa')
+        messages = list(backend.fetch())
+
+        self.assertEqual(len(messages), 2)
+
+        message = messages[0]
+        self.assertEqual(message['data']['id'], 159310770)
+        self.assertEqual(message['origin'], 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(message['uuid'], '20d0159b91d0b912886264f2f1dad39689282559')
+        self.assertEqual(message['updated_on'], 1551066955.0)
+        self.assertEqual(message['category'], 'message')
+        self.assertEqual(message['tag'], 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(message['data']['content'], 'This is test messgae 1')
+        self.assertEqual(message['data']['sender_id'], 113001)
+        self.assertEqual(message['data']['sender_full_name'], 'Bot')
+
+        message = messages[1]
+        self.assertEqual(message['data']['id'], 159310824)
+        self.assertEqual(message['origin'], 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(message['uuid'], '330afca6d053fb05579e3763c7a553c1ee663cb6')
+        self.assertEqual(message['updated_on'], 1551067006.0)
+        self.assertEqual(message['category'], 'message')
+        self.assertEqual(message['tag'], 'https://example.zulipchat.com/abcdefghijkl')
+        self.assertEqual(message['data']['content'], 'This is test messgae 2')
+        self.assertEqual(message['data']['sender_id'], 113001)
+        self.assertEqual(message['data']['sender_full_name'], 'Bot')
+
+    @httpretty.activate
+    def test_search_fields_messages(self):
+        """Test whether the search_fields is properly set"""
+
+        setup_http_server()
+
+        backend = Zulip(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                        email='bot@zulipchat.com', api_token='aaaa')
+
+        messages = list(backend.fetch())
+        message = messages[0]
+        self.assertEqual(message['search_fields']['item_id'], backend.metadata_id(message['data']))
+        self.assertEqual(message['search_fields']['stream'], 'abcdefghijkl')
+
+
+class TestZulipClient(unittest.TestCase):
+    """Tests for ZulipClient class"""
+
+    def test_init(self):
+        """Check attributes initialization"""
+
+        client = ZulipClient(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                             email='bot@zulipchat.com', api_token='aaaa', ssl_verify=True)
+        self.assertIsInstance(client, ZulipClient)
+        self.assertEqual(client.email, 'bot@zulipchat.com')
+        self.assertEqual(client.api_token, 'aaaa')
+        self.assertTrue(client.ssl_verify)
+
+        client = ZulipClient(url='https://example.zulipchat.com/', stream='abcdefghijkl',
+                             email='bot@zulipchat.com', api_token='aaaa', ssl_verify=False)
+        self.assertIsInstance(client, ZulipClient)
+        self.assertEqual(client.email, 'bot@zulipchat.com')
+        self.assertEqual(client.api_token, 'aaaa')
+        self.assertFalse(client.ssl_verify)
+
+
+class TestZulipCommand(unittest.TestCase):
+    """Tests for ZulipCommand class"""
+
+    def test_backend_class(self):
+        """Test if the backend class is Zulip"""
+
+        self.assertIs(ZulipCommand.BACKEND, Zulip)
+
+    def test_setup_cmd_parser(self):
+        """Test if the parser object is correctly initialized"""
+
+        parser = ZulipCommand.setup_cmd_parser()
+        self.assertIsInstance(parser, BackendCommandArgumentParser)
+        self.assertEqual(parser._backend, Zulip)
+
+        args = ['-t', 'aaaa',
+                '-e', 'bot@zulipchat.com',
+                '--tag', 'test',
+                'https://example.zulipchat.com/', 'abcdefghijkl']
+
+        parsed_args = parser.parse(*args)
+        self.assertEqual(parsed_args.url, 'https://example.zulipchat.com/')
+        self.assertEqual(parsed_args.stream, 'abcdefghijkl')
+        self.assertEqual(parsed_args.email, 'bot@zulipchat.com')
+        self.assertEqual(parsed_args.api_token, 'aaaa')
+        self.assertEqual(parsed_args.tag, 'test')
+        self.assertTrue(parsed_args.ssl_verify)
+
+        args = ['-t', 'aaaa',
+                '-e', 'bot@zulipchat.com',
+                '--tag', 'test',
+                '--no-ssl-verify',
+                'https://example.zulipchat.com/', 'abcdefghijkl']
+
+        parsed_args = parser.parse(*args)
+        self.assertEqual(parsed_args.url, 'https://example.zulipchat.com/')
+        self.assertEqual(parsed_args.stream, 'abcdefghijkl')
+        self.assertEqual(parsed_args.email, 'bot@zulipchat.com')
+        self.assertEqual(parsed_args.api_token, 'aaaa')
+        self.assertEqual(parsed_args.tag, 'test')
+        self.assertFalse(parsed_args.ssl_verify)
+
+
+if __name__ == '__main__':
+    unittest.main(warnings='ignore')