Skip to content

Commit

Permalink
Merge branch 'mattermost_improve_fields' of 'https://github.com/zhqua…
Browse files Browse the repository at this point in the history
…n/GrimoireELK'

Merges #982
Closes #982
  • Loading branch information
zhquan committed May 25, 2021
2 parents e25d7b2 + 201ab93 commit df179d7
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 51 deletions.
120 changes: 73 additions & 47 deletions grimoire_elk/enriched/mattermost.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@

import logging


from .enrich import Enrich, metadata
from ..elastic_mapping import Mapping as BaseMapping

from grimoirelab_toolkit.datetime import unixtime_to_datetime


logger = logging.getLogger(__name__)


Expand All @@ -44,7 +42,7 @@ def get_elastic_mappings(es_major):
mapping = """
{
"properties": {
"text_analyzed": {
"message_analyzed": {
"type": "text",
"fielddata": true,
"index": true
Expand All @@ -56,7 +54,6 @@ def get_elastic_mappings(es_major):


class MattermostEnrich(Enrich):

# This enricher must be compatible with the Slack enricher to reuse the Kibiter panel

mapping = Mapping
Expand Down Expand Up @@ -121,66 +118,59 @@ def get_rich_item(self, item):
message = item['data']

eitem["reply_count"] = 0 # be sure it is always included
eitem['message_id'] = message['id']

# data fields to copy
copy_fields = ["text", "type", "reply_count", "subscribed", "subtype",
"unread_count", "user"]
copy_fields = ["message", "type", "reply_count", "hashtags", "is_pinned"]
for f in copy_fields:
if f in message:
eitem[f] = message[f]
else:
eitem[f] = None

eitem['text_analyzed'] = eitem['text']

eitem['number_attachs'] = 0
if 'attachments' in message and message['attachments']:
eitem['number_attachs'] = len(message['attachments'])
eitem['message_analyzed'] = eitem['message']

eitem['reaction_count'] = 0
if 'reactions' in message:
eitem['reaction_count'] = len(message['reactions'])
eitem['reactions'] = []
for rdata in message['reactions']:
for i in range(0, rdata['count']):
eitem['reactions'].append(rdata["name"])

if 'file' in message:
eitem['file_type'] = message['file']['pretty_type']
eitem['file_title'] = message['file']['title']
eitem['file_size'] = message['file']['size']
eitem['file_name'] = message['file']['name']
eitem['file_mode'] = message['file']['mode']
eitem['file_is_public'] = message['file']['is_public']
eitem['file_is_external'] = message['file']['is_external']
eitem['file_id'] = message['file']['id']
eitem['file_is_editable'] = message['file']['editable']
eitem['file_count'] = 0
if 'metadata' in message:
if 'reactions' in message['metadata']:
eitem['reaction_count'] = len(message['metadata']['reactions'])
eitem['reactions'] = self.__get_reactions(message['metadata']['reactions'])
if 'files' in message['metadata']:
eitem['file_count'] = len(message['metadata']['files'])
eitem['files'] = self.__get_files(message['metadata']['files'])

if 'user_data' in message:
user_data = message['user_data']
eitem['roles'] = user_data['roles']
eitem['position'] = user_data['position']
eitem['team_id'] = None # not exists in Mattermost
if 'timezone' in message['user_data']:
if message['user_data']['timezone']['useAutomaticTimezone']:
eitem['tz'] = message['user_data']['timezone']['automaticTimezone']
if 'timezone' in user_data:
if user_data['timezone']['useAutomaticTimezone']:
eitem['tz'] = user_data['timezone']['automaticTimezone']
else:
eitem['tz'] = message['user_data']['timezone']['manualTimezone']
eitem['tz'] = user_data['timezone']['manualTimezone']
# tz must be in -12h to 12h interval, so seconds -> hours
if eitem['tz']:
eitem['tz'] = round(int(eitem['tz']) / (60 * 60))
if 'is_admin' in message['user_data']:
eitem['is_admin'] = message['user_data']['is_admin']
if 'is_owner' in message['user_data']:
eitem['is_owner'] = message['user_data']['is_owner']
if 'is_primary_owner' in message['user_data']:
eitem['is_primary_owner'] = message['user_data']['is_primary_owner']
if 'profile' in message['user_data']:
if 'title' in message['user_data']['profile']:
eitem['profile_title'] = message['user_data']['profile']['title']
eitem['avatar'] = message['user_data']['profile']['image_32']

eitem['channel_name'] = message['channel_data']['name']
eitem['channel_id'] = message['channel_data']['id']
eitem['channel_created'] = unixtime_to_datetime(message['channel_data']['create_at'] / 1000).isoformat()
eitem['channel_member_count'] = None

if 'channel_data' in message:
channel_data = message['channel_data']
eitem['channel_name'] = channel_data['display_name']
eitem['channel_id'] = channel_data['id']
eitem['channel_create_at'] = unixtime_to_datetime(channel_data['create_at'] / 1000).isoformat()
eitem['channel_delete_at'] = None if channel_data['delete_at'] == 0 else \
unixtime_to_datetime(channel_data['delete_at'] / 1000).isoformat()
eitem['channel_update_at'] = unixtime_to_datetime(channel_data['update_at'] / 1000).isoformat()
eitem['channel_member_count'] = None
eitem['channel_message_count'] = channel_data['total_msg_count']
eitem['channel_team_id'] = channel_data['team_id']

eitem['is_reply'] = False
eitem['parent_id'] = None
if message['parent_id']:
eitem['is_reply'] = True
eitem['parent_id'] = message['parent_id']

eitem = self.__convert_booleans(eitem)

Expand All @@ -196,6 +186,42 @@ def get_rich_item(self, item):
self.add_metadata_filter_raw(eitem)
return eitem

@staticmethod
def __get_files(message):
    """Build the enriched representation of the files attached to a post.

    :param message: iterable of file dicts; the caller passes the post's
        `metadata['files']` list
    :returns: list with one dict of `file_*` fields per input file, in
        the same order as the input
    """
    def to_iso(millis):
        # Values are divided by 1000 before conversion, i.e. they are
        # handled as millisecond timestamps.
        return unixtime_to_datetime(millis / 1000).isoformat()

    return [
        {
            'file_user_id': info['user_id'],
            'file_post_id': info['post_id'],
            'file_create_at': to_iso(info['create_at']),
            'file_update_at': to_iso(info['update_at']),
            # A `delete_at` of 0 is reported as None instead of a date
            'file_delete_at': None if info['delete_at'] == 0 else to_iso(info['delete_at']),
            'file_name': info['name'],
            'file_extension': info['extension'],
            'file_size': info['size'],
            'file_type': info['mime_type'],
            'file_mini_preview': info['mini_preview']
        }
        for info in message
    ]

@staticmethod
def __get_reactions(message):
    """Build the enriched representation of the reactions of a post.

    :param message: iterable of reaction dicts; the caller passes the
        post's `metadata['reactions']` list
    :returns: list with one dict of `reaction_*` fields per input
        reaction, in the same order as the input
    """
    def to_iso(millis):
        # Values are divided by 1000 before conversion, i.e. they are
        # handled as millisecond timestamps.
        return unixtime_to_datetime(millis / 1000).isoformat()

    return [
        {
            'reaction_user_id': entry['user_id'],
            'reaction_post_id': entry['post_id'],
            'reaction_emoji_name': entry['emoji_name'],
            'reaction_create_at': to_iso(entry['create_at']),
            'reaction_update_at': to_iso(entry['update_at']),
            # A `delete_at` of 0 is reported as None instead of a date
            'reaction_delete_at': None if entry['delete_at'] == 0 else to_iso(entry['delete_at'])
        }
        for entry in message
    ]

def __convert_booleans(self, eitem):
""" Convert True/False to 1/0 for better kibana processing """

Expand Down
34 changes: 31 additions & 3 deletions schema/mattermost.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,59 @@ author_org_name,keyword,true,"Organization name."
author_user_name,keyword,true,"Author username from SortingHat."
author_uuid,keyword,true,"Author UUID from SortingHat."
channel_create_at,date,true,"Date when the channel was created."
channel_delete_at,date,true,"Date when the channel was deleted."
channel_id,keyword,true,"ID of the channel."
channel_message_count,number,true,"Number of messages."
channel_name,keyword,true,"Name of the channel."
channel_team_id,keyword,true,"ID of the channel team."
channel_update_at,date,true,"Date when the channel was updated."
file_count,number,true,"Number of files."
files.file_create_at,date,true,"Date when the file was created."
files.file_extension,keyword,true,"File extension."
files.file_mini_preview,keyword,true,"File mini preview."
files.file_name,keyword,true,"File name."
files.file_post_id,keyword,true,"File post ID."
files.file_size,number,true,"File size."
files.file_type,keyword,true,"File type."
files.file_update_at,date,true,"Date when the file was updated."
files.file_user_id,keyword,true,"File user ID."
grimoire_creation_date,date,true,"Date when the message was updated."
hashtags,keyword,true,"Hashtags."
is_mattermost_message,long,true,"Field containing '1' that allows summing fields when concatenating with other indexes."
is_pinned,number,true,"1 if the message is pinned, 0 otherwise."
is_reply,number,true,"1 if the message is a reply, 0 otherwise."
message,keyword,true,"Message."
message_analyzed,keyword,true,"Content message split by terms to allow searching."
message_id,keyword,true,"Message ID."
metadata__enriched_on,date,true,"Date when the item was enriched."
metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information."
metadata__gelk_version,keyword,true,"Version of the backend used to enrich information."
metadata__timestamp,date,true,"Date when the item was stored in RAW index."
metadata__updated_on,date,true,"Date when the item was updated in its original data source."
number_attachs,long,true,"Number of message attachments."
origin,keyword,true,"Original URL where the repository was retrieved from."
parent_id,keyword,true,"Parent message ID."
position,keyword,true,"User position."
project,keyword,true,"Project."
project_1,keyword,true,"Project (if more than one level is allowed in project hierarchy)."
reaction_count,long,true,"Number of reactions."
reactions.reaction_create_at,date,true,"Date when the reaction was created."
reactions.reaction_emoji_name,keyword,true,"Name of the emoji."
reactions.reaction_post_id,keyword,true,"Reaction post ID."
reactions.reaction_update_at,date,true,"Date when the reaction was updated."
reactions.reaction_user_id,keyword,true,"Reaction user ID."
reply_count,number,true,"Number of replies."
roles,keyword,true,"User roles."
tag,keyword,true,"Perceval tag."
text_analyzed,text,false,"Content message split by terms to allow searching."
type,keyword,true,"Message type."
tz,long,true,"Timezone in which the message was made by its original author."
user_data_bot,boolean,true,"True if the given user is identified as a bot."
user_data_domain,keyword,true,"Domain associated to the user in SortingHat profile."
user_data_gender,keyword,true,"Message user gender."
user_data_gender_acc,keyword,true,"Accuracy of the user gender."
user_data_id,keyword,true,"User Id from SortingHat."
user_data_multi_org_names,keyword,true,"Multi organization names."
user_data_name,keyword,true,"Name of the user."
user_data_org_name,keyword,true,"Organization name."
user_data_user_name,keyword,true,"Username of the user."
user_data_uuid,keyword,true,"User UUID from SortingHat."
uuid,keyword,true,"Perceval UUID."
uuid,keyword,true,"Perceval UUID."
2 changes: 1 addition & 1 deletion tests/test_mattermost.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_raw_to_enrich(self):

for item in self.items:
eitem = enrich_backend.get_rich_item(item)
self.assertEqual(eitem['channel_name'], "test-channel")
self.assertEqual(eitem['channel_name'], "Eclipse Che")

def test_enrich_repo_labels(self):
"""Test whether the field REPO_LABELS is present in the enriched items"""
Expand Down

0 comments on commit df179d7

Please sign in to comment.