diff --git a/.ci/build.sh b/.ci/build.sh index 79a7dda9..2f1e57f8 100755 --- a/.ci/build.sh +++ b/.ci/build.sh @@ -25,6 +25,8 @@ python manage.py migrate python manage.py import_contributors_data python manage.py import_issues_data python manage.py import_merge_requests_data +python manage.py download_nltk_data +python manage.py import_messages_data python manage.py create_config_data python manage.py create_participants python manage.py update_participants_data diff --git a/.gitignore b/.gitignore index 63220721..b4a5410f 100644 --- a/.gitignore +++ b/.gitignore @@ -268,6 +268,9 @@ tags .idea/**/dictionaries .idea/**/shelf +# Generated files +.idea/**/contentModel.xml + # Sensitive or high-churn files .idea/**/dataSources/ .idea/**/dataSources.ids @@ -483,10 +486,5 @@ cscope.po.out _site/ /public/ -# Patch & backup files -*.patch -*.orig -*.diff - # Pytest profile files prof/ diff --git a/.moban.yaml b/.moban.yaml index c92377a9..a643de35 100644 --- a/.moban.yaml +++ b/.moban.yaml @@ -7,6 +7,7 @@ packages: - inactive_issues - data - gci + - gitter - gsoc - gamification - log diff --git a/.nocover.yaml b/.nocover.yaml index 987773ce..fce502a9 100644 --- a/.nocover.yaml +++ b/.nocover.yaml @@ -8,6 +8,7 @@ nocover_file_globs: - community/git.py - gci/*.py - gsoc/*.py + - gitter/nlp/dict_tagger.py - log/*.py - meta_review/handler.py - model/*.py diff --git a/community/settings.py b/community/settings.py index ac3a1655..57ca6228 100644 --- a/community/settings.py +++ b/community/settings.py @@ -40,6 +40,7 @@ 'model', 'gamification', 'meta_review', + 'gitter', 'django.contrib.contenttypes', 'django.contrib.staticfiles', 'django_distill', diff --git a/gamification/migrations/0002_auto_20180811_2208.py b/gamification/migrations/0002_auto_20180811_2208.py new file mode 100644 index 00000000..6d87c64c --- /dev/null +++ b/gamification/migrations/0002_auto_20180811_2208.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.15 on 2018-08-11 22:08 +from 
__future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('gamification', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='activity', + name='answers_given', + field=models.IntegerField(default=0, null=True), + ), + migrations.AddField( + model_name='activity', + name='ignore_messages', + field=models.IntegerField(default=0, null=True), + ), + migrations.AddField( + model_name='activity', + name='questions_asked', + field=models.IntegerField(default=0, null=True), + ), + ] diff --git a/gamification/models.py b/gamification/models.py index d9de46ee..6bc035d2 100644 --- a/gamification/models.py +++ b/gamification/models.py @@ -22,6 +22,15 @@ class Activity(models.Model): # The date and time this activity was last updated updated_at = models.DateTimeField(null=True) + # Number of questions asked by the participant + questions_asked = models.IntegerField(default=0, null=True) + + # Number of answers given by the participant + answers_given = models.IntegerField(default=0, null=True) + + # Number of ignore messages sent by the participant + ignore_messages = models.IntegerField(default=0, null=True) + def __str__(self): return self.name @@ -81,7 +90,8 @@ def __str__(self): class Meta: ordering = ['-score'] - def add_points(self, points, activity_string, performed_at, updated_at): + def add_points(self, points, activity_string, performed_at, updated_at, + questions_asked, answers_given, ignore_messages): """ Update score, level and add activities peformed. @@ -94,16 +104,26 @@ def add_points(self, points, activity_string, performed_at, updated_at): and time when this activity was performed. :param updated_at: a datetime object representing the date and time when this activity was performed. + :param questions_asked: an integer representing the number of questions asked + by the participant during performing this activity. 
+ :param answers_given: an integer representing the number of answers given + by the participant during performing this activity. + :param ignore_messages: an integer representing the number of ignore messages + sent by the participant during performing this activity. """ self.update_score_and_level(points) self.add_activity(points, activity_string, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) def deduct_points(self, points_to_deduct, activity_string, - performed_at, updated_at): + performed_at, updated_at, questions_asked, + answers_given, ignore_messages): """ Deduct points for performing some specific activities. """ @@ -111,6 +131,9 @@ def deduct_points(self, points_to_deduct, activity_string, activity_string, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) def find_level_for_score(self, score): @@ -132,7 +155,8 @@ def update_score_and_level(self, points): self.level = self.find_level_for_score(self.score) self.save() - def add_activity(self, points, activity_string, performed_at, updated_at): + def add_activity(self, points, activity_string, performed_at, updated_at, + questions_asked, answers_given, ignore_messages): """ Add a new activity to the participant. 
""" @@ -141,7 +165,10 @@ def add_activity(self, points, activity_string, performed_at, updated_at): points=points, performer=self, performed_at=performed_at, - updated_at=updated_at) + updated_at=updated_at, + questions_asked=questions_asked, + answers_given=answers_given, + ignore_messages=ignore_messages) self.activities.add(activity) def find_badges_for_activity(self, activities): diff --git a/gamification/process/update.py b/gamification/process/update.py index dd5b928c..db157165 100644 --- a/gamification/process/update.py +++ b/gamification/process/update.py @@ -10,6 +10,7 @@ from gamification.labels import NEGATIVE_POINT_LABELS from gamification.data.points import MERGE_REQUEST_CLOSED_WITHOUT_MERGE from gamification.models import Participant +from gitter.models import Message def get_mr_objects(): @@ -46,6 +47,18 @@ def update_participants_data_with_mr(mr): mr_author = Participant.objects.get(username=mr.author) mr_performed_at = mr.created_at mr_updated_at = mr.updated_at + mr_questions_asked = Message.objects.filter( + message_type='question', + sent_at__gte=mr_performed_at, + sent_at__lte=mr_updated_at).count() + mr_answers_given = Message.objects.filter( + message_type='answer', + sent_at__gte=mr_performed_at, + sent_at__lte=mr_updated_at).count() + mr_ignore_messages = Message.objects.filter( + message_type='ignore', + sent_at__gte=mr_performed_at, + sent_at__lte=mr_updated_at).count() if mr.state == 'merged': mr_labels = mr.labels.values('name') @@ -59,6 +72,9 @@ def update_participants_data_with_mr(mr): mr_activity_string, mr_performed_at, mr_updated_at, + mr_questions_asked, + mr_answers_given, + mr_ignore_messages, ) # Get all the issues this mr is closing try: @@ -70,6 +86,18 @@ def update_participants_data_with_mr(mr): issue_labels = issue.labels.values('name') issue_performed_at = issue.created_at issue_updated_at = issue.updated_at + issue_questions_asked = Message.objects.filter( + message_type='question', + sent_at__gte=issue_performed_at, + 
sent_at__lte=issue_updated_at).count() + issue_answers_given = Message.objects.filter( + message_type='answer', + sent_at__gte=issue_performed_at, + sent_at__lte=issue_updated_at).count() + issue_ignore_messages = Message.objects.filter( + message_type='ignore', + sent_at__gte=issue_performed_at, + sent_at__lte=issue_updated_at).count() # Get activity and points based on the labels on the issue issue_points, issue_activity_string = get_activity_with_points( 'issue', issue_labels) @@ -82,6 +110,9 @@ def update_participants_data_with_mr(mr): issue_activity_string, issue_performed_at, issue_updated_at, + issue_questions_asked, + issue_answers_given, + issue_ignore_messages, ) elif mr.state == 'closed': @@ -91,6 +122,9 @@ def update_participants_data_with_mr(mr): mr_activity_string, mr_performed_at, mr_updated_at, + mr_questions_asked, + mr_answers_given, + mr_ignore_messages, ) @@ -122,10 +156,25 @@ def update_participants_data_with_issue(issue): issue_points, issue_activity = get_activity_with_points('issue', labels) issue_performed_at = issue.created_at issue_updated_at = issue.updated_at + issue_questions_asked = Message.objects.filter( + message_type='question', + sent_at__gte=issue_performed_at, + sent_at__lte=issue_updated_at).count() + issue_answers_given = Message.objects.filter( + message_type='answer', + sent_at__gte=issue_performed_at, + sent_at__lte=issue_updated_at).count() + issue_ignore_messages = Message.objects.filter( + message_type='ignore', + sent_at__gte=issue_performed_at, + sent_at__lte=issue_updated_at).count() issue_author.deduct_points(issue_points, issue_activity, issue_performed_at, issue_updated_at, + issue_questions_asked, + issue_answers_given, + issue_ignore_messages, ) issue_author.save() diff --git a/gamification/tests/test_management_commands.py b/gamification/tests/test_management_commands.py index 7519d3ad..70cc9afd 100644 --- a/gamification/tests/test_management_commands.py +++ b/gamification/tests/test_management_commands.py @@ 
-81,6 +81,7 @@ class UpdateParticipantsTest(TestCase): def setUpTestData(cls): call_command('import_issues_data') call_command('import_merge_requests_data') + call_command('import_messages_data') call_command('create_config_data') call_command('create_participants') call_command('update_participants_data') @@ -98,3 +99,17 @@ def test_command_update_particiapants_data(self): number_of_badges = participant.badges.all().count() self.assertEquals(number_of_badges, 2) + + activity1 = participant.activities.all()[0] + + # Number of questions asked during performing this + # activity should be one + self.assertEquals(activity1.questions_asked, 1) + + # Number of answers given during performing this + # activity should be one + self.assertEquals(activity1.answers_given, 1) + + # Number of ignore messages sent during performing this + # activity should be one + self.assertEquals(activity1.ignore_messages, 1) diff --git a/gamification/tests/test_models.py b/gamification/tests/test_models.py index 898b0c80..711e95a6 100644 --- a/gamification/tests/test_models.py +++ b/gamification/tests/test_models.py @@ -30,12 +30,21 @@ def test_field_label(self): 'performed_at').verbose_name updated_at = activity._meta.get_field( 'updated_at').verbose_name + questions_asked = activity._meta.get_field( + 'questions_asked').verbose_name + answers_given = activity._meta.get_field( + 'answers_given').verbose_name + ignore_messages = activity._meta.get_field( + 'ignore_messages').verbose_name self.assertEquals(name, 'name') self.assertEquals(points, 'points') self.assertEquals(number_of_times, 'number of times') self.assertEquals(performer, 'performer') self.assertEquals(updated_at, 'updated at') self.assertEquals(performed_at, 'performed at') + self.assertEquals(questions_asked, 'questions asked') + self.assertEquals(answers_given, 'answers given') + self.assertEquals(ignore_messages, 'ignore messages') def test_object_name_is_activity_name(self): activity = Activity.objects.get(id=1) @@ 
-212,10 +221,16 @@ def test_add_points_method(self): activity_string = 'Created a difficulty/newcomer bug issue' performed_at = '2017-08-24 05:59:31+00:00' updated_at = '2018-06-02 17:06:18+00:00' + questions_asked = 1 + answers_given = 1 + ignore_messages = 1 participant.add_points(points, activity_string, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) self.assertEquals(participant.score, 10) self.assertEquals(participant.level.name, 'Beginner-I') @@ -229,10 +244,16 @@ def test_deduct_points_method(self): activity_string = 'Merge request was closed without merge' performed_at = '2017-08-24 05:59:31+00:00' updated_at = '2018-06-02 17:06:18+00:00' + questions_asked = 1 + answers_given = 1 + ignore_messages = 1 participant.deduct_points(points, activity_string, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) self.assertEquals(participant.score, 0) self.assertEquals(participant.level.name, 'Fresher') @@ -277,10 +298,16 @@ def test_add_activity_method(self): activity = 'Created a difficulty/newcomer type/bug issue' performed_at = '2017-08-24 05:59:31+00:00' updated_at = '2018-06-02 17:06:18+00:00' + questions_asked = 1 + answers_given = 1 + ignore_messages = 1 participant.add_activity(points, activity, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) # After applying add_activity @@ -294,6 +321,9 @@ def test_add_activity_method(self): activity, performed_at, updated_at, + questions_asked, + answers_given, + ignore_messages, ) # A new activity is added diff --git a/gitter/__init__.py b/gitter/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gitter/apps.py b/gitter/apps.py new file mode 100644 index 00000000..0144ab2a --- /dev/null +++ b/gitter/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class GitterConfig(AppConfig): + name = 'gitter' diff --git a/gitter/management/commands/download_nltk_data.py 
b/gitter/management/commands/download_nltk_data.py new file mode 100644 index 00000000..bbbecdac --- /dev/null +++ b/gitter/management/commands/download_nltk_data.py @@ -0,0 +1,11 @@ +from django.core.management.base import BaseCommand + +import nltk + + +class Command(BaseCommand): + help = 'Download nltk data' + + def handle(self, *args, **options): + nltk.download('averaged_perceptron_tagger') + nltk.download('punkt') diff --git a/gitter/management/commands/import_messages_data.py b/gitter/management/commands/import_messages_data.py new file mode 100644 index 00000000..25ed4ce8 --- /dev/null +++ b/gitter/management/commands/import_messages_data.py @@ -0,0 +1,12 @@ +from django.core.management.base import BaseCommand + +from gitter.messages import get_messages, import_messages + + +class Command(BaseCommand): + help = 'Import Newcomer Messages' + + def handle(self, *args, **options): + messages = get_messages() + if messages: + import_messages(messages) diff --git a/gitter/messages.py b/gitter/messages.py new file mode 100644 index 00000000..5ed99151 --- /dev/null +++ b/gitter/messages.py @@ -0,0 +1,66 @@ +import logging + +import requests + +from data.webservices import webservices_url +from gitter.nlp.score import sentiment_score +from gitter.models import Message + + +def get_messages(): + """ + Get all the messages sent by newcomers on the gitter rooms. + """ + logger = logging.getLogger(__name__) + import_url = webservices_url('messages') + headers = {'Content-Type': 'application/json'} + try: + response = requests.get( + url=import_url, + headers=headers, + ) + response.raise_for_status() + except Exception as e: + logger.error(e) + return + + data = response.json() + return data + + +def message_type(message): + """ + Get the type of a message from a message_dict. 
+ + :param message: a message dict of type: + { + "identifier": "5b588269c0fa8016e7379191", + "room": "offtopic", + "sent_at": "2018-07-25 14:00:09.456000+00:00", + "sent_by": "Naveenaidu", + "text": "How can I solve this issue?", + }, + :return: a string representing the type of the message. + i.e. 'question', 'answer' or 'ignore'. + + """ + text = message['text'] + score = sentiment_score(text) + if score >= 0: + if len(text) > 60: + m_type = 'answer' + else: + m_type = 'ignore' + return m_type + else: + m_type = 'question' + return m_type + + +def import_messages(messages): + message_objects_list = [] + for message in messages: + m_type = message_type(message) + message['message_type'] = m_type + message_objects_list.append(Message(**message)) + Message.objects.bulk_create(message_objects_list) diff --git a/gitter/migrations/0001_initial.py b/gitter/migrations/0001_initial.py new file mode 100644 index 00000000..81ca31c1 --- /dev/null +++ b/gitter/migrations/0001_initial.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.15 on 2018-08-11 22:59 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Message', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('identifier', models.CharField(max_length=500)), + ('room', models.CharField(max_length=300)), + ('text', models.TextField()), + ('sent_at', models.DateTimeField()), + ('sent_by', models.CharField(max_length=300)), + ('message_type', models.CharField(max_length=100)), + ], + ), + ] diff --git a/gitter/migrations/__init__.py b/gitter/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gitter/models.py b/gitter/models.py new file mode 100644 index 00000000..0b19e434 --- /dev/null +++ b/gitter/models.py @@ -0,0 +1,13 @@ +from django.db 
import models + + +class Message(models.Model): + identifier = models.CharField(max_length=500) + room = models.CharField(max_length=300) + text = models.TextField() + sent_at = models.DateTimeField() + sent_by = models.CharField(max_length=300) + message_type = models.CharField(max_length=100) + + def __str__(self): + return (str(self.identifier) + ': ' + self.text) diff --git a/gitter/nlp/dict_tagger.py b/gitter/nlp/dict_tagger.py new file mode 100644 index 00000000..db6c884a --- /dev/null +++ b/gitter/nlp/dict_tagger.py @@ -0,0 +1,67 @@ +from ruamel.yaml import YAML + + +class DictionaryTagger(object): + def __init__(self, dictionary_paths): + yaml = YAML() + files = [open(path, 'r') for path in dictionary_paths] + dictionaries_list = [yaml.load(dict_file) for dict_file in files] + map(lambda x: x.close(), files) + self.dictionaries = {} + self.max_key_size = 0 + for curr_dict in dictionaries_list: + for key in curr_dict: + if key in self.dictionaries: + self.dictionaries[key].extend(curr_dict[key]) + else: + self.dictionaries[key] = curr_dict[key] + self.max_key_size = max(self.max_key_size, len(key)) + + def tag(self, pos_tagged_sentences): + return ( + [self.tag_sentence(sentence) for sentence in pos_tagged_sentences]) + + def tag_sentence(self, sentence, tag_with_lemmas=False): + """ + the result is only one tagging of all the possible ones. 
+ The resulting tagging is determined by these two priority rules: + - longest matches have higher priority + - search is made from left to right + """ + tag_sentences = [] + N = len(sentence) + if self.max_key_size == 0: + self.max_key_size = N + i = 0 + while (i < N): + j = min(i + self.max_key_size, N) # avoid overflow + tagged = False + while (j > i): + expression_form = ' '.join( + [word[0] for word in sentence[i:j]]).lower() + expression_lemma = ' '.join( + [word[1] for word in sentence[i:j]]).lower() + if tag_with_lemmas: + literal = expression_lemma + else: + literal = expression_form + if literal in self.dictionaries: + is_single_token = j - i == 1 + original_position = i + i = j + taggings = [tag for tag in self.dictionaries[literal]] + tagged_expression = ( + expression_form, expression_lemma, taggings) + # if the tagged literal is a single token, + # conserve its previous taggings: + if is_single_token: + original_token_taggings = sentence[original_position][2] + tagged_expression[2].extend(original_token_taggings) + tag_sentences.append(tagged_expression) + tagged = True + else: + j = j - 1 + if not tagged: + tag_sentences.append(sentence[i]) + i += 1 + return tag_sentences diff --git a/gitter/nlp/pos_tagger.py b/gitter/nlp/pos_tagger.py new file mode 100644 index 00000000..1f61c2a0 --- /dev/null +++ b/gitter/nlp/pos_tagger.py @@ -0,0 +1,25 @@ +import nltk + + +class POSTagger(object): + + def pos_tag(self, sentences): + """ + input format: list of lists of words + e.g.: [['this', 'is', 'a', 'sentence'], + ['this', 'is', 'another', 'one']] + output format: list of lists of tagged tokens. 
+ Each tagged tokens has a + form, a lemma, and a list of tags + e.g: [[('this', 'this', ['DT']), + ('is', 'be', ['VB']), ('a', 'a', ['DT']), + ('sentence', 'sentence', ['NN'])], + [('this', 'this', ['DT']), ('is', 'be', ['VB']), + ('another', 'another', ['DT']), ('one', 'one', ['CARD'])]] + """ + + pos = [nltk.pos_tag(sentence) for sentence in sentences] + # adapt format + pos = [[(word, word, [postag]) for (word, postag) in sentence] + for sentence in pos] + return pos diff --git a/gitter/nlp/score.py b/gitter/nlp/score.py new file mode 100644 index 00000000..f38d65fa --- /dev/null +++ b/gitter/nlp/score.py @@ -0,0 +1,34 @@ +from gitter.nlp.tagger import MessageTagger + + +def value_of(sentiment): + """ + Get a value based on the sentiment. + + :param sentiment: a string representing the sentiment of + a word. i.e. 'ignore' or 'question'. + :return: an integer representing the value of the sentiment. + """ + if sentiment == 'ignore': + return 1 + if sentiment == 'question': + return -1 + return 0 + + +def sentiment_score(text): + """ + Get the score of a message. + + :param text: a string representing the text of a message. + e.g. 'How to solve this issue?' + :return: an integer representing the value of a message. + """ + files = [ + 'gitter/nlp/training_data/question.yml', + 'gitter/nlp/training_data/ignore.yml', + ] + message_tagger = MessageTagger(files) + return sum( + [value_of(tag) for sentence in message_tagger.tagged_message( + text) for token in sentence for tag in token[2]]) diff --git a/gitter/nlp/splitter.py b/gitter/nlp/splitter.py new file mode 100644 index 00000000..8b8fd8aa --- /dev/null +++ b/gitter/nlp/splitter.py @@ -0,0 +1,22 @@ +import nltk + + +class Splitter(object): + + def __init__(self): + self.nltk_splitter = nltk.data.load('tokenizers/punkt/english.pickle') + self.nltk_tokenizer = nltk.tokenize.TreebankWordTokenizer() + + def split(self, text): + """ + Split a paragraph into list of lists of words. 
+ + :param text: a paragraph of text + :return: a list of lists of words. + e.g.: [['this', 'is', 'a', 'sentence'], + ['this', 'is', 'another', 'one']] + """ + sentences = self.nltk_splitter.tokenize(text) + tokenized_sentences = [ + self.nltk_tokenizer.tokenize(sent) for sent in sentences] + return tokenized_sentences diff --git a/gitter/nlp/tagger.py b/gitter/nlp/tagger.py new file mode 100644 index 00000000..5ee676cb --- /dev/null +++ b/gitter/nlp/tagger.py @@ -0,0 +1,42 @@ +from gitter.nlp.splitter import Splitter +from gitter.nlp.pos_tagger import POSTagger +from gitter.nlp.dict_tagger import DictionaryTagger + + +class MessageTagger: + + def __init__(self, files): + self.splitter = Splitter() + self.postagger = POSTagger() + self.dicttagger = DictionaryTagger(files) + + def tagged_message(self, text): + """ + Process a message with NLP classes. + + Step-1: Use Splitter class to split the text message + into list of lists of words. + + Step-2: Use POSTagger class to tag the split message + with pos. + + Step-3: Use DictionaryTagger to tag the message based on + provided dictionaries. i.e. 'training_data/question.yml' + and 'training_data/ignore.yml'. + + :param text: a string representing the message. + e.g. 'How to solve this issue?' + :return: a list of lists of words tagged with DictionaryTagger + and POSTagger. + e.g. 
[[('how', 'how', ['question', 'WRB']), + ('can', 'can', ['MD']), + ('I', 'I', ['PRP']), + ('solve', 'solve', ['VB']), + ('this', 'this', ['DT']), + ('issue', 'issue', ['NN']), + ('?', '?', ['question', '.'])]] + """ + splitted_message = self.splitter.split(text) + pos_tagged_message = self.postagger.pos_tag(splitted_message) + dict_tagged_message = self.dicttagger.tag(pos_tagged_message) + return dict_tagged_message diff --git a/gitter/nlp/training_data/__init__.py b/gitter/nlp/training_data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gitter/nlp/training_data/ignore.yml b/gitter/nlp/training_data/ignore.yml new file mode 100644 index 00000000..3ac67a08 --- /dev/null +++ b/gitter/nlp/training_data/ignore.yml @@ -0,0 +1,38 @@ +welcome: [ignore] +wlcm: [ignore] +thank you: [ignore] +thanks: [ignore] +thx: [ignore] +okay: [ignore] +ok: [ignore] +great: [ignore] +awesome: [ignore] +nice: [ignore] +superb: [ignore] +anytime: [ignore] +assign: [ignore] +corobo assign: [ignore] +sorry: [ignore] +yeah: [ignore] +yup: [ignore] +yep: [ignore] +hello world: [ignore] +hello: [ignore] +updated: [ignore] +please: [ignore] +pls: [ignore] +on it: [ignore] +got it: [ignore] +sure: [ignore] +replied: [ignore] +review: [ignore] +oh: [ignore] +ah: [ignore] +afk: [ignore] +cool: [ignore] +looking: [ignore] +Here you go: [ignore] +alright: [ignore] +am I: [ignore] +joined: [ignore] +no problem: [ignore] diff --git a/gitter/nlp/training_data/question.yml b/gitter/nlp/training_data/question.yml new file mode 100644 index 00000000..031b1fc2 --- /dev/null +++ b/gitter/nlp/training_data/question.yml @@ -0,0 +1,14 @@ +when: [question] +why: [question] +how: [question] +?: [question] +what: [question] +where: [question] +which: [question] +can you: [question] +can anyone: [question] +can I: [question] +can someone: [question] +question: [question] +should I: [question] +could you: [question] diff --git a/gitter/tests/__init__.py b/gitter/tests/__init__.py new file 
mode 100644 index 00000000..e69de29b diff --git a/gitter/tests/message_assertions.py b/gitter/tests/message_assertions.py new file mode 100644 index 00000000..ef5bc3df --- /dev/null +++ b/gitter/tests/message_assertions.py @@ -0,0 +1,27 @@ +from django.test import TestCase + +from gitter.messages import message_type + + +class MessageAssertion(TestCase): + + def assertIsQuestion(self, message): + message_dict = {} + message_dict['text'] = message + type_of_message = message_type(message_dict) + if type_of_message != 'question': + raise AssertionError('This message is not a question') + + def assertIsAnswer(self, message): + message_dict = {} + message_dict['text'] = message + type_of_message = message_type(message_dict) + if type_of_message != 'answer': + raise AssertionError('This message is not a answer') + + def assertIsIgnore(self, message): + message_dict = {} + message_dict['text'] = message + type_of_message = message_type(message_dict) + if type_of_message != 'ignore': + raise AssertionError('This message is not a ignore') diff --git a/gitter/tests/test_management_commands.py b/gitter/tests/test_management_commands.py new file mode 100644 index 00000000..ac49e7e6 --- /dev/null +++ b/gitter/tests/test_management_commands.py @@ -0,0 +1,16 @@ +from django.test import TestCase +from django.core.management import call_command + +from gitter.models import Message + + +class ImportContributorDataTest(TestCase): + + @classmethod + def setUpTestData(cls): + call_command('import_messages_data') + + def test_command_import_contributors_data(self): + messages = Message.objects.all() + self.assertIn('testuser', + [message.sent_by for message in messages]) diff --git a/gitter/tests/test_message_type.py b/gitter/tests/test_message_type.py new file mode 100644 index 00000000..bcb57f36 --- /dev/null +++ b/gitter/tests/test_message_type.py @@ -0,0 +1,84 @@ +from gitter.tests.message_assertions import MessageAssertion + + +class MessgeTypeTest(MessageAssertion): + + def 
test_ignore_message(self): + """ + This method is responsible for testing the positive + sentiment score. It means that the message should be ignored. + """ + self.assertIsIgnore('Please assign me this issue') + self.assertIsIgnore('Thank you.') + self.assertIsIgnore('Okay thank you so much :)') + self.assertIsIgnore('I have replied to your comment') + self.assertIsIgnore('OK thanks i got it') + self.assertIsIgnore('ok Cool') + self.assertIsIgnore('I have made the PR') + self.assertIsIgnore('Yes, pushing the commit.') + self.assertIsIgnore('Thanks a lot! Its working now :D') + self.assertIsIgnore('I\'ll check them again.') + self.assertIsIgnore('I tried it locally and it works') + self.assertIsIgnore('I\'ll try never to repeat these mistakes again.') + self.assertIsIgnore('It still doesnt open up the pr') + self.assertIsIgnore('Cool ill do that') + self.assertIsIgnore('Okay .....then I will wait.Anyways thank you') + self.assertIsIgnore('Ok thank you, sorry for the multipost') + self.assertIsIgnore('Okay thanks! And I saw it, will fix it quick.') + self.assertIsIgnore('Thanks. I read first I have to run on a project.') + self.assertIsIgnore( + 'Please @jayvdb can help me with solving this issue.') + + def test_question_message(self): + """ + This method is responsible for testing the negative + sentiment score. It means that the message is a question. + """ + self.assertIsQuestion('I\'ve got a question can i use the corobo ' + 'commands in any channel or is there a ' + 'special channel?') + self.assertIsQuestion('Can someone tell me if this pr is okay ' + 'or it needs more changes? 
') + self.assertIsQuestion('Can I work on this?') + self.assertIsQuestion('Can someone assign to me?') + self.assertIsQuestion('How did you put a tickmark on a commit ?') + self.assertIsQuestion('what to do after rebase?') + self.assertIsQuestion('how to Use github interface for review?') + self.assertIsQuestion('But dont i have to wait for my previous ' + 'PR to get merged?') + self.assertIsQuestion('is there a separate bears room?') + self.assertIsQuestion('What does invoking with a single option mean?') + self.assertIsQuestion('Any hint in which file should i add this test?') + self.assertIsQuestion('Should i click on the create pull request?') + self.assertIsQuestion('is the bot down?') + self.assertIsQuestion('can someone tell me which is the docstring ' + 'file in -utils') + self.assertIsQuestion('I have done a newcomer issue, and also ' + 'reviewed a newcomer issue, What furthere ' + 'steps for becoming a developer?') + self.assertIsQuestion( + 'ok, can you give me some more time to work on this?') + self.assertIsQuestion('I had a question why is corobo down?') + self.assertIsQuestion('ok, so what should I do now?') + self.assertIsQuestion('Okay I will stick to it but why will ' + 'he won\'t approve it?') + self.assertIsQuestion('How do you squash a commit? I\'m ' + 'sorry I\'m new to this.') + self.assertIsQuestion( + 'I actually used that guide, did I do something wrong?') + self.assertIsQuestion('Do i need to wait for it to be merged?') + self.assertIsQuestion('Hi, may i know why the bot check for ' + 'failed for continuous-integration?') + self.assertIsQuestion('Should i add the file , commit and push?') + self.assertIsQuestion('so how to go about it?') + self.assertIsQuestion('does bear support python 3.7?') + + def test_answer_message(self): + """ + This method is responsible for testing long messages with a + non-negative sentiment score. It means that the message is an answer. 
+ """ + self.assertIsAnswer('The commit looks good to me .You can ' + 'wait for it to get merged.') + self.assertIsAnswer('No create a pull request.The Travis CI ' + 'build will continue its checking.') diff --git a/gitter/tests/test_messages.py b/gitter/tests/test_messages.py new file mode 100644 index 00000000..f4a5e6c9 --- /dev/null +++ b/gitter/tests/test_messages.py @@ -0,0 +1,12 @@ +import requests_mock + +from django.test import TestCase + +from gitter.messages import get_messages + + +class GetMessagesTest(TestCase): + + def test_get_messages(self): + with requests_mock.Mocker(): + get_messages() diff --git a/gitter/tests/test_models.py b/gitter/tests/test_models.py new file mode 100644 index 00000000..79e6a792 --- /dev/null +++ b/gitter/tests/test_models.py @@ -0,0 +1,36 @@ +from django.test import TestCase + +from gitter.models import Message + + +class LabelModelTest(TestCase): + + @classmethod + def setUpTestData(cls): + # Set up non-modified objects used by all methods + Message.objects.create(identifier='5b5f1dc4d4527523f640004c', + room='test/test', + text='Is this issue valid?', + sent_at='2018-07-25 14:00:09.456000', + sent_by='testuser', + message_type='question') + + def test_field_label(self): + message = Message.objects.get(identifier='5b5f1dc4d4527523f640004c') + identifier = message._meta.get_field('identifier').verbose_name + room = message._meta.get_field('room').verbose_name + text = message._meta.get_field('text').verbose_name + sent_at = message._meta.get_field('sent_at').verbose_name + sent_by = message._meta.get_field('sent_by').verbose_name + message_type = message._meta.get_field('message_type').verbose_name + self.assertEquals(identifier, 'identifier') + self.assertEquals(room, 'room') + self.assertEquals(text, 'text') + self.assertEquals(sent_at, 'sent at') + self.assertEquals(sent_by, 'sent by') + self.assertEquals(message_type, 'message type') + + def test_object_name_is_repr_return(self): + message = 
Message.objects.get(identifier='5b5f1dc4d4527523f640004c') + expected_object_name = '5b5f1dc4d4527523f640004c: Is this issue valid?' + self.assertEquals(expected_object_name, str(message)) diff --git a/requirements.txt b/requirements.txt index 700c2551..afd234fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ markdown2 python_dateutil PyGithub xmltodict +nltk diff --git a/setup.cfg b/setup.cfg index 2ce7836a..25c2b1ca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,6 +12,7 @@ testpaths = inactive_issues data gci + gitter gsoc gamification log @@ -66,6 +67,7 @@ source = inactive_issues data gci + gitter gsoc gamification log @@ -81,6 +83,7 @@ omit = community/git.py gci/*.py gsoc/*.py + gitter/nlp/dict_tagger.py log/*.py meta_review/handler.py model/*.py