Skip to content

Commit

Permalink
Finalize Sphinx configuration (#64)
Browse files Browse the repository at this point in the history
* Add missing extension to docs

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* Improve source/conf.py

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* Add adapters.rst documentation and solve docs build issue

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* Create docs/source/modules and update components/component.py docstring

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* Apply black to project

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* #24 Refactor forgotten code into SimpleNLU

* #24 git keep static doc folder

* Update docs/source/conf.py and create requirements-docs.txt

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

* Reorganize all documentation and improve david/utils/io.py docstrings

Signed-off-by: ArthurTemporim <arthurrtl@gmail.com>

Co-authored-by: Raphael Pinto <raphaelcpinto@gmail.com>
  • Loading branch information
arthurTemporim and ralphg6 committed Feb 12, 2020
1 parent 6194741 commit de7d14d
Show file tree
Hide file tree
Showing 21 changed files with 287 additions and 111 deletions.
7 changes: 7 additions & 0 deletions david/adapters/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,15 @@


class Adapter(Module):
"""
Adapter Class documentation
"""

@classmethod
def validade_data(self, payload: Dict) -> bool:
"""
validate_data method documentation
"""
return True

@classmethod
Expand Down
6 changes: 3 additions & 3 deletions david/components/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

class UnsupportedLanguageError(Exception):
"""Raised when a component is created but the language is not supported.
Attributes:
component -- component name
language -- language that component doesn't support
:param component: component name
:param language: language that component doesn't support
"""

def __init__(self, component: Text, language: Text) -> None:
Expand Down
55 changes: 43 additions & 12 deletions david/components/nlu/simple.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import json
import os
from typing import Any, Dict, Optional, Text
import re
from typing import Any, Dict, List, Optional, Text

import unidecode
from Levenshtein import distance

import david.util as util
from david.components import Component
from david.config import DavidConfig
from david.constants import INTENTS_ATTRIBUTE, TEXT_ATTRIBUTE
Expand All @@ -15,6 +16,32 @@
SIMMILARITY_ERROR_ACCEPTED = 0.3


NON_CONTENT = r"[^\w\d\s]"


def tokenize(stopwords: List[str], sentence: str) -> List[str]:
    """Split a sentence into normalized content tokens.

    The sentence is transliterated with ``unidecode``, every character
    matched by ``NON_CONTENT`` is removed, the result is lower-cased and
    then split on whitespace. Tokens present in ``stopwords`` are dropped.

    :param stopwords: tokens to discard; they are compared against the
        already normalized (transliterated, lower-cased) tokens
    :param sentence: raw text to tokenize
    :return: the remaining tokens, in their original order
    """
    # Build the lookup once so each membership test is O(1) instead of a list scan.
    ignored = frozenset(stopwords)

    normalized = unidecode.unidecode(sentence)
    normalized = re.sub(NON_CONTENT, "", normalized).lower()

    # split() without an argument breaks on any whitespace run (tabs and
    # newlines included, which NON_CONTENT keeps) and never yields empty tokens.
    return [token for token in normalized.split() if token not in ignored]


def simmilarity(a, b):
d = distance(a, b)
t = float(len(a) + len(b)) / 2
Expand Down Expand Up @@ -70,18 +97,22 @@ def train(
self, training_data: TrainingData, cfg: DavidConfig, **kwargs: Any
) -> None:

self.intent_model = {}
self.intent_model = {"stopwords": training_data.data["nlu"]["stopwords"]}

intents = {}

for intent, samples in training_data.data["nlu"]["intents"].items():
self.intent_model[intent] = {}
intents[intent] = {}
for sample in samples:
self.intent_model[intent][sample] = {"total": 0, "tokens": {}}
for t in util.tokenize(sample):
self.intent_model[intent][sample]["total"] += 1
if t in self.intent_model[intent][sample]["tokens"]:
self.intent_model[intent][sample]["tokens"][t] += 1
intents[intent][sample] = {"total": 0, "tokens": {}}
for t in tokenize(self.intent_model["stopwords"], sample):
intents[intent][sample]["total"] += 1
if t in intents[intent][sample]["tokens"]:
intents[intent][sample]["tokens"][t] += 1
else:
self.intent_model[intent][sample]["tokens"][t] = 1
intents[intent][sample]["tokens"][t] = 1

self.intent_model["intents"] = intents

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
model_file = os.path.join(model_dir, file_name)
Expand All @@ -92,10 +123,10 @@ def process(self, message: Message, **kwargs: Any) -> None:

input = message.get(TEXT_ATTRIBUTE)

tokens = util.tokenize(input)
tokens = tokenize(self.intent_model["stopwords"], input)
# print ("tokens", tokens)
intents = {}
for intent, samples in self.intent_model.items():
for intent, samples in self.intent_model["intents"].items():
intents[intent] = 0
for s, smeta in samples.items():
brutal_score = 0
Expand Down
38 changes: 0 additions & 38 deletions david/util.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,6 @@
import json
import re
from typing import Any, Text

import unidecode


def fetch_stopwords():
    """Load the stopword list from ``./data/stopwords.txt``.

    :return: set with one entry per line of the file, surrounding
        whitespace stripped
    """
    # The context manager closes the handle deterministically; the previous
    # version left the file open until the object was garbage collected.
    with open("./data/stopwords.txt", "r") as stopwords_file:
        return {line.strip() for line in stopwords_file}


stopwords = fetch_stopwords()
# stopwords = nltk.corpus.stopwords.words('portuguese')
# print "stopwords",stopwords

NON_CONTENT = r"[^\w\d\s]"


# TODO(ralphg6) Move to SimpleNLU
def tokenize(sentence):
    """Split a sentence into normalized content tokens.

    The sentence is transliterated with ``unidecode``, every character
    matched by ``NON_CONTENT`` is removed, the result is lower-cased and
    then split on whitespace. Tokens present in the module-level
    ``stopwords`` collection are dropped.

    :param sentence: raw text to tokenize
    :return: list with the remaining tokens, in their original order
    """
    normalized = unidecode.unidecode(sentence)
    normalized = re.sub(NON_CONTENT, "", normalized).lower()

    # split() without an argument breaks on any whitespace run (tabs and
    # newlines included, which NON_CONTENT keeps) and never yields empty tokens.
    return [token for token in normalized.split() if token not in stopwords]


def json_to_string(obj: Any, **kwargs: Any) -> Text:
indent = kwargs.pop("indent", 2)
Expand Down
Empty file added docs/source/_static/.gitkeep
Empty file.
24 changes: 14 additions & 10 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,31 @@
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../david/'))

sys.path.insert(0, os.path.abspath("../../"))

from david import version as david_version


# -- Project information -----------------------------------------------------

project = 'David'
copyright = '2020, Raphael Pinto'
author = 'Raphael Pinto'
project = "David"
copyright = "2020, Raphael Pinto"
author = "Raphael Pinto"

# The full version, including alpha/beta/rc tags
release = '0.0.1'
release = david_version.__version__


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]
extensions = ["sphinx.ext.autodoc"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
Expand All @@ -47,9 +49,11 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]

master_doc = "index"
7 changes: 5 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ Welcome to David's documentation!
=================================

.. toctree::
:maxdepth: 2
:caption: Contents:
:maxdepth: 2
:caption: Contents:
:glob:

modules/*



Expand Down
7 changes: 0 additions & 7 deletions docs/source/modules.rst

This file was deleted.

22 changes: 22 additions & 0 deletions docs/source/modules/david.adapters.adapters.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
david.adapters.adapters package
===============================

Submodules
----------

david.adapters.adapters.google module
-------------------------------------

.. automodule:: david.adapters.adapters.google
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: david.adapters.adapters
:members:
:undoc-members:
:show-inheritance:
29 changes: 29 additions & 0 deletions docs/source/modules/david.adapters.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
david.adapters package
======================

Subpackages
-----------

.. toctree::

david.adapters.adapters

Submodules
----------

david.adapters.adapter module
-----------------------------

.. automodule:: david.adapters.adapter
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: david.adapters
:members:
:undoc-members:
:show-inheritance:
22 changes: 22 additions & 0 deletions docs/source/modules/david.components.dialogue.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
david.components.dialogue package
=================================

Submodules
----------

david.components.dialogue.simple module
---------------------------------------

.. automodule:: david.components.dialogue.simple
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: david.components.dialogue
:members:
:undoc-members:
:show-inheritance:
22 changes: 22 additions & 0 deletions docs/source/modules/david.components.nlu.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
david.components.nlu package
============================

Submodules
----------

david.components.nlu.simple module
----------------------------------

.. automodule:: david.components.nlu.simple
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: david.components.nlu
:members:
:undoc-members:
:show-inheritance:
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
david.components package
========================

Subpackages
-----------

.. toctree::

david.components.dialogue
david.components.nlu

Submodules
----------

Expand All @@ -12,6 +20,14 @@ david.components.component module
:undoc-members:
:show-inheritance:

david.components.engine module
------------------------------

.. automodule:: david.components.engine
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------
Expand Down

0 comments on commit de7d14d

Please sign in to comment.