add type hints #49

Open · wants to merge 11 commits into base: master
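This PR adds type annotations across four modules (`lexico_semantic_norms`, `semantic_measures`, `utils`, plus small doc tweaks). Hints are only useful if something checks them; below is a minimal verification sketch using mypy's Python API (the file path assumes a local checkout and is illustrative, not part of this PR):

```python
# Hypothetical check, not part of this PR: run mypy programmatically
# over one of the newly annotated modules (requires `pip install mypy`).
from mypy import api

stdout, stderr, exit_status = api.run(["src/TRUNAJOD/utils.py"])
print(stdout)
print("clean" if exit_status == 0 else "type errors found")
```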
2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
 <p align="center">
-<img style="width: 50%; height: 50%" src="https://raw.githubusercontent.com/dpalmasan/TRUNAJOD2.0/master/imgs/trunajod_logo.png">
+<img style="width: 30%; height: 30%" src="https://raw.githubusercontent.com/dpalmasan/TRUNAJOD2.0/master/imgs/trunajod_logo.png">
 </p>

# TRUNAJOD: A text complexity library for text analysis built on spaCy
2 changes: 1 addition & 1 deletion docs/api_reference/ttr.rst
@@ -6,4 +6,4 @@ Type Token Ratios
.. automodule:: TRUNAJOD.ttr
:members:

-.. bibliography:: ttr.bib
\ No newline at end of file
+.. bibliography:: ttr.bib
6 changes: 6 additions & 0 deletions docs/api_reference/ttr_ref.bib
@@ -0,0 +1,6 @@
+@misc{herdan1961problemes,
+  title={Probl{\`e}mes et m{\'e}thodes de la statistique linguistique},
+  author={Herdan, Gustav},
+  year={1961},
+  publisher={JSTOR}
+}
Binary file modified imgs/trunajod_logo.png
22 changes: 14 additions & 8 deletions src/TRUNAJOD/lexico_semantic_norms.py
@@ -13,6 +13,7 @@
We provide two downloadable models of these variables, which come from
:cite:`duchon2013espal` and :cite:`guasch2016spanish`.
"""
+from spacy.tokens import Doc
from TRUNAJOD.lexicosemantic_norms_espal import LEXICOSEMANTIC_ESPAL
from TRUNAJOD.lexicosemantic_norms_espal import LSNorm
from TRUNAJOD.utils import lemmatize
@@ -28,7 +29,12 @@ class LexicoSemanticNorm(object):
:cite:`guasch2016spanish`.
"""

-    def __init__(self, doc, lexico_semantic_norm_dict, lemmatizer=None):
+    def __init__(
+        self,
+        doc: Doc,
+        lexico_semantic_norm_dict: dict,
+        lemmatizer: dict = None,
+    ):
"""Initialize lexico semantic norm object.

Calculate average over number of tokens given a text.
@@ -102,47 +108,47 @@ def __init__(self, doc, lexico_semantic_norm_dict, lemmatizer=None):
self.__context_avilability /= count
self.__familiarity /= count

-    def get_arousal(self):
+    def get_arousal(self) -> float:
"""Get arousal.

:return: Average arousal.
:rtype: float
"""
return self.__arousal

-    def get_concreteness(self):
+    def get_concreteness(self) -> float:
"""Get concreteness.

:return: Average concreteness.
:rtype: float
"""
return self.__concreteness

-    def get_context_availability(self):
+    def get_context_availability(self) -> float:
"""Get context_availability.

:return: Average context_availability.
:rtype: float
"""
return self.__context_avilability

-    def get_familiarity(self):
+    def get_familiarity(self) -> float:
"""Get familiarity.

:return: Average familiarity.
:rtype: float
"""
return self.__familiarity

-    def get_imageability(self):
+    def get_imageability(self) -> float:
"""Get imageability.

:return: Average imageability.
:rtype: float
"""
return self.__imageability

-    def get_valence(self):
+    def get_valence(self) -> float:
"""Get valence.

:return: Average valence.
@@ -151,7 +157,7 @@ def get_valence(self):
return self.__valence


-def get_conc_imag_familiarity(doc):
+def get_conc_imag_familiarity(doc: Doc) -> list:
"""Get lexico-semantic variables.

Computes three lexico-semantic variables: Concreteness, Imageability and
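A note on return annotations here: a bracketed list of types such as `[float, float, float]` is a list literal, not a type, so static checkers reject it; the signature above therefore uses the builtin `list`. If the intent is to guarantee exactly three floats, the conventional spelling is `Tuple[float, float, float]`. A standalone sketch (toy function, not code from this PR):

```python
from typing import Tuple


def three_scores() -> Tuple[float, float, float]:
    """Toy stand-in for a function returning exactly three floats."""
    return (0.0, 0.0, 0.0)


# The precise annotation lets a checker verify three-way unpacking.
concreteness, imageability, familiarity = three_scores()
```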
7 changes: 4 additions & 3 deletions src/TRUNAJOD/semantic_measures.py
@@ -6,9 +6,10 @@
semantic measurements require word vectors (word embeddings) obtained from
CORPUS semantics.
"""
+from spacy.tokens import Doc


-def avg_w2v_semantic_similarity(docs, N):
+def avg_w2v_semantic_similarity(docs: Doc, N: int) -> float:
"""Compute average semantic similarity between adjacent sentences.

This is using word2vec :cite:`mikolov2013word2vec` model based on SPACY
@@ -43,7 +44,7 @@ def avg_w2v_semantic_similarity(docs, N):
return avg_sim / float(N - 1)


-def get_synsets(lemma, synset_dict):
+def get_synsets(lemma: str, synset_dict: dict) -> set:
"""Return synonym set given a word lemma.

The function requires that the synset_dict is passed into it. In our case
@@ -61,7 +62,7 @@ def get_synsets(lemma, synset_dict):
return synset_dict.get(lemma, {lemma})


-def overlap(lemma_list_group, synset_dict):
+def overlap(lemma_list_group: list, synset_dict: dict) -> float:
"""Compute average overlap in a text.

Computes semantic synset overlap (synonyms), based on a lemma list group
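For reviewers sanity-checking the `get_synsets` signature, a toy sketch of its behavior (the synonym dictionary below is invented for illustration; in practice TRUNAJOD supplies one built from its Spanish synonym data):

```python
from TRUNAJOD.semantic_measures import get_synsets

# Toy synset dictionary: lemma -> set of synonyms (including the lemma).
synset_dict = {
    "perro": {"perro", "can", "chucho"},
    "correr": {"correr", "trotar"},
}

print(get_synsets("perro", synset_dict))  # {'perro', 'can', 'chucho'} (order may vary)
print(get_synsets("gato", synset_dict))   # unknown lemma falls back to {'gato'}
```

The fallback matches the visible implementation, `synset_dict.get(lemma, {lemma})`, which is also why the return annotation is `set` rather than `str`.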
34 changes: 20 additions & 14 deletions src/TRUNAJOD/utils.py
@@ -2,6 +2,8 @@
"""Utility functions for TRUNAJOD library."""
from enum import Enum

+from spacy.tokens import Doc


class SupportedModels(str, Enum):
"""Enum for supported Doc models."""
@@ -10,7 +12,7 @@ class SupportedModels(str, Enum):
STANZA = "stanza"


-def flatten(list_of_lists):
+def flatten(list_of_lists: list) -> list:
"""Flatten a list of list.

This is a utility function that takes a list of lists and
@@ -25,7 +27,9 @@ def flatten(list_of_lists):
return [item for sublist in list_of_lists for item in sublist]


-def get_sentences_lemmas(docs, lemma_dict, stopwords=[]):  # pragma: no cover
+def get_sentences_lemmas(
+    docs: Doc, lemma_dict: dict, stopwords=[]
+) -> tuple:  # pragma: no cover
"""Get lemmas from sentences.

Get different types of lemma measurements, such as noun lemmas, verb
@@ -88,7 +92,7 @@ def get_sentences_lemmas(docs, lemma_dict, stopwords=[]): # pragma: no cover
)


-def get_stopwords(filename):
+def get_stopwords(filename: str) -> set:
"""Read stopword list from file.

Assumes that the list is defined as a newline separated words. It is
@@ -109,7 +113,9 @@
return stopwords


-def get_token_lemmas(doc, lemma_dict, stopwords=[]):  # pragma: no cover
+def get_token_lemmas(
+    doc: Doc, lemma_dict: dict, stopwords=[]
+) -> tuple:  # pragma: no cover
"""Return lemmas from a sentence.

From a sentence, extracts the following lemmas:
@@ -170,7 +176,7 @@ def get_token_lemmas(doc, lemma_dict, stopwords=[]): # pragma: no cover
)


-def is_adjective(pos_tag):
+def is_adjective(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is ``ADJ``, False otherwise.

:param pos_tag: Part of Speech tag
@@ -181,7 +187,7 @@ def is_adjective(pos_tag):
return pos_tag == "ADJ"


-def is_adverb(pos_tag):
+def is_adverb(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is ``ADV``, False otherwise.

:param pos_tag: Part of Speech tag
@@ -192,7 +198,7 @@ def is_adverb(pos_tag):
return pos_tag == "ADV"


-def is_noun(pos_tag):
+def is_noun(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is ``NOUN`` or ``PROPN``, False otherwise.

:param pos_tag: Part of Speech tag
@@ -203,7 +209,7 @@ def is_noun(pos_tag):
return pos_tag == "PROPN" or pos_tag == "NOUN"


-def is_pronoun(pos_tag):
+def is_pronoun(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is ``PRON``, False otherwise.

:param pos_tag: Part of Speech tag
@@ -214,7 +220,7 @@ def is_pronoun(pos_tag):
return pos_tag == "PRON"


-def is_stopword(word, stopwords):
+def is_stopword(word: str, stopwords: set) -> bool:
"""Return ``True`` if ``word`` is in ``stopwords``, False otherwise.

:param word: Word to be checked
@@ -227,7 +233,7 @@ def is_stopword(word, stopwords):
return word in stopwords


-def is_verb(pos_tag):
+def is_verb(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is ``VERB``, False otherwise.

:param pos_tag: Part of Speech tag
@@ -238,7 +244,7 @@ def is_verb(pos_tag):
return pos_tag == "VERB"


-def is_word(pos_tag):
+def is_word(pos_tag: str) -> bool:
"""Return ``True`` if ``pos_tag`` is not punctuation, False otherwise.

This method checks that the ``pos_tag`` does not belong to the following
@@ -252,7 +258,7 @@ def is_word(pos_tag):
return pos_tag != "PUNCT" and pos_tag != "SYM" and pos_tag != "SPACE"


-def lemmatize(lemma_dict, word):
+def lemmatize(lemma_dict: dict, word: str) -> str:
"""Lemmatize a word.

Lemmatizes a word using a lemmatizer which is represented as a dict that
@@ -272,7 +278,7 @@ def lemmatize(lemma_dict, word):
return lemma_dict.get(word, word)


-def process_text(text, sent_tokenize):
+def process_text(text: str, sent_tokenize) -> list:
"""Process text by tokenizing sentences given a tokenizer.

:param text: Text to be processed
Expand All @@ -285,7 +291,7 @@ def process_text(text, sent_tokenize):
return sent_tokenize(text)


-def read_text(filename):
+def read_text(filename: str) -> str:
"""Read a ``utf-8`` encoded text file and returns the text as ``string``.

This is just a utility function, that is not recommended to use if the text
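Finally, a short sketch exercising several of the annotated helpers in `utils.py`; the toy lemma dictionary and stopword set are invented for illustration:

```python
from TRUNAJOD.utils import flatten, is_noun, is_stopword, lemmatize

lemma_dict = {"perros": "perro"}  # toy lemmatizer: surface form -> lemma
stopwords = {"el", "la", "de"}    # toy stopword set

print(flatten([[1, 2], [3]]))           # [1, 2, 3]
print(lemmatize(lemma_dict, "perros"))  # 'perro'
print(lemmatize(lemma_dict, "gato"))    # unknown words pass through: 'gato'
print(is_noun("PROPN"))                 # True
print(is_stopword("de", stopwords))     # True
```

With the annotations in place, passing e.g. a list where `lemmatize` expects a `dict` becomes a static error instead of a silent runtime surprise.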