In [3]:
def __count_paragraphs(text: str) -> int:
    """
    Count the number of paragraphs in the given text.

    Paragraphs are chunks of text separated by one or more blank lines
    (a blank line may contain whitespace). Chunks that are empty or
    whitespace-only are not counted, so an empty string yields 0 and
    repeated, leading or trailing blank lines do not inflate the result.

    Args:
        text (str): The text to count paragraphs in.

    Returns:
        int: The number of non-empty paragraphs in the text.
    """
    # ``\s*`` also swallows extra newlines, so a run of blank lines is
    # treated as a single paragraph break.
    chunks = re.split(r'\n\s*\n', text)
    return sum(1 for chunk in chunks if chunk.strip())

In [7]:
def ____read_words_and_punctuation(file_path, num_words):
    """
    Read at most ``num_words`` whitespace-separated words from a UTF-8 file.

    Words keep any punctuation attached to them. Words inside a paragraph
    are joined with single spaces; paragraphs (separated in the file by one
    or more blank lines) are joined with a blank line ('\\n\\n'). Reading
    stops in the middle of a paragraph as soon as the limit is reached.

    Args:
        file_path: Path of the text file to read.
        num_words: Maximum number of words to return. Values <= 0 return
            an empty result without reading the file.

    Returns:
        tuple[str, int]: The collected text and the number of words in it.
    """
    if num_words <= 0:
        return '', 0

    finished_paragraphs = []   # paragraphs already closed by a blank line
    current_words = []         # words of the paragraph being read
    word_count = 0

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            words = line.split()
            if not words:
                # Blank line: close the running paragraph, if any. Repeated
                # or leading blank lines are ignored.
                if current_words:
                    finished_paragraphs.append(' '.join(current_words))
                    current_words = []
                continue

            for word in words:
                current_words.append(word)
                word_count += 1
                if word_count >= num_words:
                    finished_paragraphs.append(' '.join(current_words))
                    return '\n\n'.join(finished_paragraphs), word_count

    if current_words:
        finished_paragraphs.append(' '.join(current_words))
    return '\n\n'.join(finished_paragraphs), word_count

In [3]:
def _get_text_with_paragraph(file_path, num_words):
    """
    Collect whole leading paragraphs of a file within a word budget.

    Paragraphs are runs of non-blank lines; the lines of a paragraph are
    stripped and joined with single spaces. Paragraphs are taken in file
    order until the next one would push the total above ``num_words``;
    that paragraph and everything after it are left out.

    Args:
        file_path: Path of the UTF-8 text file to read.
        num_words: Maximum total number of whitespace-separated words.

    Returns:
        tuple[str, int, int]: The selected paragraphs joined by blank
        lines, the number of words in them, and the number of paragraphs.
    """
    def paragraphs_generator(path):
        # Explicit encoding so the result does not depend on the platform
        # default.
        with open(path, 'r', encoding='utf-8') as file:
            paragraph = []
            for line in file:
                line = line.strip()
                if line:
                    paragraph.append(line)
                elif paragraph:
                    yield ' '.join(paragraph)
                    paragraph = []
            if paragraph:
                yield ' '.join(paragraph)

    selected_paragraphs = []
    word_count = 0

    paragraphs = paragraphs_generator(file_path)
    try:
        for paragraph in paragraphs:
            paragraph_word_count = len(paragraph.split())
            if word_count + paragraph_word_count > num_words:
                break
            selected_paragraphs.append(paragraph)
            word_count += paragraph_word_count
    finally:
        # Closing the generator leaves its ``with`` block, so the file is
        # closed right away even when the loop stops early.
        paragraphs.close()

    final_text = '\n\n'.join(selected_paragraphs)
    return final_text, word_count, len(selected_paragraphs)




In [4]:
def count_paragraphs(text):
    """
    Count the paragraphs in ``text``.

    A paragraph break is a newline followed by any run of whitespace, i.e.
    one or more blank lines or a line that starts indented. Empty or
    whitespace-only pieces are not counted, so empty text gives 0 and
    trailing or repeated blank lines do not add phantom paragraphs.

    Args:
        text (str): The text to analyse.

    Returns:
        int: The number of non-empty paragraphs.
    """
    # Raw string: '\s' is not a valid escape in a normal string literal.
    # ``\n\s+`` already covers the double line break case ('\n\n').
    paragraphs = re.split(r'\n\s+', text)
    return sum(1 for paragraph in paragraphs if paragraph.strip())

In [5]:
def read_words_and_punctuation(text, num_words):
    """
    Return the leading paragraphs of ``text`` that fit in a word budget.

    The text is split into paragraphs at every newline that is followed by
    whitespace (blank lines or an indented line). Whole paragraphs are
    taken in order until the next one would push the total above
    ``num_words``; that paragraph and everything after it are dropped.

    Args:
        text (str): The text to take paragraphs from.
        num_words (int): Maximum total number of whitespace-separated words.

    Returns:
        str: The selected paragraphs joined by blank lines ('' if even the
        first paragraph does not fit).
    """
    # Local import: this function did not rely on ``re`` before.
    import re

    # ``str.split`` treats its argument as a literal separator, so the
    # regular expression has to go through ``re.split``.
    pieces = (piece.strip() for piece in re.split(r'\n\s+', text))
    paragraphs = [piece for piece in pieces if piece]

    selected_paragraphs = []
    word_count = 0

    for paragraph in paragraphs:
        paragraph_word_count = len(paragraph.split())
        # Stop at the first paragraph that would exceed the budget.
        if word_count + paragraph_word_count > num_words:
            break
        selected_paragraphs.append(paragraph)
        word_count += paragraph_word_count

    return '\n\n'.join(selected_paragraphs)


In [3]:
class Texteval():
    """Word-limited excerpt of a text file plus simple text statistics.

    Intended use: set ``lang`` (loads the spaCy pipeline and the Pyphen
    hyphenator), then assign ``input_file = (filename, numwords)``. The
    excerpt and its statistics are then available through the properties.
    """

    # Supported language code -> (spaCy model name, Pyphen dictionary name).
    _LANGUAGE_MODELS = {
        "en": ("en_core_web_sm", "en_US"),
        "es": ("es_core_news_sm", "es_ES"),
        "it": ("it_core_news_sm", "it_IT"),
    }

    def __init__(self):
        self._lang = None
        self._nlp = None      # spaCy pipeline, set by the ``lang`` setter
        self._hyphen = None   # Pyphen hyphenator, set by the ``lang`` setter
        # self._dc = Dictionaries()
        # self._sw = Stopwords()
        self._input_file = None
        self._requested_words = None
        self._content = None
        self._word_count = 0

        self._paragraph_count = 0
        self._sentence_count = 0
        self._punctuation_count = 0
        self._syllables_count = 0
        # Initialised here so ``letter_count`` is readable before any file
        # has been loaded.
        self._letter_count = 0

    def count_sentences_and_punctuation(self, text):
        """Count sentences and punctuation tokens in ``text``.

        Requires the spaCy pipeline loaded by the ``lang`` setter.

        Args:
            text (str): The text to analyse.

        Returns:
            tuple[int, int]: Number of sentences found by the pipeline and
            number of tokens whose text occurs in ``string.punctuation``.
        """
        doc = self._nlp(text)

        sentence_count = 0
        punctuation_count = 0

        for sentence in doc.sents:
            sentence_count += 1
            punctuation_count += sum(1 for token in sentence if token.text in string.punctuation)

        return sentence_count, punctuation_count

    def count_syllables(self, word):
        """Return the number of hyphenation segments Pyphen finds in ``word``.

        Requires the hyphenator loaded by the ``lang`` setter.
        """
        return len(self._hyphen.inserted(word).split("-"))

    @staticmethod
    def _iter_paragraphs(file_path):
        """Yield each paragraph of the file with its lines joined by spaces."""
        with open(file_path, 'r', encoding='utf-8') as file:
            paragraph = []
            for line in file:
                line = line.strip()
                if line:
                    paragraph.append(line)
                elif paragraph:
                    yield ' '.join(paragraph)
                    paragraph = []
            if paragraph:
                yield ' '.join(paragraph)

    def get_text_with_paragraph(self, file_path, num_words):
        """Load whole leading paragraphs of a file within a word budget.

        Paragraphs are taken in file order until the next one would push
        the total above ``num_words``. The excerpt and its statistics are
        stored on the instance; nothing is returned.

        Sentence, punctuation and syllable counts need the models loaded
        by the ``lang`` setter. When no language has been set they are
        stored as 0, so plain excerpting still works.

        Args:
            file_path: Path of the UTF-8 text file to read.
            num_words (int): Maximum total number of whitespace-separated
                words in the excerpt.
        """
        selected_paragraphs = []
        word_count = 0
        syllables_count = 0
        letter_count = 0

        paragraphs = self._iter_paragraphs(file_path)
        try:
            for paragraph in paragraphs:
                words = paragraph.split()
                if word_count + len(words) > num_words:
                    break
                selected_paragraphs.append(paragraph)
                word_count += len(words)

                # Count syllables and letters in words within the paragraph
                for word in words:
                    letter_count += sum(1 for letter in word if letter.isalpha())
                    if self._hyphen is not None:
                        syllables_count += self.count_syllables(word)
        finally:
            # Closes the underlying file even when the loop stops early.
            paragraphs.close()

        final_text = '\n\n'.join(selected_paragraphs)

        if self._nlp is not None:
            sentence_count, punctuation_count = self.count_sentences_and_punctuation(final_text)
        else:
            sentence_count = punctuation_count = 0

        self._content = final_text
        self._word_count = word_count
        self._paragraph_count = len(selected_paragraphs)
        self._sentence_count = sentence_count
        self._punctuation_count = punctuation_count
        self._syllables_count = syllables_count
        self._letter_count = letter_count

    @property
    def lang(self):
        """Language code currently in use, or None if none was set."""
        return self._lang

    @lang.setter
    def lang(self, lang):
        """Load the language models for ``lang`` ('en', 'es' or 'it').

        Raises:
            ValueError: If ``lang`` is not a supported language code. The
                previously loaded language, if any, is left untouched.
        """
        try:
            model_name, hyphen_lang = self._LANGUAGE_MODELS[lang]
        except (KeyError, TypeError):
            raise ValueError("Unsupported language") from None
        # Load the language models
        self._nlp = spacy.load(model_name)
        self._hyphen = pyphen.Pyphen(lang=hyphen_lang)
        self._lang = lang

    @property
    def content(self):
        """Text of the current excerpt."""
        return self._content

    @content.setter
    def content(self, content):
        self._content = content

    @property
    def word_count(self):
        """Number of words in the excerpt."""
        return self._word_count

    @property
    def paragraph_count(self):
        """Number of paragraphs in the excerpt."""
        return self._paragraph_count

    @property
    def sentence_count(self):
        """Number of sentences in the excerpt."""
        return self._sentence_count

    @property
    def punctuation_count(self):
        """Number of punctuation tokens in the excerpt."""
        return self._punctuation_count

    @property
    def syllables_count(self):
        """Number of syllables in the excerpt."""
        return self._syllables_count

    @property
    def letter_count(self):
        """Number of alphabetic characters in the excerpt's words."""
        return self._letter_count

    @property
    def input_file(self):
        """Name of the file the excerpt was loaded from."""
        return self._input_file

    @input_file.setter
    def input_file(self, file_info):
        """Validate and load a file given as ``(filename, numwords)``.

        Exits the program via ``sys.exit`` when ``diy_file_validate``
        reports the file as invalid.

        Raises:
            ValueError: If ``file_info`` is not a 2-tuple.
        """
        if isinstance(file_info, tuple) and len(file_info) == 2:
            filename, numwords = file_info
            success, message = diy_file_validate(filename)
            if not success:
                sys.exit(f"File {filename}: {message}")
            self._input_file = filename
            self._requested_words = numwords
            self.get_text_with_paragraph(filename, numwords)
        else:
            raise ValueError("Input must be a tuple with (filename, numwords).")

         

In [13]:
class Texteval:
    """Word-limited excerpt of a text file plus simple text statistics.

    Intended use: set ``lang`` (loads the spaCy pipeline and the Pyphen
    hyphenator), then assign ``input_file = (filename, num_words)``. The
    excerpt and its statistics are then available through the read-only
    properties.
    """

    # Supported language code -> (spaCy model name, Pyphen dictionary name).
    _LANGUAGE_MODELS = {
        "en": ("en_core_web_sm", "en_US"),
        "es": ("es_core_news_sm", "es"),
        "it": ("it_core_news_sm", "it_IT"),
    }

    def __init__(self):
        self._lang = None
        self._nlp = None      # spaCy pipeline, set by the ``lang`` setter
        self._hyphen = None   # Pyphen hyphenator, set by the ``lang`` setter
        self._input_file = None
        self._requested_words = None
        self._content = None
        self._word_count = 0
        self._paragraph_count = 0
        self._sentence_count = 0
        self._punctuation_count = 0
        self._syllables_count = 0
        self._letter_count = 0

    def count_sentences_and_punctuation(self, text):
        """Count sentences and punctuation tokens in ``text``.

        Requires the spaCy pipeline loaded by the ``lang`` setter.

        Args:
            text (str): The text to analyse.

        Returns:
            tuple[int, int]: Number of sentences found by the pipeline and
            number of tokens whose text occurs in ``string.punctuation``.
        """
        doc = self._nlp(text)
        sentence_count = 0
        punctuation_count = 0
        for sentence in doc.sents:
            sentence_count += 1
            punctuation_count += sum(1 for token in sentence if token.text in string.punctuation)
        return sentence_count, punctuation_count

    def count_syllables(self, word):
        """Return the number of hyphenation segments Pyphen finds in ``word``.

        Requires the hyphenator loaded by the ``lang`` setter.
        """
        return len(self._hyphen.inserted(word).split("-"))

    @staticmethod
    def _iter_paragraphs(file_path):
        """Yield each paragraph of the file, keeping its line breaks."""
        with open(file_path, 'r', encoding='utf-8') as file:
            current_paragraph = []
            for line in file:
                line = line.strip()
                if line:
                    current_paragraph.append(line)
                elif current_paragraph:
                    yield '\n'.join(current_paragraph)
                    current_paragraph = []
            # Handle the last paragraph, if any
            if current_paragraph:
                yield '\n'.join(current_paragraph)

    def get_text_with_paragraph(self, file_path, num_words):
        """Load whole leading paragraphs of a file within a word budget.

        Words are runs of ``\\w`` characters (letters, digits, underscore).
        Paragraphs are taken in file order until the next one would push
        the total above ``num_words``; that paragraph is neither included
        in the excerpt nor counted. The excerpt and its statistics are
        stored on the instance; nothing is returned.

        Sentence, punctuation and syllable counts need the models loaded
        by the ``lang`` setter. When no language has been set they are
        stored as 0, so plain excerpting still works.

        Args:
            file_path: Path of the UTF-8 text file to read.
            num_words (int): Maximum total number of words in the excerpt.
        """
        selected_paragraphs = []
        word_count = 0
        syllables_count = 0
        letter_count = 0

        paragraphs = self._iter_paragraphs(file_path)
        try:
            for paragraph in paragraphs:
                words = re.findall(r'\w+', paragraph)
                # Check the budget BEFORE accepting the paragraph so an
                # over-limit paragraph never leaks into the result.
                if word_count + len(words) > num_words:
                    break
                selected_paragraphs.append(paragraph)
                word_count += len(words)

                for word in words:
                    letter_count += sum(1 for letter in word if letter.isalpha())
                    if self._hyphen is not None:
                        syllables_count += self.count_syllables(word)
        finally:
            # Closes the underlying file even when the loop stops early.
            paragraphs.close()

        final_text = '\n\n'.join(selected_paragraphs)

        if self._nlp is not None:
            sentence_count, punctuation_count = self.count_sentences_and_punctuation(final_text)
        else:
            sentence_count = punctuation_count = 0

        self._content = final_text
        self._word_count = word_count
        self._paragraph_count = len(selected_paragraphs)
        self._sentence_count = sentence_count
        self._punctuation_count = punctuation_count
        self._syllables_count = syllables_count
        self._letter_count = letter_count

    @property
    def lang(self):
        """Language code currently in use, or None if none was set."""
        return self._lang

    @lang.setter
    def lang(self, language):
        """Load the language models for ``language`` ('en', 'es' or 'it').

        Raises:
            ValueError: If ``language`` is not a supported language code.
                The previously loaded language, if any, is left untouched.
        """
        try:
            model_name, hyphen_lang = self._LANGUAGE_MODELS[language]
        except (KeyError, TypeError):
            raise ValueError("Unsupported language") from None
        self._nlp = spacy.load(model_name)
        self._hyphen = pyphen.Pyphen(lang=hyphen_lang)
        self._lang = language

    @property
    def content(self):
        """Text of the current excerpt."""
        return self._content

    @property
    def word_count(self):
        """Number of words in the excerpt."""
        return self._word_count

    @property
    def paragraph_count(self):
        """Number of paragraphs in the excerpt."""
        return self._paragraph_count

    @property
    def sentence_count(self):
        """Number of sentences in the excerpt."""
        return self._sentence_count

    @property
    def punctuation_count(self):
        """Number of punctuation tokens in the excerpt."""
        return self._punctuation_count

    @property
    def syllables_count(self):
        """Number of syllables in the excerpt."""
        return self._syllables_count

    @property
    def letter_count(self):
        """Number of alphabetic characters in the excerpt's words."""
        return self._letter_count

    @property
    def input_file(self):
        """Name of the file the excerpt was loaded from."""
        return self._input_file

    @input_file.setter
    def input_file(self, file_info):
        """Validate and load a file given as ``(filename, num_words)``.

        Exits the program via ``sys.exit`` when ``diy_file_validate``
        reports the file as invalid.

        Raises:
            ValueError: If ``file_info`` is not a 2-tuple.
        """
        if isinstance(file_info, tuple) and len(file_info) == 2:
            filename, num_words = file_info
            success, message = diy_file_validate(filename)
            if not success:
                sys.exit(f"File {filename}: {message}")
            self._input_file = filename
            self._requested_words = num_words
            self.get_text_with_paragraph(filename, num_words)
        else:
            raise ValueError("Input must be a tuple with (filename, num_words)")