## Synonym Replacement


In [None]:
!pip install nltk



In [None]:
import nltk
from nltk.corpus import wordnet
import random

In [None]:
# Download required NLTK data files
nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
def get_synonym(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            if lemma.name().lower() != word.lower():
                synonyms.add(lemma.name().replace('_', ' '))
    return synonyms

In [None]:
def synonym_replace(paragraph, level):
    words = nltk.word_tokenize(paragraph)
    num_words = len(words)
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    replace_count = num_words * level // 5

    words_to_replace = random.sample(range(num_words), replace_count)

    for i in words_to_replace:
        word = words[i]
        synonyms = get_synonym(word)
        if synonyms:
            replacement = random.choice(list(synonyms))
            words[i] = replacement

    return ' '.join(words)

In [None]:
paragraph = "This is an example paragraph for testing the synonym replacement function. It should replace words based on the specified level."
level = 5
result = synonym_replace(paragraph, level)

In [None]:
result

'This cost Associate in Nursing exercise paragraph for try out the equivalent word substitute mapping . information technology should supersede Word of God establish along the define tier .'

## Word Insertion

In [None]:
import nltk
import random

In [None]:
nltk.download('punkt')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [None]:
from nltk.corpus import words

In [1]:
def add_random_characters(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    sentences = nltk.sent_tokenize(paragraph)
    severity_ratio = level / 5

    new_sentences = []

    for sentence in sentences:
        words_in_sentence = nltk.word_tokenize(sentence)
        num_perturbations = int(len(words_in_sentence) * severity_ratio)

        for _ in range(num_perturbations):
            word_index = random.randint(0, len(words_in_sentence) - 1)
            word = words_in_sentence[word_index]

            if len(word) > 1:  # To ensure there's space to add perturbations
                if random.choice([True, False]):
                    # Add a space at a random position
                    position = random.randint(0, len(word))
                    word = word[:position] + ' ' + word[position:]
                else:
                    # Add a random character at a random position
                    random_char = random.choice(string.ascii_letters)
                    position = random.randint(0, len(word))
                    word = word[:position] + random_char + word[position:]

                # Update the word in the list
                words_in_sentence[word_index] = word

        new_sentences.append(' '.join(words_in_sentence))

    return ' '.join(new_sentences)

In [None]:
paragraph = "This is an example paragraph for testing the random word addition function. It should add words based on the specified level."


In [None]:
level = 1
result = add_random_words(paragraph, level)

In [None]:
result

'withdraw This squawflower is an example paragraph for testing the random word addition function . dissipation It turbosupercharger should add words based on the specified level .'

## Character Level Perturbation

In [None]:
import nltk
import random
import string

# Download required NLTK data files
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
def insert_random_char(word):
    pos = random.randint(0, len(word))
    char = random.choice(string.ascii_letters)
    return word[:pos] + char + word[pos:]

In [None]:
def remove_random_char(word):
    if len(word) == 1:
        return word
    pos = random.randint(0, len(word) - 1)
    return word[:pos] + word[pos + 1:]

In [None]:
def replace_random_char(word):
    pos = random.randint(0, len(word) - 1)
    char = random.choice(string.ascii_letters)
    return word[:pos] + char + word[pos + 1:]

In [None]:
def perturb_word(word):
    if len(word) == 1:
        return word
    perturbation_type = random.choice(['insert', 'remove', 'replace'])
    if perturbation_type == 'insert':
        return insert_random_char(word)
    elif perturbation_type == 'remove':
        return remove_random_char(word)
    elif perturbation_type == 'replace':
        return replace_random_char(word)

In [None]:
def character_perturbation(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    words = nltk.word_tokenize(paragraph)
    num_words = len(words)
    severity_ratio = level / 5
    words_to_perturb = random.sample(range(num_words), int(num_words * severity_ratio))

    for i in words_to_perturb:
        words[i] = perturb_word(words[i])

    return ' '.join(words)

In [None]:
paragraph = "This is an example paragraph for testing the character perturbation function. It should perturb words based on the specified level."
level = 3
result = character_perturbation(paragraph, level)
print(result)

This Rs n Lxample paragraph for testing theP Hcharacter perturbation functin . VIt should perturb words ased n theK Fspecified leRel .


## Grammatical Mistakes

In [None]:
import nltk
import random
import re

# Download required NLTK data files
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
def subject_verb_agreement(sentence):
    # Example: "He go to school." instead of "He goes to school."
    words = nltk.word_tokenize(sentence)
    for i, word in enumerate(words):
        if word.lower() in ['is', 'are', 'was', 'were', 'has', 'have', 'goes', 'go']:
            if random.choice([True, False]):
                words[i] = word[:-1] if word.endswith('s') else word + 's'
    return ' '.join(words)

In [None]:
def incorrect_tense(sentence):
    # Example: "He went to school." to "He gone to school."
    words = nltk.word_tokenize(sentence)
    verbs = [word for word in words if wordnet.synsets(word, pos='v')]
    if verbs:
        verb_to_change = random.choice(verbs)
        tense_forms = ['go', 'gone', 'going']  # Simplified tense forms
        new_verb = random.choice(tense_forms)
        sentence = sentence.replace(verb_to_change, new_verb, 1)
    return sentence

In [None]:
def missing_punctuation(sentence):
    # Example: "He went to school" to "He went to school."
    if not sentence.endswith('.'):
        sentence += '.'
    return sentence

In [None]:
def random_capitalization(sentence):
    # Example: "He went to School."
    words = sentence.split()
    random_word = random.choice(words)
    if random.choice([True, False]):
        random_word = random_word.capitalize() if random_word.islower() else random_word.lower()
    return sentence.replace(random_word, random_word, 1)

In [None]:
def incorrect_article(sentence):
    # Example: "He is a engineer." instead of "He is an engineer."
    words = nltk.word_tokenize(sentence)
    for i, word in enumerate(words):
        if word.lower() in ['a', 'an']:
            words[i] = 'an' if word == 'a' else 'a'
    return ' '.join(words)

In [None]:

def introduce_grammatical_errors(sentence):
    functions = [subject_verb_agreement, incorrect_tense, missing_punctuation, random_capitalization, incorrect_article]
    random.shuffle(functions)
    return functions[0](sentence)

In [None]:
def grammatical_mistakes(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    sentences = nltk.sent_tokenize(paragraph)
    severity_ratio = level / 5
    num_sentences_to_perturb = int(len(sentences) * severity_ratio)
    sentences_to_perturb = random.sample(range(len(sentences)), num_sentences_to_perturb)

    for i in sentences_to_perturb:
        sentences[i] = introduce_grammatical_errors(sentences[i])

    return ' '.join(sentences)

In [None]:
paragraph = "He goes to school. She has a cat. They were playing in the park."
level = 5
result = grammatical_mistakes(paragraph, level)
print(result)

He goe to school . She has a cat . They were playing in the park.


## Re-arrange Sentences

In [None]:
import nltk
import random

# Download required NLTK data files
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
def rearrange_sentences(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    sentences = nltk.sent_tokenize(paragraph)
    num_sentences = len(sentences)
    severity_ratio = level / 5
    num_sentences_to_rearrange = int(num_sentences * severity_ratio)

    indices_to_rearrange = random.sample(range(num_sentences), num_sentences_to_rearrange)
    sentences_to_rearrange = [sentences[i] for i in indices_to_rearrange]

    # Shuffle the selected sentences
    random.shuffle(sentences_to_rearrange)

    # Place the shuffled sentences back into their original positions
    for i, index in enumerate(indices_to_rearrange):
        sentences[index] = sentences_to_rearrange[i]

    return ' '.join(sentences)

In [None]:
paragraph = "He goes to school. She has a cat. They were playing in the park. It was a sunny day. Everyone was happy."
level = 3
result = rearrange_sentences(paragraph, level)
print(result)

He goes to school. She has a cat. Everyone was happy. They were playing in the park. It was a sunny day.


## Re-arrange words in a sentence

In [None]:
def rearrange_words_in_sentence(sentence, severity_ratio):
    words = nltk.word_tokenize(sentence)
    num_words = len(words)
    num_words_to_rearrange = int(num_words * severity_ratio)

    if num_words_to_rearrange <= 1:
        return sentence  # No rearrangement needed if 1 or fewer words to rearrange

    indices_to_rearrange = random.sample(range(num_words), num_words_to_rearrange)
    words_to_rearrange = [words[i] for i in indices_to_rearrange]

    # Shuffle the selected words
    random.shuffle(words_to_rearrange)

    # Place the shuffled words back into their original positions
    for i, index in enumerate(indices_to_rearrange):
        words[index] = words_to_rearrange[i]

    return ' '.join(words)

In [None]:
def rearrange_words(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    severity_ratio = level / 5
    sentences = nltk.sent_tokenize(paragraph)
    rearranged_sentences = [rearrange_words_in_sentence(sentence, severity_ratio) for sentence in sentences]

    return ' '.join(rearranged_sentences)

In [None]:
paragraph = "He goes to school. She has a cat. They were playing in the park. It was a sunny day. Everyone was happy."
level = 3
result = rearrange_words(paragraph, level)
print(result)

school goes to He . She has cat a . playing were park in the They . It sunny a was day . Everyone was . happy


## Adding Random Sentences

In [None]:
import nltk
import random
import string

# Download required NLTK data files
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
def generate_random_sentence():
    subjects = ["The cat", "A dog", "The bird", "A child", "An old man", "The woman", "A car", "The teacher"]
    verbs = ["jumps", "runs", "flies", "drives", "sleeps", "eats", "sings", "talks"]
    objects = ["over the fence", "to the store", "in the sky", "on the road", "on the bed", "a meal", "a song", "to the students"]

    subject = random.choice(subjects)
    verb = random.choice(verbs)
    obj = random.choice(objects)

    sentence = f"{subject} {verb} {obj}."
    return sentence

In [None]:
def add_unnecessary_sentences(paragraph, level):
    if level < 1 or level > 5:
        raise ValueError("Level must be between 1 and 5")

    sentences = nltk.sent_tokenize(paragraph)
    num_sentences = len(sentences)
    severity_ratio = level / 5
    num_extra_sentences = int(num_sentences * severity_ratio * 5)  # 5/6 times bigger

    # Generate random sentences to add
    extra_sentences = [generate_random_sentence() for _ in range(num_extra_sentences)]

    # Combine original and extra sentences
    combined_sentences = sentences + extra_sentences
    random.shuffle(combined_sentences)

    return ' '.join(combined_sentences)

In [None]:
# Example usage
paragraph = "He goes to school. She has a cat. They were playing in the park. It was a sunny day. Everyone was happy."
level = 5
result = add_unnecessary_sentences(paragraph, level)
print(result)

A dog flies to the students. An old man eats a meal. A dog talks on the bed. A dog talks on the road. It was a sunny day. The woman drives to the students. The bird flies to the students. Everyone was happy. She has a cat. An old man sleeps to the store. An old man sings to the students. A car sleeps to the store. A dog eats on the bed. An old man flies on the road. The cat sings over the fence. An old man jumps over the fence. They were playing in the park. A car runs in the sky. The cat runs on the road. The woman flies to the students. He goes to school. A car jumps over the fence. The woman sleeps in the sky. The cat runs to the students. An old man drives a meal. The cat talks on the road. A child jumps a meal. An old man flies a meal. The woman jumps on the bed. A car runs a song.
