In [221]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tag import pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
import random

# Ensure all necessary NLTK data is downloaded
#nltk.download('stopwords')
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')
#nltk.download('wordnet')
#nltk.download('omw-1.4')
word_dict ={}
# Function to map NLTK's POS tags to WordNet's POS tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wn.ADJ
    elif treebank_tag.startswith('V'):
        return wn.VERB
    elif treebank_tag.startswith('N'):
        return wn.NOUN
    elif treebank_tag.startswith('R'):
        return wn.ADV
    else:
        return wn.NOUN  # Default to noun if not found

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Clean and normalize a word by removing non-alphanumeric characters
def clean_word(word):
    return ''.join(char for char in word if char.isalnum())

# Process the sentence: clean it, remove stopwords, lemmatize, and find synonyms
sentence = "Such an amazing movie cannot complain."
stop_words = set(stopwords.words('english'))

# Tokenize, clean, and remove stopwords
words = [clean_word(word) for word in word_tokenize(sentence.lower()) if word.lower() not in stop_words and word.isalnum()]

# POS tagging
tagged_words = pos_tag(words)

# Lemmatize words based on their POS tags
lemmatized_words = [lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag)) for word, tag in tagged_words]

# Function to find synonyms and calculate similarity
def find_synonyms_and_similarity(word, pos):
    base_synsets = wn.synsets(word, pos=pos)
    if not base_synsets:
        print(f"No synsets found for {word}.")
        return
    
    base_synset = base_synsets[0]  # Use the first synset as the base for comparison
    synonyms = set()
    similarities = {}

    for synset in wn.synsets(word, pos=pos):
        for lemma in synset.lemmas():
            synonym = lemma.name()
            if synonym != word:
                synonyms.add(synonym)
                synonym_synsets = wn.synsets(synonym, pos=pos)
                if synonym_synsets:
                    similarity = base_synset.wup_similarity(synonym_synsets[0])
                    if similarity is not None:
                        similarities[synonym] = similarity
    
    for synonym, similarity in similarities.items():
        if word_dict.get(word) is None:
            word_dict[word] = [] 
        word_dict[word].append([synonym,similarity])
# Print synonyms and similarity scores for each lemmatized word
for word, tag in tagged_words:
    wordnet_pos = get_wordnet_pos(tag)
    find_synonyms_and_similarity(word, wordnet_pos)


No synsets found for complain.


In [222]:

def replace_with_similar_word(sentence, synonym_dict):
    """Replace a random word in the sentence with its most similar synonym based on similarity scores.

    Args:
        sentence (str): The sentence where a word will be replaced.
        synonym_dict (dict): A dictionary where keys are words from the sentence and 
                             values are lists of tuples (synonym, similarity_score).

    Returns:
        str: The updated sentence with a word replaced by its most similar synonym.

    Raises:
        ValueError: If synonym_dict is empty or does not contain synonyms for any word in the sentence.
    """
    if not synonym_dict:
        raise ValueError("The synonym dictionary is empty, no synonyms to choose from.")

    # Split the sentence into words
    sentence_words = sentence.split()

    # Choose a random word from the sentence that has a synonym
    words_with_synonyms = [word for word in sentence_words if word in synonym_dict]
    if not words_with_synonyms:
        raise ValueError("No words in the sentence have synonyms in the dictionary.")

    word_to_replace = random.choice(words_with_synonyms)

    # Choose the synonym with the highest similarity score
    synonyms_with_scores = synonym_dict[word_to_replace]
    replacement_word = max(synonyms_with_scores, key=lambda x: x[1])[0]

    # Replace the word in the sentence
    updated_sentence = ' '.join([
        replacement_word if word == word_to_replace else word
        for word in sentence_words
    ])

    return updated_sentence

In [223]:
def clean_word(word):
    """Return a cleaned version of the word."""
    return ''.join(char for char in word if char.isalnum()).lower()

def replace_specific_word(sentence, word_to_replace, replacement_word):
    """Replace a specific word in the sentence with the provided replacement word."""
    words = sentence.split()
    replaced_sentence = ' '.join([replacement_word if clean_word(word) == clean_word(word_to_replace) else word for word in words])
    return replaced_sentence

def focused_tabu_search(sentence, word_dict, target_word):
    """Perform a focused tabu search on a single word's synonyms."""
    cleaned_word = clean_word(target_word)
    
    if cleaned_word not in word_dict:
        print(f"No synonyms found for {target_word}.")
        return sentence
    
    synonyms = [syn for syn in word_dict[cleaned_word] if syn[1] < 1.0]
    synonyms = sorted(synonyms, key=lambda x: x[1], reverse=True)  # Sort synonyms by similarity
    
    for synonym, _ in synonyms:
        modified_sentence = replace_specific_word(sentence, target_word, synonym)
        print(f"Replacing '{target_word}' with '{synonym}': {modified_sentence}")
    
    return modified_sentence

In [224]:
def clean_word(word):
    """Clean and normalize a word."""
    return ''.join(char for char in word if char.isalnum()).lower()

def replace_specific_word(sentence, word_to_replace, replacement_word):
    """Replace a specific word in the sentence with the provided replacement word."""
    words = sentence.split()
    replaced_sentence = ' '.join([replacement_word if clean_word(word) == clean_word(word_to_replace) else word for word in words])
    return replaced_sentence

def focused_tabu_search(sentence, word_dict, max_tabu_size=5):
    """Perform a focused tabu search on synonyms of words in the sentence, with tabu list and rule."""
    words = word_tokenize(sentence)
    tabu_list = []
    
    for _ in range(len(words)):  # Limit iterations to the number of words to prevent infinite loops
        target_word = choose_random_target_word(sentence, word_dict, tabu_list)
        if not target_word:
            print("No eligible target word found or all words are in the tabu list.")
            break

        cleaned_word = clean_word(target_word)
        print(f"Target word for replacement: {target_word}")
        
        # Filter synonyms to exclude those in the tabu list
        synonyms = [syn for syn in word_dict[cleaned_word] if syn[1] < 1.0 and syn[0] not in tabu_list]
        synonyms = sorted(synonyms, key=lambda x: x[1], reverse=True)  # Sort synonyms by similarity
        
        if synonyms:
            synonym, _ = synonyms[0]
            sentence = replace_specific_word(sentence, target_word, synonym)
            print(f"Replacing '{target_word}' with '{synonym}': {sentence}")
            
            # Update tabu list
            tabu_list.append(cleaned_word)  # Add the replaced word to tabu list
            tabu_list.extend([syn[0] for syn in synonyms])  # Add synonyms to tabu list
            tabu_list = tabu_list[-1:]

    return sentence,tabu_list

def choose_random_target_word(sentence, word_dict, tabu_list):
    """Choose a random target word from the sentence that is present in the word_dict and not in the tabu list."""
    words = word_tokenize(sentence)
    eligible_words = [word for word in words if clean_word(word) in word_dict and clean_word(word) not in tabu_list]
    if not eligible_words:
        return None
    return random.choice(eligible_words)

# Example usage

modified_sentence = focused_tabu_search(sentence, word_dict)
print("Final Modified Sentence:", modified_sentence)


Target word for replacement: movie
Replacing 'movie' with 'picture': Such an amazing picture cannot complain.
Target word for replacement: amazing
Replacing 'amazing' with 'get': Such an get picture cannot complain.
No eligible target word found or all words are in the tabu list.
Final Modified Sentence: ('Such an get picture cannot complain.', ['stick'])


In [225]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Example training data
X_train = ['This is a great movie', 'I hated this movie', 'This was a great experience', 'I love this book']
y_train = ['positive', 'negative', 'positive', 'positive']

# Train the classifier
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)

def model_predict(sentence):
    """Predict the class of a given sentence."""
    prediction = model.predict([sentence])
    return prediction[0]


In [228]:
def generate_adversarial_example(sentence, word_dict, model_predict, max_iterations=10):
    original_prediction = model_predict(sentence)
    print(f"Original sentence: '{sentence}' predicted as {original_prediction}")
    
    modified_sentence = sentence
    tabu_list = []
    iterations = 0
    
    while iterations < max_iterations:
        # Assume focused_tabu_search is adapted to return a tuple (modified_sentence, word_replaced)
        modified_sentence, word_replaced = focused_tabu_search(modified_sentence, word_dict, tabu_list)
        
        if not word_replaced:  # If no word was replaced, stop the iteration
            print("No more words left to replace.")
            break
        
        new_prediction = model_predict(modified_sentence)
        print(f"Modified sentence: '{modified_sentence}' predicted as {new_prediction}")
        
        if new_prediction != original_prediction:
            print("Adversarial example found!")
            return modified_sentence
        
        iterations += 1
    return None


In [229]:
generate_adversarial_example(sentence, word_dict, model_predict, max_iterations=10)

Original sentence: 'Such an amazing movie cannot complain.' predicted as positive
Target word for replacement: amazing
Replacing 'amazing' with 'get': Such an get movie cannot complain.
Target word for replacement: movie
Replacing 'movie' with 'picture': Such an get picture cannot complain.
No eligible target word found or all words are in the tabu list.
Modified sentence: 'Such an get picture cannot complain.' predicted as positive
No eligible target word found or all words are in the tabu list.
No more words left to replace.
