In [1]:
import torch
from transformers import RobertaTokenizer, RobertaForMaskedLM
import pandas as pd

# load RoBERTa model and tokenizer
model = RobertaForMaskedLM.from_pretrained('roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# read the tsv file
filename = "./data/trial/tsar2022_en_trial_none.tsv"
data = pd.read_csv(filename, sep='\t', header=None, names=["sentence", "complex_word"])

In [2]:
# substitute generation


# in each row, mask the complex word and generate substitutes
for index, row in data.iterrows():
    sentence, complex_word = row["sentence"], row["complex_word"]
    
    # in the sentence, replace the complex word with a masked word
    sentence_masked_word = sentence.replace(complex_word, "<mask>")
    
    # concatenate the sentence with the complex word and the sentence with the masked word 
    sentences_concat = f"{sentence} {tokenizer.sep_token} {sentence_masked_word}"
    
    # tokenize the concatenated sentence
    sentences_concat_tokenized = tokenizer.encode(sentences_concat, return_tensors='pt')
    
    # find the masked word in the tokenized sentence
    mask_location = torch.where( sentences_concat_tokenized == tokenizer.mask_token_id)[1].item()

    # generate predictions for the masked word
    with torch.no_grad():
        outputs = model(sentences_concat_tokenized)
        predictions = outputs.logits

    # get the top-k predictions
    top_k = 30
    top_tokens = torch.topk(predictions[0, mask_location], top_k).indices

    # decode the top-k tokens
    substitutes = [tokenizer.decode(token.item()).strip() for token in top_tokens]
    
    # print sentence, complex word, and the top_k substitutes for the complex word
    print(f"Sentence: {sentence}")
    print(f"Complex word: {complex_word}")
    print(f"Top {top_k} substitutes: {substitutes}\n")

Sentence: A Spanish government source, however, later said that banks able to cover by themselves losses on their toxic property assets will not be forced to remove them from their books while it will be compulsory for those receiving public help.
Complex word: compulsory
Top 30 substitutes: ['compulsory', 'mandatory', 'obligatory', 'voluntary', 'required', 'optional', 'obliged', 'uniform', 'necessary', 'available', 'mandated', 'sufficient', 'routine', 'forced', 'customary', 'prerequisite', 'feasible', 'indispensable', 'forthcoming', 'universal', 'requirement', 'involuntary', 'obligated', 'compelled', 'conditional', 'enforced', 'contingent', 'possible', 'compulsion', 'Mandatory']

Sentence: Rajoy's conservative government had instilled markets with a brief dose of confidence by stepping into Bankia, performing a U-turn on its refusal to spend public money to rescue banks.
Complex word: instilled
Top 30 substitutes: ['infused', 'injected', 'endowed', 'illed', 'inst', 'furnished', 'suppl

#### Cosine similarity

In [3]:
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# using sklearn cosine similarity with padded sentence lengths to account for different lengths due to substitutes
# problem: values are very close to each other, probably the cosine similarity of both sentences is calculated. And a complex word that is the same as its substitute did not get a value near 1.



# set the maximum length for both sentences
max_length = 128


# in each row, for each complex word: 
for index, row in data.iterrows():

    # 1. Substitute Generation (SG): perform masking and generate substitutes:
    
    ## print the sentence and the complex word
    sentence, complex_word = row["sentence"], row["complex_word"]
    print(f"Sentence: {sentence}")
    print(f"Complex word: {complex_word}")

    ## in the sentence, replace the complex word with a masked word
    sentence_masked_word = sentence.replace(complex_word, "<mask>")

    ## concatenate the sentence with the complex word and the sentence with the masked word, by using RoBERTa's separator token to create one string of both sentences
    sentences_concat = f"{sentence} {tokenizer.sep_token} {sentence_masked_word}"

    ## tokenize the concatenated sentence
    sentences_concat_tokenized = tokenizer.encode(sentences_concat, add_special_tokens=True)

    ## make sure the input length is not longer than max_length
    if len(sentences_concat_tokenized) > max_length:
        sentences_concat_tokenized = sentences_concat_tokenized[:max_length]

    ## pad the tokenized sentence to max_length
    padding_length = max_length - len(sentences_concat_tokenized)
    sentences_concat_tokenized += [tokenizer.pad_token_id] * padding_length

    ## convert the tokenized sentence to a tensor
    sentences_concat_tokenized = torch.tensor(sentences_concat_tokenized)

    ## find the masked word in the tokenized sentence
    mask_location = torch.where(sentences_concat_tokenized == tokenizer.mask_token_id)[0].item()

    ## generate predictions for the masked word (forward pass not needed for predictions, only for training)
    with torch.no_grad():
        outputs = model(sentences_concat_tokenized.unsqueeze(0))
        predictions = outputs.logits[0]

    ## get the top-k substitutes based on the predicted logits
    top_k = 30
    top_tokens = torch.topk(predictions[mask_location], top_k).indices

    ## decode the top-k substitutes and print them
    substitutes = [tokenizer.decode(token.item()).strip() for token in top_tokens]
    print(f"SG step: generated substitutes: {substitutes}")
    print()
    
    
    # 2. Substitute Selection (SS): apply cosine similarity, of:
    # - The contextualized embedding of the complex word in the context of the original sentence, 
    # - and the contextualized embedding of each substitute after replacing the complex word in the original sentence with the substitute word.

    ## get the contextualized embedding of the complex word in the original sentence
    with torch.no_grad():
        complex_word_tokenized = tokenizer.encode(complex_word, add_special_tokens=True)

        ## make sure the input length is not longer than max_length
        if len(complex_word_tokenized) > max_length:
            complex_word_tokenized = complex_word_tokenized[:max_length]

        ## pad the tokenized sentence to max_length
        padding_length = max_length - len(complex_word_tokenized)
        complex_word_tokenized += [tokenizer.pad_token_id] * padding_length

        ## convert the tokenized sentence to a tensor
        complex_word_tokenized = torch.tensor(complex_word_tokenized)

        complex_word_embedding = model.get_input_embeddings()(complex_word_tokenized.unsqueeze(0)).squeeze(0).flatten()
        complex_word_embedding_norm = normalize(complex_word_embedding.reshape(1, -1))


    ## calculate the cosine similarity between the complex word embedding and each substitute embedding
    for substitute in substitutes:
        ## replace the complex word in the original sentence with the substitute
        sentence_substitute = sentence.replace(complex_word, substitute)

        ## tokenize the sentence with the substitute
        sentence_substitute_tokenized = tokenizer.encode(sentence_substitute, add_special_tokens=True)

        ## make sure the input length is not longer than max_length
        if len(sentence_substitute_tokenized) > max_length:
            sentence_substitute_tokenized = sentence_substitute_tokenized[:max_length]

        ## pad the tokenized sentence to max_length
        padding_length = max_length - len(sentence_substitute_tokenized)
        sentence_substitute_tokenized += [tokenizer.pad_token_id] * padding_length

        ## convert the tokenized sentence to a tensor
        sentence_substitute_tokenized = torch.tensor(sentence_substitute_tokenized)

        ## get the contextualized embedding of the substitute word in the modified sentence
        with torch.no_grad():
            substitute_embedding = model.get_input_embeddings()(sentence_substitute_tokenized.unsqueeze(0)).squeeze(0).flatten()
            substitute_embedding_norm = normalize(substitute_embedding.reshape(1, -1))


        ## calculate cosine similarity
        similarity = cosine_similarity(complex_word_embedding_norm, substitute_embedding_norm)
        print(f"SS step: substitute: {substitute}, cosine similarity: {1-similarity[0][0]}")
    print()
    
    

Sentence: A Spanish government source, however, later said that banks able to cover by themselves losses on their toxic property assets will not be forced to remove them from their books while it will be compulsory for those receiving public help.
Complex word: compulsory
SG step: generated substitutes: ['compulsory', 'mandatory', 'obligatory', 'obliged', 'voluntary', 'uniform', 'required', 'mandated', 'optional', 'dogma', 'forced', 'intrinsic', 'advisable', 'routine', 'forthcoming', 'prerequisite', 'ubiquitous', 'involuntary', 'forcibly', 'feasible', 'compelled', 'indispensable', 'necessary', 'obligated', 'liable', 'habitual', 'enforced', 'universal', 'contingent', 'commonplace']

SS step: substitute: compulsory, cosine similarity: 0.9015046486291226
SS step: substitute: mandatory, cosine similarity: 0.9021189359409612
SS step: substitute: obligatory, cosine similarity: 0.9014813422164168
SS step: substitute: obliged, cosine similarity: 0.9015624717592371
SS step: substitute: voluntar

In [5]:
# with ranking on cos sim scores

# using sklearn cosine similarity with padded sentence lengths to account for different lengths due to substitutes
# problem: values are very close to each other, probably the cosine similarity of both sentences is calculated. And a complex word that is the same as its substitute did not get a value near 1.


# set the maximum length for both sentences
max_length = 128


# in each row, for each complex word: 
for index, row in data.iterrows():

    # 1. Substitute Generation (SG): perform masking and generate substitutes:
    
    ## print the sentence and the complex word
    sentence, complex_word = row["sentence"], row["complex_word"]
    print(f"Sentence: {sentence}")
    print(f"Complex word: {complex_word}")

    ## in the sentence, replace the complex word with a masked word
    sentence_masked_word = sentence.replace(complex_word, "<mask>")

    ## concatenate the sentence with the complex word and the sentence with the masked word, by using RoBERTa's separator token to create one string of both sentences
    sentences_concat = f"{sentence} {tokenizer.sep_token} {sentence_masked_word}"

    ## tokenize the concatenated sentence
    sentences_concat_tokenized = tokenizer.encode(sentences_concat, add_special_tokens=True)

    ## make sure the input length is not longer than max_length
    if len(sentences_concat_tokenized) > max_length:
        sentences_concat_tokenized = sentences_concat_tokenized[:max_length]

    ## pad the tokenized sentence to max_length
    padding_length = max_length - len(sentences_concat_tokenized)
    sentences_concat_tokenized += [tokenizer.pad_token_id] * padding_length

    ## convert the tokenized sentence to a tensor
    sentences_concat_tokenized = torch.tensor(sentences_concat_tokenized)

    ## find the masked word in the tokenized sentence
    mask_location = torch.where(sentences_concat_tokenized == tokenizer.mask_token_id)[0].item()

    ## generate predictions for the masked word (forward pass not needed for predictions, only for training)
    with torch.no_grad():
        outputs = model(sentences_concat_tokenized.unsqueeze(0))
        predictions = outputs.logits[0]

    ## get the top-k substitutes based on the predicted logits
    top_k = 30
    top_tokens = torch.topk(predictions[mask_location], top_k).indices

    ## decode the top-k substitutes and print them
    substitutes = [tokenizer.decode(token.item()).strip() for token in top_tokens]
    print(f"SG step: generated substitutes: {substitutes}")
    print()
    
    
    # 2. Substitute Selection (SS): apply cosine similarity, of:
    # - The contextualized embedding of the complex word in the context of the original sentence, 
    # - and the contextualized embedding of each substitute after replacing the complex word in the original sentence with the substitute word.

    ## get the contextualized embedding of the complex word in the original sentence
    with torch.no_grad():
        complex_word_tokenized = tokenizer.encode(complex_word, add_special_tokens=True)

        ## make sure the input length is not longer than max_length
        if len(complex_word_tokenized) > max_length:
            complex_word_tokenized = complex_word_tokenized[:max_length]

        ## pad the tokenized sentence to max_length
        padding_length = max_length - len(complex_word_tokenized)
        complex_word_tokenized += [tokenizer.pad_token_id] * padding_length

        ## convert the tokenized sentence to a tensor
        complex_word_tokenized = torch.tensor(complex_word_tokenized)

        complex_word_embedding = model.get_input_embeddings()(complex_word_tokenized.unsqueeze(0)).squeeze(0).flatten()
        complex_word_embedding_norm = normalize(complex_word_embedding.reshape(1, -1))


    ## calculate the cosine similarity between the complex word embedding and each substitute embedding
    # Create a list to store the cosine similarity scores of the substitutes
    substitute_similarities = []
    for substitute in substitutes:
        ## replace the complex word in the original sentence with the substitute
        sentence_substitute = sentence.replace(complex_word, substitute)

        ## tokenize the sentence with the substitute
        sentence_substitute_tokenized = tokenizer.encode(sentence_substitute, add_special_tokens=True)

        ## make sure the input length is not longer than max_length
        if len(sentence_substitute_tokenized) > max_length:
            sentence_substitute_tokenized = sentence_substitute_tokenized[:max_length]

        ## pad the tokenized sentence to max_length
        padding_length = max_length - len(sentence_substitute_tokenized)
        sentence_substitute_tokenized += [tokenizer.pad_token_id] * padding_length

        ## convert the tokenized sentence to a tensor
        sentence_substitute_tokenized = torch.tensor(sentence_substitute_tokenized)

        ## get the contextualized embedding of the substitute word in the modified sentence
        with torch.no_grad():
            substitute_embedding = model.get_input_embeddings()(sentence_substitute_tokenized.unsqueeze(0)).squeeze(0).flatten()
            substitute_embedding_norm = normalize(substitute_embedding.reshape(1, -1))


        # Calculate cosine similarity
        similarity = cosine_similarity(complex_word_embedding_norm, substitute_embedding_norm)
        substitute_similarities.append((substitute, similarity[0][0]))

    # Sort the substitutes based on their cosine similarity scores
    substitute_similarities_sorted = sorted(substitute_similarities, key=lambda x: x[1], reverse=True)

    # Print the sorted substitutes and their cosine similarity scores
    for substitute, similarity in substitute_similarities_sorted:
        print(f"SS step: substitute: {substitute}, cosine similarity: {similarity}")

    print()






    

Sentence: A Spanish government source, however, later said that banks able to cover by themselves losses on their toxic property assets will not be forced to remove them from their books while it will be compulsory for those receiving public help.
Complex word: compulsory
SG step: generated substitutes: ['compulsory', 'mandatory', 'obligatory', 'obliged', 'voluntary', 'uniform', 'required', 'mandated', 'optional', 'dogma', 'forced', 'intrinsic', 'advisable', 'routine', 'forthcoming', 'prerequisite', 'ubiquitous', 'involuntary', 'forcibly', 'feasible', 'compelled', 'indispensable', 'necessary', 'obligated', 'liable', 'habitual', 'enforced', 'universal', 'contingent', 'commonplace']

SS step: substitute: advisable, cosine similarity: 0.0994430327015135
SS step: substitute: involuntary, cosine similarity: 0.0990138854119515
SS step: substitute: prerequisite, cosine similarity: 0.09882613672329427
SS step: substitute: compelled, cosine similarity: 0.09877225733489117
SS step: substitute: o

In [6]:
import re

In [7]:
# using regex
# using sklearn cosine similarity with padded sentence lengths to account for different lengths due to substitutes
# trying cosine similarity of the embeddings of complex word vs. embeddings of substitute word.
## values are very much the same, a lot lower than above, and still a complex word that is the same as its substitute did not get a value near 1.




# set the maximum length for both sentences
max_length = 128


# in each row, for each complex word: 
for index, row in data.iterrows():

    # 1. Substitute Generation (SG): perform masking and generate substitutes:
    
    ## print the sentence and the complex word
    sentence, complex_word = row["sentence"], row["complex_word"]
    print(f"Sentence: {sentence}")
    print(f"Complex word: {complex_word}")

    ## in the sentence, replace the complex word with a masked word
    sentence_masked_word = sentence.replace(complex_word, "<mask>")

    ## concatenate the sentence with the complex word and the sentence with the masked word, by using RoBERTa's separator token to create one string of both sentences
    sentences_concat = f"{sentence} {tokenizer.sep_token} {sentence_masked_word}"

    ## tokenize the concatenated sentence
    sentences_concat_tokenized = tokenizer.encode(sentences_concat, add_special_tokens=True)

    ## make sure the input length is not longer than max_length
    if len(sentences_concat_tokenized) > max_length:
        sentences_concat_tokenized = sentences_concat_tokenized[:max_length]

    ## pad the tokenized sentence to max_length
    padding_length = max_length - len(sentences_concat_tokenized)
    sentences_concat_tokenized += [tokenizer.pad_token_id] * padding_length

    ## convert the tokenized sentence to a tensor
    sentences_concat_tokenized = torch.tensor(sentences_concat_tokenized)

    ## find the masked word in the tokenized sentence
    mask_location = torch.where(sentences_concat_tokenized == tokenizer.mask_token_id)[0].item()

    ## generate predictions for the masked word (no gradients, as forward pass not needed for predictions, only for training)
    with torch.no_grad():
        outputs = model(sentences_concat_tokenized.unsqueeze(0))
        predictions = outputs.logits[0]

    ## get the top-k substitutes based on the predicted logits
    top_k = 30
    top_tokens = torch.topk(predictions[mask_location], top_k).indices

    ## decode the top-k substitutes and print them
    substitutes = [tokenizer.decode(token.item()).strip() for token in top_tokens]
    print(f"SG step: generated substitutes: {substitutes}")
    print()
    

    # 2. Substitute Selection (SS): Apply cosine similarity for:
    # - The contextualized embedding of the complex word in the context of the original sentence,
    # - and the contextualized embedding of each substitute after replacing the complex word in the original sentence with the substitute word.

    ## Step 2.1: Get the contextualized embedding of the complex word in the context of the original sentence
    with torch.no_grad():
        complex_word_embedding = model(sentences_concat_tokenized.unsqueeze(0)).hidden_states[-1].squeeze(0)

    ## Normalize the complex word embedding
    complex_word_embedding_norm = normalize(complex_word_embedding.reshape(1, -1))

    # Iterate through the substitutes
    substitute_similarities = []
    for substitute in substitutes:
        ## Step 2.2: Replace the complex word with a substitute in the sentence
        sentence_substitute = re.sub(r'\b' + re.escape(complex_word) + r'\b', substitute, sentence)

        ## Step 2.3: Tokenize the sentence with the substitute
        sentence_substitute_tokenized = tokenizer.encode(sentence_substitute, add_special_tokens=True)

        ## Pad the tokenized sentence to max_length
        padding_length_sub = max_length - len(sentence_substitute_tokenized)
        sentence_substitute_tokenized += [tokenizer.pad_token_id] * padding_length_sub

        ## Convert the tokenized sentence to a tensor
        input_ids_sub = torch.tensor(sentence_substitute_tokenized).unsqueeze(0)

        ## Step 2.4: Get the embedding of the substitute in the context of the sentence with the substitute
        with torch.no_grad():
            substitute_embedding = model(input_ids_sub).hidden_states[-1].squeeze(0)

        ## Normalize the substitute embedding
        substitute_embedding_norm = normalize(substitute_embedding.reshape(1, -1))

        # Calculate cosine similarity
        similarity = cosine_similarity(complex_word_embedding_norm, substitute_embedding_norm)
        substitute_similarities.append((substitute, similarity[0][0]))

    # Sort the substitutes based on their cosine similarity
    substitute_similarities = sorted(substitute_similarities, key=lambda x: x[1], reverse=True)

    # Print the substitutes ranked on cosine similarity value
    for substitute, similarity in substitute_similarities:
        print(f"SS step: substitute: {substitute}, ranked on cosine similarity: {similarity}")
    print()


Sentence: A Spanish government source, however, later said that banks able to cover by themselves losses on their toxic property assets will not be forced to remove them from their books while it will be compulsory for those receiving public help.
Complex word: compulsory
SG step: generated substitutes: ['compulsory', 'mandatory', 'obligatory', 'obliged', 'voluntary', 'uniform', 'required', 'mandated', 'optional', 'dogma', 'forced', 'intrinsic', 'advisable', 'routine', 'forthcoming', 'prerequisite', 'ubiquitous', 'involuntary', 'forcibly', 'feasible', 'compelled', 'indispensable', 'necessary', 'obligated', 'liable', 'habitual', 'enforced', 'universal', 'contingent', 'commonplace']



TypeError: 'NoneType' object is not subscriptable

In [8]:
# ranking on cosine similarity 

# using regex
# using sklearn cosine similarity with padded sentence lengths to account for different lengths due to substitutes
# trying cosine similarity of the embeddings of complex word vs. embeddings of substitute word.
## values are very much the same, but a lot lower than above, and still a complex word that is the same as its substitute did not get a value near 1.



# set the maximum length for both sentences
max_length = 128


# in each row, for each complex word: 
for index, row in data.iterrows():

    # 1. Substitute Generation (SG): perform masking and generate substitutes:
    
    ## print the sentence and the complex word
    sentence, complex_word = row["sentence"], row["complex_word"]
    print(f"Sentence: {sentence}")
    print(f"Complex word: {complex_word}")

    ## in the sentence, replace the complex word with a masked word
    sentence_masked_word = sentence.replace(complex_word, "<mask>")

    ## concatenate the sentence with the complex word and the sentence with the masked word, by using RoBERTa's separator token to create one string of both sentences
    sentences_concat = f"{sentence} {tokenizer.sep_token} {sentence_masked_word}"

    ## tokenize the concatenated sentence
    sentences_concat_tokenized = tokenizer.encode(sentences_concat, add_special_tokens=True)

    ## make sure the input length is not longer than max_length
    if len(sentences_concat_tokenized) > max_length:
        sentences_concat_tokenized = sentences_concat_tokenized[:max_length]

    ## pad the tokenized sentence to max_length
    padding_length = max_length - len(sentences_concat_tokenized)
    sentences_concat_tokenized += [tokenizer.pad_token_id] * padding_length

    ## convert the tokenized sentence to a tensor
    sentences_concat_tokenized = torch.tensor(sentences_concat_tokenized)

    ## find the masked word in the tokenized sentence
    mask_location = torch.where(sentences_concat_tokenized == tokenizer.mask_token_id)[0].item()

    ## generate predictions for the masked word (no gradients, as forward pass not needed for predictions, only for training)
    with torch.no_grad():
        outputs = model(sentences_concat_tokenized.unsqueeze(0))
        predictions = outputs.logits[0]

    ## get the top-k substitutes based on the predicted logits
    top_k = 30
    top_tokens = torch.topk(predictions[mask_location], top_k).indices

    ## decode the top-k substitutes and print them
    substitutes = [tokenizer.decode(token.item()).strip() for token in top_tokens]
    print(f"SG step: generated substitutes: {substitutes}")
    print()
    

    # 2. Substitute Selection (SS): Apply cosine similarity for:
# - The contextualized embedding of the complex word in the context of the original sentence,
# - and the contextualized embedding of each substitute after replacing the complex word in the original sentence with the substitute word.

## Step 2.1: Get the contextualized embedding of the complex word in the context of the original sentence
    with torch.no_grad():
        complex_word_embedding = model(sentences_concat_tokenized.unsqueeze(0)).hidden_states[-1].squeeze(0)

    ## Normalize the complex word embedding
    complex_word_embedding_norm = normalize(complex_word_embedding.reshape(1, -1))

    # Iterate through the substitutes
    substitute_similarities = []
    for substitute in substitutes:
        ## Step 2.2: Replace the complex word with a substitute in the sentence
        sentence_substitute = re.sub(r'\b' + re.escape(complex_word) + r'\b', substitute, sentence)

        ## Step 2.3: Tokenize the sentence with the substitute
        sentence_substitute_tokenized = tokenizer.encode(sentence_substitute, add_special_tokens=True)

        ## Pad the tokenized sentence to max_length
        padding_length_sub = max_length - len(sentence_substitute_tokenized)
        sentence_substitute_tokenized += [tokenizer.pad_token_id] * padding_length_sub

        ## Convert the tokenized sentence to a tensor
        input_ids_sub = torch.tensor(sentence_substitute_tokenized).unsqueeze(0)

        ## Step 2.4: Get the embedding of the substitute in the context of the sentence with the substitute
        with torch.no_grad():
            substitute_embedding = model(input_ids_sub).hidden_states[-1].squeeze(0)

        ## Normalize the substitute embedding
        substitute_embedding_norm = normalize(substitute_embedding.reshape(1, -1))

        # Calculate cosine similarity
        similarity = cosine_similarity(complex_word_embedding_norm, substitute_embedding_norm)
        substitute_similarities.append((substitute, similarity[0][0]))

    # Sort the substitutes based on their cosine similarity
    substitute_similarities = sorted(substitute_similarities, key=lambda x: x[1], reverse=True)

    # Print the substitutes ranked on cosine similarity value
    print("SS step: Ranked substitutes based on cosine similarity:")
    ranked_substitutes = [sub[0] for sub in substitute_similarities]
    print(ranked_substitutes)
    print()
    
    for substitute, similarity in substitute_similarities:
        print(f"SS step: substitute: {substitute}, cosine similarity: {similarity}")


Sentence: A Spanish government source, however, later said that banks able to cover by themselves losses on their toxic property assets will not be forced to remove them from their books while it will be compulsory for those receiving public help.
Complex word: compulsory
SG step: generated substitutes: ['compulsory', 'mandatory', 'obligatory', 'obliged', 'voluntary', 'uniform', 'required', 'mandated', 'optional', 'dogma', 'forced', 'intrinsic', 'advisable', 'routine', 'forthcoming', 'prerequisite', 'ubiquitous', 'involuntary', 'forcibly', 'feasible', 'compelled', 'indispensable', 'necessary', 'obligated', 'liable', 'habitual', 'enforced', 'universal', 'contingent', 'commonplace']



TypeError: 'NoneType' object is not subscriptable