In [17]:
import re
import json
import nltk
from tqdm import tqdm

# read in the cards and their descriptions
# with open("ppn_deck_cleaned.json", "w") as write_file:
#     json.dump(card_deck, write_file, indent=4)

# read card_deck from ppn_deck.json file
with open("ppn_deck.json", "r") as read_file:
    card_deck = json.load(read_file)

for card in tqdm(card_deck):
    # in the card['summary'] which is found in card['summary'][1] there should be a string that looks like this: "example summary text for a card that has a clue in it".
    # the card['title'] is the title of the card, and it may be something like "Eragon" or "The Hobbit".
    # These words (Eragon, The, Hobbit) are the clues that we want to hide in the summary because they give away the answer to the card. Usually the cards mention the words at least once, towards the beginning of the summary.
    # We want to hide the clues in the summary so that the user can quickly read the summary like catchphrase and try to get others to guess the card title without using any of the (nonstopwords) words in the title of the card.

    # first, we need to get the title of the card and split it into a list of words
    card_title = card['title']

    # Now, remove any exact matches of the card title phrase from the summary phrase before we split the summary phrase into a list of words.
    # This is because the card title phrase may be a substring of the summary phrase, and we don't want to remove the substring from the summary phrase.

    # how? we can use the re.sub() function to replace the card title phrase with an empty string in the summary phrase.
    # the re.sub() function takes 3 arguments: the regex pattern to match, the string to replace the match with, and the string to search for the match in.
    regex_pattern = r"\b" + card_title.lower() + r"\b"
    string_to_replace_with = "This/They/It"
    string_to_search = card['summary'][1]
    card['summary'][1] = re.sub(regex_pattern, string_to_replace_with, string_to_search, flags=re.IGNORECASE)


    # now split the title of the card into a list of words, and remove any stopwords from the list of words using nltk stopwords
    stopwords = nltk.corpus.stopwords.words('english')
    card_title_words = card_title.split()
    card_title_words = [word for word in card_title_words if word not in stopwords]
    # now we have a list of words that are in the title of the card, but not stopwords. (i.e. Hobbit, Eragon, etc.)
    # if these words appear in the card summary, we want to hide them in the summary by replacing them with the phrase "clue_hider" (or something similar)
    # we can use the re.sub() function to replace the words in the card title with the phrase "clue_hider" in the summary phrase.

    for word in card_title_words: # for every word in the card title
        word = word.lower()
        string_to_replace_with = "---"
        # replace '(',')' with '' in the word with regex
        word = re.sub(r'[\(\)]', '', word) # replace the '(' and ')' with an empty string
        regex_pattern1 = r'\b' + word + r'\b' # the regex pattern to match is the word
        #print(regex_pattern)
        # regex_pattern2 = r'listen(.*?);'
        string_to_search = str(card['summary'][1]).lower() #! summary
        card['summary'][1] = re.sub(regex_pattern1, string_to_replace_with, string_to_search) # replace the word with the phrase "clue_hider" in the summary phrase.

# save the card_deck to a new file
with open("ppn_deck_cleaned_clues_hidden.json", "w") as write_file:
    json.dump(card_deck, write_file, indent=4)


100%|██████████| 5167/5167 [00:01<00:00, 3235.58it/s]


In [18]:
# print the first 10 cards in the card_deck
for i in range(10):
    print(card_deck[i]['title'])
    print(card_deck[i]['summary'][1])
    print()

Climate change and cities
this/they/it are deeply connected. --- are one of the greatest contributors and likely best opportunities for addressing --- ---. --- are also one of the most vulnerable parts of the human society to the effects of --- ---, and likely one of the most important solutions for reducing the environmental impact of humans. more than half of the worlds population is in ---, consuming a large portion of food and goods produced outside of ---. the increase of urban population growth is one of the main factors in air-quality problems. in the year 2016, 31 mega---- reported having at least 10 million in their population, 8 of which surrear endd 20 million people. the un projects that 68% of the world population will live in urban areas by 2050. hence, --- have a significant influence on construction and transportationtwo of the key contributors to global warming emissions. moreover, because of processes that create --- conflict and --- refugees, city areas are expected 