In [2]:
!cp /content/drive/MyDrive/kialo_corpus.json .

In [3]:
!pip install pycld2 regex nltk gensim spacy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycld2
  Downloading pycld2-0.41.tar.gz (41.4 MB)
[K     |████████████████████████████████| 41.4 MB 1.6 MB/s 
Building wheels for collected packages: pycld2
  Building wheel for pycld2 (setup.py) ... [?25l[?25hdone
  Created wheel for pycld2: filename=pycld2-0.41-cp37-cp37m-linux_x86_64.whl size=9834217 sha256=4021f0cc0f974d1e6ba8860357837aadc1e3715179e305dcc06b55f22be8e507
  Stored in directory: /root/.cache/pip/wheels/ed/e4/58/ed2e9f43c07d617cc81fe7aff0fc6e42b16c9cf6afe960b614
Successfully built pycld2
Installing collected packages: pycld2
Successfully installed pycld2-0.41


In [4]:
import json
import pycld2 as cld2

# Load the raw Kialo corpus. JSON is UTF-8 by specification, so the encoding is
# stated explicitly instead of relying on the platform default.
with open('kialo_corpus.json', 'r', encoding='utf-8') as f:
    out = json.load(f)

# Use the `regex` module to strip offending characters from the data; left in
# place they cause language detection to fail with
# "error: input contains invalid UTF-8 around byte ...".
# Ref: https://github.com/aboSamoor/polyglot/issues/71#issuecomment-707997790

import regex
import math

# Runs of Unicode "control" (Cc) and "surrogate" (Cs) code points.
RE_BAD_CHARS = regex.compile(r"[\p{Cc}\p{Cs}]+")

def remove_bad_chars(text):
    """Return `text` with every control/surrogate character removed.

    Note that the Cc category includes newlines and tabs, so those are
    dropped as well (not replaced by a space).
    """
    cleaned = RE_BAD_CHARS.sub("", text)
    return cleaned

def detect_en(text):
    """Return True when every chunk cld2 finds in `text` is English.

    `cld2.detect(..., returnVectors=True)` returns, as its last element, a
    sequence of per-chunk vectors whose final field is the language code.
    The code is compared for equality with 'en'; a substring test would also
    accept any other code that merely contains the letters "en".

    An empty vector sequence yields True (nothing contradicts English).
    """
    _, _, _, vectors = cld2.detect(text, returnVectors=True)
    return all(language_code == 'en' for *_, language_code in vectors)

# Keep only the corpus entries whose (cleaned) text is detected as English,
# printing a progress line every 5 %.
out_filtered = []
total = len(out)
last_reported = 0  # highest multiple of 5 (%) that has already been printed
for i, entry in enumerate(out):
    try:
        # The cleaned text is written back into the entry on purpose, so
        # downstream steps see the sanitized version.
        entry['text'] = remove_bad_chars(entry['text'])
        if detect_en(entry['text']):
            out_filtered.append(entry)
    except Exception as e:
        # Stop at the first failure and show the offending entry so it can be
        # inspected; entries after it are NOT processed.
        print(entry['text'])
        print(f'Exception {e} raised')
        break

    # Integer arithmetic avoids float rounding at the 5 % boundaries.
    milestone = (i + 1) * 100 // total // 5 * 5
    # Use a dedicated loop variable so the current entry is not clobbered.
    for pct in range(last_reported + 5, milestone + 1, 5):
        print(f"{pct}% of sentences done")
    last_reported = milestone

5% of sentences done
10% of sentences done
15% of sentences done
20% of sentences done
25% of sentences done
30% of sentences done
35% of sentences done
40% of sentences done
45% of sentences done
50% of sentences done
55% of sentences done
60% of sentences done
65% of sentences done
70% of sentences done
75% of sentences done
80% of sentences done
85% of sentences done
90% of sentences done
95% of sentences done
100% of sentences done


In [5]:
import nltk
from nltk.stem import SnowballStemmer, WordNetLemmatizer
import re
import os
import pickle
import gensim
from tqdm import tqdm

# Fetch the WordNet corpora (presumably required by WordNetLemmatizer, which
# lemmatize_stemming uses).
nltk.download("wordnet")
nltk.download("omw-1.4")

# Shared English Snowball stemmer used by lemmatize_stemming.
stemmer = SnowballStemmer("english")

# Module-level state shared by the functions in this cell:
# models     - filled by prep_docs, keyed by the topic id parsed from each entry's 'id'
# word_map   - filled by sentence_to_seq: stem -> set of raw words containing that stem
# debug      - when True, model_subtopics prints the topics it found
# drive_path - root folder under which models and the word map are stored
# num_topics - number of topics passed to model_topics by run_topic_modeling
models = {}
word_map = {}
debug = False
drive_path = '/content/drive/MyDrive'
num_topics = 50

def lemmatize_stemming(text):
    """Lemmatize `text` as a verb with WordNet, then stem the lemma with the
    module-level Snowball `stemmer`."""
    lemma = WordNetLemmatizer().lemmatize(text, pos='v')
    return stemmer.stem(lemma)

# Tokenize, drop stopwords / short tokens, then lemmatize + stem
def preprocess(text):
    """Return the normalized tokens of `text`.

    Tokens come from gensim's `simple_preprocess`; gensim stopwords and tokens
    of three characters or fewer are discarded, and each survivor is passed
    through `lemmatize_stemming`.
    """
    stopwords = gensim.parsing.preprocessing.STOPWORDS
    return [
        lemmatize_stemming(token)
        for token in gensim.utils.simple_preprocess(text)
        if len(token) > 3 and token not in stopwords
    ]

def sentence_to_seq(text):
    """Turn a sentence into its list of preprocessed tokens (stems).

    URLs (``http...``) are removed before tokenizing. As a side effect, and
    only while no word map has been persisted yet, the module-level
    `word_map` is extended so that every stem maps to the set of raw,
    whitespace-split words of `text` that contain it.

    Returns:
        list[str]: the tokens produced by `preprocess`.
    """
    split_words = set(text.split())
    tokens = preprocess(re.sub(r'http\S+', '', text))

    # Keep a mapping of stems to original words. The persisted map lives under
    # drive_path (that is where run_topic_modeling writes/reads it), so the
    # guard must look there rather than in the current working directory.
    if not os.path.exists(os.path.join(drive_path, 'word_map.pkl')):
        for tk in tokens:
            for word in split_words:
                if tk in word:
                    word_map.setdefault(tk, set()).add(word)

    return tokens

def prep_docs(out_filtered):
    """Tokenize every sentence of the corpus and index it by topic.

    Each entry must provide an 'id' of the form ``<topic>.<n>`` and the keys
    'neutral', 'pro' and 'con', each holding a list of ``{'text': ...}``
    objects. Only the first 'neutral' item is used (stored as the topic's
    'topic' sequence); all 'pro' and 'con' items are appended to the topic's
    respective lists in the module-level `models` dict.

    Returns:
        (all_docs, all_sents): parallel lists with the token sequence and the
        raw text of every sentence, in processing order.
    """
    all_docs, all_sents = [], []

    def tokenize_and_record(sentence):
        # Record the raw sentence and its tokens in the parallel output lists.
        all_sents.append(sentence)
        tokens = sentence_to_seq(sentence)
        all_docs.append(tokens)
        return tokens

    for entry in tqdm(out_filtered, ascii=True):
        topic_id, _ = entry['id'].strip().split('.')
        if not models.get(topic_id):
            models[topic_id] = {}
        topic_entry = models[topic_id]

        if entry['neutral']:
            topic_entry['topic'] = tokenize_and_record(entry['neutral'][0]['text'])

        for stance in ('pro', 'con'):
            for argument in entry[stance]:
                if not topic_entry.get(stance):
                    topic_entry[stance] = []
                topic_entry[stance].append(tokenize_and_record(argument['text']))

    return all_docs, all_sents

def model_topics(processed_docs, num_topics=10):
    """Load the corpus-wide LDA topic model, training and saving it if needed.

    The checkpoint lives at ``<drive_path>/kialo_topics/lda_kialo_topics.ckpt``.
    When it already exists it is loaded as-is, and `processed_docs` and
    `num_topics` are ignored.

    Args:
        processed_docs: list of token lists (one per document).
        num_topics: number of topics to fit when training a new model.

    Returns:
        The loaded or newly trained ``gensim.models.LdaMulticore`` instance
        (previously the model was built and then discarded).
    """
    os.makedirs(os.path.join(drive_path, 'kialo_topics'), exist_ok=True)
    model_path = os.path.join(drive_path, 'kialo_topics', 'lda_kialo_topics.ckpt')
    if os.path.exists(model_path):
        lda_model = gensim.models.LdaMulticore.load(model_path)
    else:
        dictionary = gensim.corpora.Dictionary(processed_docs)
        bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
        lda_model = gensim.models.LdaMulticore(
            bow_corpus, num_topics=num_topics,
            id2word=dictionary, passes=10, workers=8
        )
        # Persist the model so later runs can skip training.
        lda_model.save(model_path)

    return lda_model

def model_subtopics(processed_docs, topic_id, stance):
    """Load or train the 10-topic LDA model for one (topic, stance) pair.

    The checkpoint path is ``models/lda_topics_<topic_id>_<stance>.ckpt``,
    relative to the current working directory. When the module-level `debug`
    flag is set, the top-5 words of every topic are printed, with each stem
    replaced by its shortest known surface form from `word_map` when one is
    available.

    Returns:
        str: the checkpoint path.
    """
    fname = f'models/lda_topics_{topic_id}_{stance}.ckpt'
    if not os.path.exists(fname):
        dictionary = gensim.corpora.Dictionary(processed_docs)
        bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
        lda_model = gensim.models.LdaMulticore(
            bow_corpus, num_topics=10, id2word=dictionary, passes=20, workers=4
        )
        # Persist the freshly trained model.
        lda_model.save(fname)
    else:
        lda_model = gensim.models.LdaMulticore.load(fname)

    if not debug:
        return fname

    # Print each topic discovered with its top-5 words (tokens).
    for idx, topic in lda_model.print_topics(num_words=5):
        # Each "+"-separated term looks like 0.094*"power"; keep the quoted part.
        raw_terms = [term.strip().split('*')[-1] for term in topic.strip().split('+')]
        topic_words = []
        for raw in raw_terms:
            surface_forms = word_map.get(raw.replace('"', ''))
            topic_words.append(min(surface_forms, key=len) if surface_forms else raw)
        print(f"For topic ID {topic_id} and stance {stance}")
        print("Topic: {} => Words: {}".format(idx, ','.join(topic_words)))

    return fname

def run_topic_modeling():
    """Make sure the corpus-wide topic model and the stem->word map exist.

    * If the LDA checkpoint under ``<drive_path>/kialo_topics`` is missing,
      the filtered corpus is tokenized with `prep_docs` and the model is
      trained via `model_topics`. Otherwise the (expensive) tokenization is
      skipped and the returned lists are empty.
    * If ``<drive_path>/word_map.pkl`` exists, it replaces the module-level
      `word_map`; otherwise the current `word_map` is persisted, but only
      when it actually has content, so that an empty map is never cached.

    (Per-topic pro/con sub-models were handled here in an earlier revision;
    that path is currently disabled.)

    Returns:
        (all_docs, all_sents): token lists and raw sentences, or two empty
        lists when the topic model was already on disk.
    """
    global word_map

    all_docs, all_sents = [], []

    topic_model_path = os.path.join(drive_path, 'kialo_topics', 'lda_kialo_topics.ckpt')
    if not os.path.exists(topic_model_path):
        all_docs, all_sents = prep_docs(out_filtered)
        model_topics(all_docs, num_topics=num_topics)

    word_map_path = os.path.join(drive_path, 'word_map.pkl')
    if os.path.exists(word_map_path):
        # NOTE: pickle.load can execute arbitrary code; only load files that
        # this notebook produced itself.
        with open(word_map_path, 'rb') as f:
            word_map = pickle.load(f)
    elif word_map:
        with open(word_map_path, 'wb') as f:
            pickle.dump(word_map, f)

    return all_docs, all_sents

# Both lists are empty when the topic model checkpoint already exists, because
# run_topic_modeling only calls prep_docs when it has to train the model.
all_docs, all_sents = run_topic_modeling()

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [6]:
!python -m spacy download en_core_web_sm

2022-09-19 17:59:57.228911: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en-core-web-sm==3.4.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl (12.8 MB)
[K     |████████████████████████████████| 12.8 MB 4.0 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [7]:
# Output folder for per-topic word lists (created further down in this cell).
drive_path = '/content/drive/MyDrive'
topics_path = os.path.join(drive_path, 'topics_overall')

import spacy
# NOTE(review): extract_entities below calls `nlp`, which is only defined if
# this line is re-enabled.
#nlp = spacy.load('en_core_web_sm')

from spacy.tokens import Span
from spacy.matcher import Matcher

nltk.download('punkt')

# Load the corpus-wide LDA model from the checkpoint path that model_topics saves to.
topic_model_path = os.path.join(drive_path, 'kialo_topics', 'lda_kialo_topics.ckpt')
lda_model = gensim.models.LdaMulticore.load(topic_model_path)

def extract_entities(sents):
    """Extract a rough (subject, object) pair from one sentence.

    The sentence is parsed with the module-level spaCy pipeline `nlp` (which
    must be loaded before calling this function). While walking the tokens,
    the most recent compound prefix and modifier are remembered and prepended
    to the token whose dependency label contains "subj" (first entity) or
    "obj" (second entity). Later matches overwrite earlier ones.

    Args:
        sents: the sentence text to analyse.

    Returns:
        list[str]: ``[subject_phrase, object_phrase]``; either may be ``""``.
        Missing prefix/modifier parts leave extra inner spaces in a phrase.
    """
    enti_one = ""
    enti_two = ""

    dep_prev_token = ""  # dependency tag of the previous non-punctuation token
    txt_prev_token = ""  # text of the previous non-punctuation token

    prefix = ""
    modifier = ""

    for tokn in nlp(sents):
        # Punctuation carries no entity information.
        if tokn.dep_ == "punct":
            continue

        # Compound word: remember it, joined with a directly preceding compound.
        if tokn.dep_ == "compound":
            prefix = tokn.text
            if dep_prev_token == "compound":
                prefix = txt_prev_token + " " + tokn.text

        # Modifier (amod, nmod, ...): same treatment.
        if tokn.dep_.endswith("mod"):
            modifier = tokn.text
            if dep_prev_token == "compound":
                modifier = txt_prev_token + " " + tokn.text

        # Substring membership instead of `find(...) == True`: the latter is
        # only true when the match starts at index 1 ("nsubj", "dobj") and
        # silently misses labels such as "obj".
        if "subj" in tokn.dep_:
            enti_one = modifier + " " + prefix + " " + tokn.text
            prefix = ""
            modifier = ""

        if "obj" in tokn.dep_:
            enti_two = modifier + " " + prefix + " " + tokn.text

        dep_prev_token = tokn.dep_
        txt_prev_token = tokn.text

    return [enti_one.strip(), enti_two.strip()]


# Ensure the per-topic output folder exists (no error if it already does).
os.makedirs(topics_path, exist_ok=True)

"""
for _, (k, v) in enumerate(tqdm(models.items(), ascii=True)):
  os.makedirs(os.path.join(topics_path, k), exist_ok=True)
  fname_pro = models[k]['pro_save_name']
  if fname_pro:
    fname_pro = fname_pro.replace('models', os.path.join(drive_path, 'models'))
    lda_model = gensim.models.LdaMulticore.load(fname_pro)
    for idx, topic in lda_model.print_topics(num_words=20):
      topic_words_raw = [x.strip().split('*')[-1] for x in topic.strip().split('+')]
      topic_words_mapped = [word_map.get(x.replace('"', '')) for x in topic_words_raw]
      topic_words = [
          re.sub(r'http\S+', '', min(x, key=len)).strip()
          if x else topic_words_raw[i] for i, x in enumerate(topic_words_mapped)
      ]
      with open(os.path.join(topics_path, k, f'pro_{topic_words[0]}_{idx}.txt'), 'w') as f:
        for w in topic_words[:-1]:
          f.write(f'{w}\n')
        f.write(f'{topic_words[-1]}')
  fname_con = models[k]['con_save_name']
  if fname_con:
    fname_con = fname_con.replace('models', os.path.join(drive_path, 'models'))
    lda_model = gensim.models.LdaMulticore.load(fname_con)
    for idx, topic in lda_model.print_topics(num_words=20):
      topic_words_raw = [x.strip().split('*')[-1] for x in topic.strip().split('+')]
      topic_words_mapped = [word_map.get(x.replace('"', '')) for x in topic_words_raw]
      topic_words = [
          re.sub(r'http\S+', '', min(x, key=len)).strip().replace('/', '_')
          if x else topic_words_raw[i] for i, x in enumerate(topic_words_mapped)
      ]
      with open(os.path.join(topics_path, k, f'con_{topic_words[0]}_{idx}.txt'), 'w') as f:
        for w in topic_words[:-1]:
          f.write(f'{w}\n')
        f.write(f'{topic_words[-1]}')
"""

# Per-topic containers, keyed by the LDA topic index.
topic_wise_entities = {
    i: [] for i in range(num_topics)
}
topic_wise_words = {
    i: [] for i in range(num_topics)
}

from nltk.tokenize import sent_tokenize

# Get top-40 words for each topic
for idx, topic in lda_model.print_topics(num_topics=num_topics, num_words=40):
  # `topic` is a string such as: 0.094*"power" + 0.037*"intern" + ...
  # Keep only the term of each summand and drop the surrounding quotes, so
  # that stems without a known surface form are stored as plain words.
  stems = [term.strip().split('*')[-1].replace('"', '') for term in topic.strip().split('+')]
  topic_words = []
  for stem in stems:
    surface_forms = word_map.get(stem)
    # Shortest surface form; sorting first makes ties deterministic.
    topic_words.append(min(sorted(surface_forms), key=len) if surface_forms else stem)
  print(f"Topic {idx}: {topic}")
  topic_wise_words[idx] = topic_words

# Persist the word lists next to the topic model.
topic_words_json = os.path.join(drive_path, 'kialo_topics', 'topic_words.json')
with open(topic_words_json, 'w') as f:
  json.dump(topic_wise_words, f)

"""
# Get entities for each sentence and add for the topics
dictionary = gensim.corpora.Dictionary(all_docs)
for idx in tqdm(range(len(all_docs)), ascii=True):
  doc = all_docs[idx]
  sent = all_sents[idx]
  corpus = [dictionary.doc2bow(doc)]
  top_topics = (
      lda_model.get_document_topics(corpus, minimum_probability=0.0)
  )
  # Pick top topic for adding entities
  top_topic = sorted(top_topics[0], key=lambda x: x[1], reverse=True)[0]
  top_topic_id = top_topic[0]

  entities = []
  tk_sents = sent_tokenize(sent)
  for ss in tk_sents:
    e1, e2 = extract_entities(ss)
    entities.append((e1, e2))
  
  # Add to the current topic as a single entry
  topic_wise_entities[top_topic_id].append(entities)

topic_entities_json = os.path.join(drive_path, 'kialo_topics', 'topic_entities.json')
with open(topic_entities_json, 'w') as f:
  json.dump(topic_wise_entities, f)
"""

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Topic 0: 0.094*"power" + 0.037*"intern" + 0.031*"countri" + 0.026*"ban" + 0.025*"illeg" + 0.024*"control" + 0.022*"cooper" + 0.022*"china" + 0.021*"threat" + 0.019*"conflict" + 0.018*"measur" + 0.014*"peac" + 0.013*"border" + 0.013*"check" + 0.013*"sign" + 0.012*"israel" + 0.012*"russia" + 0.011*"balanc" + 0.010*"govern" + 0.009*"turkey" + 0.008*"aggress" + 0.008*"regim" + 0.007*"bind" + 0.006*"influenc" + 0.006*"weak" + 0.006*"like" + 0.006*"blame" + 0.006*"council" + 0.006*"world" + 0.006*"trade" + 0.005*"enforc" + 0.005*"extern" + 0.005*"palestinian" + 0.005*"territori" + 0.005*"secur" + 0.005*"treati" + 0.005*"hostil" + 0.005*"allow" + 0.005*"order" + 0.004*"prevent"
Topic 1: 0.057*"space" + 0.047*"earth" + 0.033*"event" + 0.024*"destroy" + 0.023*"domest" + 0.021*"planet" + 0.021*"travel" + 0.019*"light" + 0.018*"time" + 0.016*"speci" + 0.014*"extinct" + 0.013*"convent" + 0.011*"carbon" + 0.011*"speed" + 0.010*"entertain" + 0.010*"explor" + 0.010*"caus" + 0.008*"endang" + 0.008*"mi

"\n# Get entities for each sentence and add for the topics\ndictionary = gensim.corpora.Dictionary(all_docs)\nfor idx in tqdm(range(len(all_docs)), ascii=True):\n  doc = all_docs[idx]\n  sent = all_sents[idx]\n  corpus = [dictionary.doc2bow(doc)]\n  top_topics = (\n      lda_model.get_document_topics(corpus, minimum_probability=0.0)\n  )\n  # Pick top topic for adding entities\n  top_topic = sorted(top_topics[0], key=lambda x: x[1], reverse=True)[0]\n  top_topic_id = top_topic[0]\n\n  entities = []\n  tk_sents = sent_tokenize(sent)\n  for ss in tk_sents:\n    e1, e2 = extract_entities(ss)\n    entities.append((e1, e2))\n  \n  # Add to the current topic as a single entry\n  topic_wise_entities[top_topic_id].append(entities)\n\ntopic_entities_json = os.path.join(drive_path, 'kialo_topics', 'topic_entities.json')\nwith open(topic_entities_json, 'w') as f:\n  json.dump(topic_wise_entities, f)\n"

In [8]:
import sys
import os
# Show the current working directory.
print(os.path.abspath('.'))
def generate_bow(input_sentence, aspect):
    """Return the bag-of-words list of the LDA topic that best matches a sentence.

    The sentence is tokenized with `sentence_to_seq`, converted to a
    bag-of-words vector with the dictionary stored on the trained model
    (`lda_model.id2word`), and the most probable topic is looked up in
    `topic_wise_words`.

    Using the model's own dictionary matters: rebuilding a dictionary from
    `all_docs` on every call is slow, and `all_docs` is empty whenever the
    topic model was loaded from its checkpoint, which yields an empty vector
    for every sentence.

    Args:
        input_sentence: sentence whose topic should be determined.
        aspect: aspect word for the sentence; currently only used by the
            disabled file-writing code below.

    Returns:
        list[str]: the words of the best-matching topic.
    """
    tokens = sentence_to_seq(input_sentence)
    corpus = [lda_model.id2word.doc2bow(tokens)]
    top_topics = lda_model.get_document_topics(corpus, minimum_probability=0.0)

    # Most probable topic for the (single) document; first one wins on ties.
    top_topic_id = max(top_topics[0], key=lambda pair: pair[1])[0]
    words_l = topic_wise_words[top_topic_id]

    # Writing the bag of words for PPLM is currently disabled:
    # bow_topic = os.path.join(drive_path, 'PPLM', 'arg_gen', 'bow_topic.txt')
    # with open(bow_topic, 'w') as f:
    #   f.write(f"{aspect}\n")
    #   for w in words_l[:-1]:
    #     f.write(f'{w}\n')
    #   f.write(f'{words_l[-1]}')

    return words_l

from subprocess import Popen, PIPE

def run_model(
    cond_text, grad_len=30, length=50, stepsize=0.01, kl_scale=0.09,
    num_samples=5, window_length=10, idx=1
):
    """Run ``run_pplm.py`` for one prompt, echoing and logging its output.

    The script is launched from ``<drive_path>/PPLM`` (so the call does not
    depend on an earlier ``os.chdir``) with the bag-of-words file
    ``./arg_gen/bow_topic.txt`` and the generic discriminator. Every output
    line is echoed to this process' stdout and appended, as raw bytes, to
    ``<drive_path>/PPLM/arg_gen_outputs_<idx>.txt``.

    Args:
        cond_text: conditioning text (sentence prefix) for generation.
        grad_len: value for ``--grad_length``.
        length: value for ``--length``.
        stepsize: value for ``--stepsize``.
        kl_scale: value for ``--kl_scale``.
        num_samples: value for ``--num_samples``.
        window_length: value for ``--window_length``.
        idx: suffix of the output log file.

    Returns:
        int: exit status of the child process.
    """
    pplm_dir = os.path.join(drive_path, 'PPLM')
    arg_gen_dir = os.path.join(pplm_dir, 'arg_gen')
    command = [
        'python', 'run_pplm.py', '-B', './arg_gen/bow_topic.txt', '-D', 'generic',
        '--window_length', f'{window_length}',
        '--class_label', '0', '--cond_text', f'{cond_text}', '--grad_length', f'{grad_len}',
        '--length', f'{length}', '--gamma', '1.0', '--num_iterations', '5',
        '--num_samples', f'{num_samples}',
        '--stepsize', f'{stepsize}', '--kl_scale', f'{kl_scale}', '--gm_scale', '0.99', '--colorama',
        '--sample',
        '--discrim_weights', os.path.join(arg_gen_dir, 'generic_classifier_head_epoch_8.pt'),
        '--discrim_meta', os.path.join(arg_gen_dir, 'generic_classifier_head_meta.json'),
        '--verbosity', 'quiet'
    ]

    with open(os.path.join(pplm_dir, f'arg_gen_outputs_{idx}.txt'), 'ab') as f:
        # The context manager closes the pipe and waits for the child to exit.
        with Popen(command, stdout=PIPE, cwd=pplm_dir) as process:
            for line in iter(process.stdout.readline, b""):
                # The pipe yields bytes; a text stdout only accepts str.
                sys.stdout.write(line.decode('utf-8', errors='replace'))
                f.write(line)

    return process.returncode

/content


In [9]:
# Switch into the PPLM checkout: run_model launches run_pplm.py and reads
# ./arg_gen/bow_topic.txt via paths relative to the working directory.
os.chdir(os.path.join(drive_path, 'PPLM'))
print(os.getcwd())

/content/drive/MyDrive/PPLM


In [10]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch==1.7.0
  Downloading torch-1.7.0-cp37-cp37m-manylinux1_x86_64.whl (776.7 MB)
[K     |████████████████████████████████| 776.7 MB 3.8 kB/s 
[?25hCollecting nltk==3.4.5
  Downloading nltk-3.4.5.zip (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 45.0 MB/s 
[?25hCollecting colorama==0.4.4
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting transformers==3.4.0
  Downloading transformers-3.4.0-py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 45.1 MB/s 
[?25hCollecting torchtext==0.3.1
  Downloading torchtext-0.3.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 794 kB/s 
Collecting dataclasses
  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 54.8 MB/s 
Collecting sentenc

In [None]:
# Three parallel lists (same length, same order): entry i of each list forms
# one (input sentence, conditioning prefix, aspect) triplet for generation.

# Full claims; passed to generate_bow to pick the matching topic.
i_sentences = [
    "Sensitive social and political topics should be discussed in school",
    "It's better to search bitter",
    "Organ donation should be mandatory",
    "World should adopt International Fixed Calendar",
    "Governments all around the world should push for 100% renewable energy as fast as possible",
    "The progression of AI and tech in the legal industry is useful for all parties",
    "Feminism empowers both women and men",
    "Astrology is a valid practice",
    "All our actions are selfish to some degree",
    "Journalists have a moral obligation to display violent images",
    "Conscientious objection to abortion should be banned",
    "We should spend our spare time helping others rather than in self-development",
    "Protecting individual data privacy is necessary for a healthy society.",
    "Politicians should spend a minimal amount of hours each year in charitable activities.",
    "Employees Should Disclose Their Mental Health Conditions In The Workplace",
    "Quantum physics is not for us to know the future",
    "Angels and demons exist in objective reality.",
    "It is impossible to say whether angels and demons exist.",
    "Election campaigns should only be funded by the government",
    "YouTube is enforcing censorship through the demonetization policy."
]

# Sentence prefixes handed to run_model as the text to continue.
# NOTE(review): "push push" (5th entry) looks like an accidental duplication - confirm.
cond_text = [
    "Sensitive social and political topics should not",
    "searching should not",
    "Organ donation should not",
    "World should not adopt",
    "Governments all around the world should not push push",
    "The progression of AI and tech in the legal industry is not",
    "Feminism does not empower",
    "Astrology is not",
    "All our actions are not",
    "Journalists not",
    "Conscientious objection to abortion should not",
    "We should spend our spare time not to",
    "Protecting individual data privacy is no",
    "Politicians should not",
    "Employees Should not",
    "Quantum physics is",
    "Angels and demons are not",
    "angels and demons exist.",
    "Election campaigns should not",
    "YouTube is not"


]

# Aspect word/phrase for each input; passed to generate_bow as `aspect`.
# NOTE(review): "omniscious" is probably meant to be "omniscient" - confirm.
aspects = [
    "school",
    "bitter",
    "save lives",
    "fixed",
    "renewable energy",
    "revolution",
    "social development",
    "space initiative",
    "self development",
    "information",
    "failure",
    "livelihood",
    "safe",
    "economic development",
    "safe workplace",
    "future",
    "real",
    "omniscious",
    "democracy",
    "censor board"
]

num_samples = 3
assert len(i_sentences) == len(cond_text) == len(aspects), "input lists must be aligned"
for i in tqdm(range(len(cond_text)), ascii=True):
  inp = i_sentences[i]
  cond = cond_text[i]
  aspect = aspects[i]
  # One output file per input triplet. With a constant index, the 'w' open
  # below would truncate the previous input's results on every iteration.
  idx = i + 1

  generate_bow(inp, aspect=aspect)
  # Will generate num_samples perturbed samples for each input triplet
  # (inp, cond, aspect). Saved to ./arg_gen_outputs_{idx}.txt
  with open(os.path.join(drive_path, 'PPLM', f'arg_gen_outputs_{idx}.txt'), 'w') as f:
    f.write(f"Input: {inp}\n")
    f.write(f"Conditional text: {cond}\n")
    f.write(f"Aspect: {aspect}\n")
  run_model(cond, num_samples=num_samples, idx=idx)

  0%|          | 0/20 [00:00<?, ?it/s]

= Prefix of sentence =
<|endoftext|>Sensitive social and political topics should not

= Unperturbed generated text =
<|endoftext|>Sensitive social and political topics should not be discussed in school, says a report published by a British charity on Thursday.

The Child Rights Trust (CRT), which provides education advice for charities that provide free legal advice, said the issue of "sensitive social and political topics" should

= Perturbed generated text 1 =
<|endoftext|>Sensitive social and political topics should[31m not[0m get in the way of an excellent game! I am very pleased with[31m how[0m the game has been received and will be playing it on Saturday as soon as I can! I am hoping[31m that[0m you will enjoy the game too.

I think the main

= Perturbed generated text 2 =
<|endoftext|>Sensitive social and political topics should[31m not[0m be subject to censorship

"I don't want to be a martyr for freedom," is[31m how[0m I feel when I hear these statements. I do[31m n

  5%|5         | 1/20 [07:56<2:30:57, 476.73s/it]

= Prefix of sentence =
<|endoftext|>searching should not

= Unperturbed generated text =
<|endoftext|>searching should not be done in the context of "what to expect", "what will be good", "what's good" or the like. It must always be "what will make you want to do it".


This is especially true for searches where you

= Perturbed generated text 1 =
<|endoftext|>searching should[31m not[0m result in a return of an empty result

searching should[31m not[0m result in the return code of a function[31m not[0m found

Search for elements of certain types (if[31m any[0m) in[31m any[0m element type in the DOM

if (typeof (

= Perturbed generated text 2 =
<|endoftext|>searching should[31m not[0m take[31m any[0m more effort than searching with an actual keyword.

searching should[31m not[0m[31m need[0m to know[31m that[0m a particular keyword, for example an e-mail domain, has an exact match with another domain

searching should[31m not[0m ask a search engine

= Perturbed gen

 10%|#         | 2/20 [15:08<2:15:03, 450.18s/it]

= Prefix of sentence =
<|endoftext|>Organ donation should not

= Unperturbed generated text =
<|endoftext|>Organ donation should not be the sole purpose of life saving medical procedures, as it is for some procedures that are performed at hospitals.

Organ donation should only occur in the cases where the donor wishes to remain anonymous. In this way, the donor's identity remains unknown

= Perturbed generated text 1 =
<|endoftext|>Organ donation should[31m not[0m be used as a means to fund political campaigns.

I am a member of The Church Of Jesus Christ of Latter-Day Saints, and I don't think I've ever seen such a statement in the history of politics, or of American politics

= Perturbed generated text 2 =
<|endoftext|>Organ donation should[31m not[0m be a barrier to getting your child an early childhood education

In my view, the best way[31m that[0m we can provide[31m that[0m is to provide a child of[31m any[0m age[31m that[0m has[31m not[0m yet received[31m that[0m

 15%|#5        | 3/20 [22:40<2:07:51, 451.29s/it]

= Prefix of sentence =
<|endoftext|>World should not adopt

= Unperturbed generated text =
<|endoftext|>World should not adopt'sustainable' carbon-pricing scheme

World Bank: 'Sustainable' carbon-pricing scheme is 'not sustainable'

Sierra Club to host Climate Justice Summit on March 22 in New Orleans

'The world must

= Perturbed generated text 1 =
<|endoftext|>World should[31m not[0m adopt a "new" approach to global climate[31m change[0m.

I am talking about the current "new" approach to climate[31m change[0m which is one of "dramatic reductions in greenhouse gases[31m that[0m are[31m not[0m compatible with the future of life as we know it

= Perturbed generated text 2 =
<|endoftext|>World should[31m not[0m adopt "one nation under God," says former president

"I think the United Nations should be no more than the United States," he added. He said[31m that[0m "this world was created by our own hand."

"We should[31m not[0m be in

= Perturbed generated text 3 =
<|endoft

 20%|##        | 4/20 [30:21<2:01:20, 455.05s/it]

= Prefix of sentence =
<|endoftext|>Governments all around the world should not push push

= Unperturbed generated text =
<|endoftext|>Governments all around the world should not push push their own interests in an increasingly fragmented world in favour of the interests of one group of individuals. It is time for the world to move beyond the current economic models that seek to maximize wealth and privilege the wealthy at the expense of the majority of humanity.

= Perturbed generated text 1 =
<|endoftext|>Governments all around the world should[31m not[0m push push their economies towards zero and instead focus on creating jobs instead. In my view the[31m reason[0m[31m that[0m governments have[31m not[0m done so for a long time is[31m because[0m of the cost of doing business and the[31m fact[0m[31m that[0m they have[31m not[0m invested in new jobs or infrastructure. In

= Perturbed generated text 2 =
<|endoftext|>Governments all around the world should[31m not[0m pu

 25%|##5       | 5/20 [38:11<1:55:06, 460.44s/it]

= Prefix of sentence =
<|endoftext|>The progression of AI and tech in the legal industry is not

= Unperturbed generated text =
<|endoftext|>The progression of AI and tech in the legal industry is not just about creating machines that understand us better than we understand ourselves. It is about creating machines that can take a case to trial. That is what has been done in the AI courtroom.

The court has been called the "death penalty's court

= Perturbed generated text 1 =
<|endoftext|>The progression of AI and tech in the legal industry is[31m not[0m without controversy and challenges. In recent years AI and tech has been used in the legal industry in many cases. In a recent[31m case[0m, the judge of[31m that[0m[31m case[0m was a prominent attorney, who had a lot of experience in[31m this[0m field and he was a

= Perturbed generated text 2 =
<|endoftext|>The progression of AI and tech in the legal industry is[31m not[0m only accelerating, but also is changing our view o

 30%|###       | 6/20 [46:09<1:48:48, 466.31s/it]

= Prefix of sentence =
<|endoftext|>Feminism does not empower

= Unperturbed generated text =
<|endoftext|>Feminism does not empower men to abuse women, but to do it to each other. In fact, feminism is designed to empower women to abuse each other. And as a consequence, men are forced to abuse each other.

This is the core argument of The Myth

= Perturbed generated text 1 =
<|endoftext|>Feminism[31m does[0m[31m not[0m empower women to achieve their full potential and equality in[31m any[0m way is[31m not[0m about equality in[31m any[0m way whatsoever.

The feminist agenda[31m does[0m[31m not[0m include equality for all women or anyone in[31m any[0m way[31m that[0m they are[31m not[0m women. The only[31m reason[0m they call themselves a

= Perturbed generated text 2 =
<|endoftext|>Feminism[31m does[0m[31m not[0m empower women.

This is a[31m fact[0m,[31m not[0m a[31m fact[0moid,[31m not[0m a joke. If you are a feminist and you think "I know,[31m why[

 35%|###5      | 7/20 [53:43<1:40:11, 462.44s/it]

= Prefix of sentence =
<|endoftext|>Astrology is not

= Unperturbed generated text =
<|endoftext|>Astrology is not science, but it is an approach to observing how the heavens work. This is what astronomers do, and this is what we do with the data we gather.

Astrology is not a scientific approach, but it is an approach to observing how

= Perturbed generated text 1 =
<|endoftext|>Astrology is[31m not[0m just about stars or planets orbiting the sun. Astronomy has to be about the interactions between the physical laws of the universe, which is[31m how[0m we know[31m that[0m we are here in[31m this[0m universe at all. The laws[31m that[0m are in play have[31m not[0m changed much

= Perturbed generated text 2 =
<|endoftext|>Astrology is[31m not[0m science.

This is[31m not[0m an argument[31m that[0m can be made,[31m because[0m science is[31m not[0m science. It's[31m not[0m an argument[31m that[0m can be made,[31m because[0m science doesn't work like[31m that[0

 40%|####      | 8/20 [1:01:14<1:31:44, 458.70s/it]

= Prefix of sentence =
<|endoftext|>All our actions are not

= Unperturbed generated text =
<|endoftext|>All our actions are not just about money. They are about your health, your safety, and your future as a citizen of Australia.

If you want to know how you can support us and get involved, please click here for our information and links section.



= Perturbed generated text 1 =
<|endoftext|>All our actions are[31m not[0m just based on our minds.

In our daily lives, we act in the following ways:

• Thinking

We must learn to be more aware of the things[31m that[0m influence our thinking in the future.

• Action



= Perturbed generated text 2 =
<|endoftext|>All our actions are[31m not[0m only in our heads but also in our hands. If we don't make our decisions consciously then[31m how[0m can we ever learn from these choices? We should[31m not[0m be afraid of doing what is necessary. We have to choose and choose, and choose what will

= Perturbed generated text 3 =
<|endoftex

 45%|####5     | 9/20 [1:08:47<1:23:44, 456.79s/it]

= Prefix of sentence =
<|endoftext|>Journalists not

= Unperturbed generated text =
<|endoftext|>Journalists not affiliated with the news service have been instructed in how to conduct interviews on the streets of Paris.

The instructions to reporters were issued by the French interior minister, Manuel Valls.

The rules for the press are strict and the rules are

= Perturbed generated text 1 =
<|endoftext|>Journalists[31m not[0m only cover and write about the news of the day, but also provide information to the public on the issues which most affect them.

This is[31m because[0m we are a profession and a profession should[31m not[0m be treated like an afterthought or an afterthought

= Perturbed generated text 2 =
<|endoftext|>Journalists[31m not[0m only want access to the most important data, but they also want to make sure[31m that[0m the data they are publishing is as accurate as[31m possible[0m. We do[31m not[0m publish raw data for our analysis of the relationship be

 50%|#####     | 10/20 [1:16:14<1:15:39, 453.91s/it]

= Prefix of sentence =
<|endoftext|>Conscientious objection to abortion should not

= Unperturbed generated text =
<|endoftext|>Conscientious objection to abortion should not be defined as'religious' but rather as a'moral objection': it is 'not religious at all'.

The new guidelines, which come into force from 1 January next year, will allow abortion providers to choose to provide the procedure if they

= Perturbed generated text 1 =
<|endoftext|>Conscientious objection to abortion should[31m not[0m be based on religious belief, and should[31m not[0m be treated differently to[31m any[0m other conscientious objection.

In[31m this[0m[31m case[0m, the judge is correct in stating[31m that[0m the law is[31m not[0m clear,[31m that[0m there[31m could[0m be a legitimate[31m reason[0m[31m not[0m to have an abortion,

= Perturbed generated text 2 =
<|endoftext|>Conscientious objection to abortion should[31m not[0m be treated as an individual[31m matter[0m, as the court

 55%|#####5    | 11/20 [1:23:50<1:08:10, 454.47s/it]