# Detecting Propaganda in News Articles

In [48]:
# Required libraries
import csv
import openai
import tiktoken
import collections
import pickle
import glob
import os
import re


In [83]:
# Process the SemEval-2020 dataset

# Maps the raw SemEval label names to the display names used in this notebook.
# Several raw labels deliberately collapse onto the same display name.
LABEL_MAP = {"Name_Calling": "Name calling",
            "Repetition": "Repetition",
            "Slogans": "Slogans",
            "Appeal_to_fear-prejudice": "Appeal to fear",
            "Doubt": "Doubt",
            "Exaggeration": "Exaggeration / minimization",
            "Flag-Waving": "Flag-Waving",
            "Loaded_Language": "Loaded Language",
            "Reductio_ad_hitlerum": "Reduction ad hitlerum",
            "Bandwagon": "Bandwagon",
            "Causal_Oversimplification": "Causal oversimplification",
            "Minimisation": "Exaggeration / minimization",
            "Appeal_to_Authority": "Appeal to authority",
            "Black-and-White_Fallacy": "Black & white fallacy",
            "Thought-terminating_Cliches": "Thought terminating clichés",
            "Red_Herring": "Red herring",
            "Straw_Men": "Straw men",
            "Whataboutism": "Whataboutism",
            "Labeling": "Name calling"}

LAP = "data/SemEval-2020_Task_11/articles/*.txt"
LA = "data/SemEval-2020_Task_11/labels/"
SUFFIX = ".task-flc-tc.labels"
# Only articles with fewer than this many characters are included
MAX_ARTICLE_CHARS = 3000
semeval = collections.defaultdict(list)


def parse_techniques(label_text):
    """Return the set of display technique names found in a label file's text.

    Every tab-separated field that contains no digit is treated as a label
    field (fields with digits are article IDs or span positions). A label
    field may hold several comma-separated raw labels; each one is mapped
    through LABEL_MAP.

    Args:
        label_text: full text content of one label file.

    Returns:
        A set of display names (values of LABEL_MAP); empty if none found.

    Raises:
        KeyError: if a raw label is not a key of LABEL_MAP.
    """
    techniques = set()
    for line in label_text.splitlines():
        for field in line.split("\t"):
            field = field.strip()
            # Skip empty fields as well as article IDs and label positions
            if not field or re.search(r"\d", field):
                continue
            techniques.update(LABEL_MAP[name] for name in field.split(","))
    return techniques


# sorted() makes the processing (and printing) order deterministic
for article in sorted(glob.glob(LAP)):
    article_name = os.path.basename(article).split('.')[0]
    with open(article, "r", encoding="utf-8") as f:
        article_content = f.read()
    if len(article_content) >= MAX_ARTICLE_CHARS:
        continue
    # Get the labels for each article
    label = f"{LA}{article_name}{SUFFIX}"
    with open(label, "r", encoding="utf-8") as l:
        labels = l.read()
    # Articles whose label file is empty are left out entirely
    if len(labels) > 0:
        semeval[article_name].append(parse_techniques(labels))

for k, v in semeval.items():
    print(k, v)


article111111111 [{'Repetition', 'Appeal to fear', 'Doubt', 'Appeal to authority'}]
article111111114 [{'Appeal to fear'}]
article111111115 [{'Appeal to authority', 'Loaded Language'}]
article111111117 [{'Causal oversimplification'}]
article111111134 [{'Repetition', 'Exaggeration / minimization', 'Name calling'}]
article111111136 [{'Flag-Waving', 'Loaded Language'}]
article694811415 [{'Reduction ad hitlerum', 'Thought terminating clichés', 'Black & white fallacy', 'Appeal to authority', 'Bandwagon', 'Doubt'}]
article697063039 [{'Exaggeration / minimization', 'Thought terminating clichés', 'Name calling', 'Loaded Language', 'Flag-Waving'}]
article697959084 [{'Repetition', 'Thought terminating clichés', 'Loaded Language', 'Flag-Waving', 'Doubt'}]
article697996062 [{'Exaggeration / minimization', 'Name calling'}]
article698780559 [{'Repetition', 'Doubt', 'Loaded Language'}]
article701299732 [{'Appeal to fear', 'Appeal to authority', 'Name calling'}]
article701447437 [{'Exaggeration / minim

[{'Appeal to authority',
  'Bandwagon',
  'Black & white fallacy',
  'Doubt',
  'Reduction ad hitlerum',
  'Thought terminating clichés'}]

In [2]:
# Process RT dataset and add contents to filtered 2D array + get API key
DATA_PATH = "data/news_articles.csv"
KEY_PATH = "data/openai_key.txt"
MODEL = "gpt-3.5-turbo"
ENC = tiktoken.encoding_for_model(MODEL)
# Set a cut-off point to ensure enough tokens are available
CUTOFF = 3000
articles = []

# Read the key inside a context manager so the file handle is closed, and
# strip the trailing newline that readline() would otherwise leave in the key.
with open(KEY_PATH) as key_file:
    API_KEY = key_file.readline().strip()
openai.api_key = API_KEY

# newline="" lets the csv module handle newlines embedded in quoted fields itself
with open(DATA_PATH, "r", encoding="utf-8", newline="") as d:
    reader = csv.reader(d, delimiter=",", quotechar='"')
    # (Optional) remove header
    next(reader, None)
    for row in reader:
        # Column 4 holds the article content; skip blank/short rows instead of crashing
        if len(row) > 4 and len(ENC.encode(row[4])) <= CUTOFF:
            articles.append(row)

print(f"Total articles in data set: {len(articles)} / 3702")


Total articles in data set: 3652 / 3702


In [None]:
# Show the content column (index 4) of the first retained article
first_article = articles[0]
print(first_article[4])


# Importing the dataset

In [3]:
# Restore the previously saved responses rather than rebuilding them through the API
# (saves credits and time).
# NOTE: unpickling can execute arbitrary code - only load pickle files you trust.
with open("data/first10_article_outputs.pickle", "rb") as pickle_in:
    responses = pickle.loads(pickle_in.read())


In [71]:
# Analyse data: print each article id followed by its list of structured results
for article_id in responses:
    print(article_id, responses[article_id], sep="\n")
    

0
[{'Name calling': 'No'}, {'Repetition': 'No'}, {'Slogans': 'No'}, {'Appeal to fear': 'Yes', 'Explanation': 'The article suggests that Russia has plans to commit human rights violations and abuses in Ukraine, which could lead to "widespread human suffering."'}, {'Doubt': 'No'}, {'Exaggeration / minimization': 'No'}, {'Flag-Waving': 'No'}, {'Loaded Language': 'Yes', 'Explanation': 'The article uses phrases such as "human rights catastrophe," "targeted killings," and "lethal measures" to appeal to emotions and create a sense of urgency.'}, {'Reduction ad hitlerum': 'No'}, {'Bandwagon': 'No'}, {'Casual oversimplification': 'No'}, {'Obfuscation, intentional vagueness': 'No'}, {'Appeal to authority': 'Yes', 'Explanation': 'The article cites US Ambassador to the UN Bathsheba Crocker and UN High Commissioner for Human Rights Michelle Bachelet as sources of information.'}, {'Black & white fallacy': 'No'}, {'Thought terminating clichés': 'No'}, {'Red herring': 'No'}, {'Straw men': 'No'}, {'Wha

# Building dataset from scratch

In [120]:
# Send a prompt and receive output for the first 10 articles
# Not necessary if .pickle file present
N_ARTICLES = 10
articles_outputs = {}

# Slice instead of testing `c <= 10` inside the loop: that condition sent 11
# requests (indices 0..10) and then kept iterating over the whole dataset.
for article in articles[:N_ARTICLES]:
    # article[4] is the article content; article[0] is used as the article id
    article_prompt = f"""
The following are a list of propaganda techniques and their definitions:
                        
Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Causal oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with a yes or no if the technique is being used in the following text and with an example from the text if present. This should take the exact form of: "Propaganda technique - Yes or No - Explanation".

{article[4]}

Lastly, give a final verdict on whether the text is propaganda stating a percentage likelihood on the text being propaganda followed by a detailed explanation. This should take the form of: "Verdict - Number% - Explanation".
"""
    completion = openai.ChatCompletion.create(
        model=MODEL,
        messages=[
            {"role": "user", "content": article_prompt}
        ],
        temperature=0
    )
    output = completion.choices[0].message.content
    print(output)
    # Keep the raw text; it is parsed into dictionaries in a later cell
    articles_outputs[article[0]] = output

    
    

Name calling - No
Repetition - No
Slogans - No
Appeal to fear - Yes - The article suggests that Russia has plans to commit human rights violations and abuses in Ukraine, which could lead to "widespread human suffering."
Doubt - No
Exaggeration / minimization - No
Flag-Waving - No
Loaded Language - Yes - The article uses phrases such as "human rights catastrophe," "targeted killings," and "lethal measures" to appeal to emotions and create a sense of urgency.
Reduction ad hitlerum - No
Bandwagon - No
Casual oversimplification - No
Obfuscation, intentional vagueness - No
Appeal to authority - Yes - The article cites US Ambassador to the UN Bathsheba Crocker and UN High Commissioner for Human Rights Michelle Bachelet as sources of information.
Black & white fallacy - No
Thought terminating clichés - No
Red herring - No
Straw men - No
Whataboutism - No

Verdict - 30% - While the article does contain some propaganda techniques, such as an appeal to fear and loaded language, the majority of t

In [154]:
# Structure the formatted output into a list of dictionaries per article
# Example: {8: [{'Appeal to fear': 'Yes', 'Explanation': 'The text mentions the fear of a Ukrainian invasion and the escalation of the situation in the region.'}]}

# TO DO: handle edge cases e.g., 'Yes (...)'
responses = collections.defaultdict(list)

def output_to_dict(article_id, output):
    """Parse one model output and append its entries to responses[article_id].

    Each non-empty line is expected to look like "Name - Answer" or
    "Name - Answer - Explanation". A line whose first field contains
    "Verdict" is stored under the key "Verdict"; every other line is stored
    under its own first field. "Explanation" is only added when present.

    Args:
        article_id: key under which the parsed entries are collected.
        output: raw multi-line text returned by the model.

    Returns:
        None. The module-level `responses` mapping is updated in place.
    """
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue
        # maxsplit=2 keeps any " - " inside the explanation intact
        parts = line.split(" - ", 2)
        if len(parts) < 2:
            # Not a "Name - Answer" line (e.g. free text); skip it rather than crash
            continue
        # Only the first field decides whether this is the verdict line, so an
        # explanation that merely mentions "Verdict" is not misclassified
        key = "Verdict" if "Verdict" in parts[0] else parts[0]
        entry = {key: parts[1]}
        if len(parts) == 3:
            entry["Explanation"] = parts[2]
        responses[article_id].append(entry)

# Feed every raw model output through the helper to build the structured data
for article_id, raw_output in articles_outputs.items():
    output_to_dict(article_id, raw_output)

In [None]:
# Persist the structured responses so later sessions can reload them
with open("data/first10_article_outputs.pickle", "wb") as pickle_out:
    pickle_out.write(pickle.dumps(responses, protocol=pickle.HIGHEST_PROTOCOL))


# Example iteration using a single article from the dataset

In [4]:
# OpenAI API Example
# article111111111 (formatted to remove new lines)
# The technique is spelled "Causal oversimplification" so the name the model
# echoes back matches LABEL_MAP. The string has no placeholders, so it is a
# plain (non-f) string.
article_prompt = """
The following are a list of propaganda techniques and their definitions:

Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Causal oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with either yes or no if the technique is being used in the following text and with an example from the text if present. This should take the form of: "Propaganda technique - Yes or No - Example".

"Next plague outbreak in Madagascar could be 'stronger': WHO Geneva - The World Health Organisation chief on Wednesday said a deadly plague epidemic appeared to have been brought under control in Madagascar, but warned the next outbreak would likely be stronger.
"The next transmission could be more pronounced or stronger," WHO Director-General Tedros Adhanom Ghebreyesus told reporters in Geneva, insisting that "the issue is serious."
An outbreak of both bubonic plague, which is spread by infected rats via flea bites, and pneumonic plague, spread person to person, has killed more than 200 people in the Indian Ocean island nation since August.
Madagascar has suffered bubonic plague outbreaks almost every year since 1980, often caused by rats fleeing forest fires.
The disease tends to make a comeback each hot rainy season, from September to April.
On average, between 300 and 600 infections are recorded every year among a population approaching 25 million people, according to a UN estimate.
But Tedros voiced alarm that "plague in Madagascar behaved in a very, very different way this year."
Cases sprang up far earlier than usual and, instead of being confined to the countryside, the disease infiltrated towns.
The authorities recorded more than 2 000 cases, and Tedros said Wednesday the death toll stood at 207.
He also pointed to the presence of the pneumonic version, which spreads more easily and is more virulent, in the latest outbreak.
He praised the rapid response from WHO and Madagascar authorities that helped bring the outbreak under control, but warned that the danger was not over.
The larger-than-usual outbreak had helped spread the bacteria that causes the plague more widely.
This along with poor sanitation and vector control on Madagascar meant that "when (the plague) comes again it starts from more stock, and the magnitude in the next transmission could be higher than the one that we saw," Tedros said.
"That means that Madagascar could be affected more, and not only that, it could even spill over into neighbouring countries and beyond," he warned.
Complicating vector control is the fact that the fleas that carry the Yersinia pestis bacteria that causes the plague have proven to be widely resistant to chemicals and insecticides.
"That's a dangerous combination," Tedros said."

Lastly, give a final verdict on whether the text is propaganda stating a percentage likelihood on the text being propaganda followed by a detailed explanation. This should take the form of: "Verdict - Number% - Explanation".
"""

# temperature=0 is passed so repeated runs vary as little as possible
completion = openai.ChatCompletion.create(
    model=MODEL,
    messages=[
        {"role": "user", "content": article_prompt}
    ],
    temperature=0
)

# Only the text of the first returned choice is used
output = completion.choices[0].message.content
print(output)

Name calling - No
Slogans - No
Appeal to fear - Yes - The article warns of a potentially stronger and more widespread outbreak in the future, instilling fear in the reader.
Doubt - No
Exaggeration / minimization - No
Flag-Waving - No
Loaded Language - No
Reduction ad hitlerum - No
Bandwagon - No
Casual oversimplification - No
Obfuscation, intentional vagueness - No
Appeal to authority - Yes - The article quotes the WHO Director-General as an authority on the subject.
Black & white fallacy - No
Thought terminating clichés - No
Red herring - No
Straw men - No
Whataboutism - No



# Token usage

In [3]:
# Get required tokens of example prompt
# Max tokens for prompt and response cannot exceed 4096 tokens
# Prompt without article text comes to 303 tokens (author's measurement; re-check after editing the prompt)
# "Causal oversimplification" is spelled as in the prompt sent to the API, so the
# estimate is measured on the same technique names.

EXAMPLE_PROMPT = f"""
The following are a list of propaganda techniques and their definitions:

Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Causal oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with a yes or no if the technique is being used in the following text and with an example from the text:

{articles[0][4]}

Lastly, give a final verdict on whether the text is propaganda.
"""
# Number of tokens the encoder produces for the fully rendered prompt
prompt_tokens = len(ENC.encode(EXAMPLE_PROMPT))


In [4]:
# Example response comes to 602 tokens (author's measurement)
EXAMPLE_RESPONSE = """
Name calling: No. There is no use of insulting labels towards any object/subject in the text.
Repetition: No. The same message is not repeated over and over.
Slogans: No. There is no use of a brief and memorable phrase in the text.
Appeal to fear: Yes. The text uses fear as a means to support the idea of breaking away from Ukraine and seeking military and financial support from Russia. For example, the text states, "amid what they claim is a sharp spike in hostilities, and have ordered the mobilization of all able-bodied men to be ready to fight in a potential conflict."
Doubt: No. There is no questioning of the credibility of anyone or anything in the text.
Exaggeration/minimization: No. There is no exaggeration or minimization of anything in the text.
Flag-Waving: Yes. The text appeals to the patriotism of the breakaway republics and their desire for independence from Ukraine. For example, the text states, "In statements released simultaneously on Tuesday, the parliament of the Donetsk People’s Republic and Lugansk’s People's Council declared that the decision to ratify the ‘Treaty of Friendship and Cooperation with the Russian Federation’ had been passed unanimously by both assemblies."
Loaded Language: No. There is no use of language that appeals to emotions or stereotypes in the text.
Reduction ad Hitlerum: No. There is no disapproval of an idea by suggesting it is popular with groups hated by the audience in the text.
Bandwagon: No. There is no appeal to the popularity of an idea in the text.
Casual oversimplification: No. The text does not assume a simple cause for a complex event.
Obfuscation, intentional vagueness: No. The text is not deliberately unclear or obscure.
Appeal to authority: Yes. The text uses Putin's support as evidence for the breakaway republics' independence. For example, the text states, "The move, he said, was in response to years of fighting in Ukraine’s war-torn east and Kiev’s attempts to “drag foreign states into conflict with our country” with its efforts to join NATO."
Black & white fallacy: No. The text presents more than two options.
Thought terminating clichés: No. The text does not contain phrases that discourage critical thought or meaningful discussions.
Red herring: No. The text does not introduce irrelevant material to distract.
Straw men: No. There is no refutation of an argument that was not presented in the text.
Whataboutism: No. There is no charging of an opponent with hypocrisy in the text.
Verdict: The text does contain some propaganda techniques, such as an appeal to fear and an appeal to patriotism. However, the text primarily consists of factual reporting on recent events, and these propaganda techniques are used in a relatively mild and subtle way. Overall, the text is more informative than propagandistic.
"""
# Token count of the sample response
resp_tokens = len(ENC.encode(EXAMPLE_RESPONSE))
# Combined budget: example prompt plus example response
total_tokens = prompt_tokens + resp_tokens
print(f"Total tokens required: {total_tokens}")


Total tokens required: 1381


# Notes
* Quotes don't work - they consume a massive number of tokens, causing broken behaviour.
* The higher the complexity of the output instruction, the more inconsistent the results are. For example, asking to list only the propaganda techniques that are present with a yes / no + quote + explanation causes it to be far too liberal with identifying techniques.
* C