# Detecting Propaganda in News Articles

In [145]:
# Required libraries
import csv
import openai
import tiktoken
import collections
import pickle


In [2]:
# Read the dataset, keep only the articles short enough for the model, and load the API key
DATA_PATH = "data/news_articles.csv"
KEY_PATH = "data/openai_key.txt"
MODEL = "gpt-3.5-turbo"
ENC = tiktoken.encoding_for_model(MODEL)
# Set a cut-off point to ensure enough tokens are available:
# rows whose text column encodes to more than CUTOFF tokens are dropped
CUTOFF = 3000
# Index of the CSV column that is tokenised here and inserted into the prompts
TEXT_COLUMN = 4

# readline() keeps the trailing newline, so strip it before using the key;
# the context manager also closes the file handle, which the bare open() did not
with open(KEY_PATH) as key_file:
    API_KEY = key_file.readline().strip()
openai.api_key = API_KEY

articles = []
with open(DATA_PATH, "r", encoding="utf-8") as d:
    reader = csv.reader(d, delimiter=",", quotechar='"')
    # Skip the header row before filtering instead of popping index 0 afterwards,
    # so header removal no longer depends on the header passing the token filter
    next(reader, None)
    for row in reader:
        # Blank or truncated rows have no text column; skip them rather than
        # failing with an IndexError
        if len(row) <= TEXT_COLUMN:
            continue
        if len(ENC.encode(row[TEXT_COLUMN])) <= CUTOFF:
            articles.append(row)

print(f"Total articles in data set: {len(articles)} / 3702")


Total articles in data set: 3652 / 3702


In [None]:
# Show the text column (index 4) of the first article kept by the token filter
example_text = articles[0][4]
print(example_text)


# Importing the dataset of cached model outputs

In [None]:
# Restore the previously saved `responses` structure rather than rebuilding it
# through the API (saves credits and time).
# NOTE: pickle.load can execute arbitrary code; only load a pickle file you trust.
with open("data/first10_article_outputs.pickle", "rb") as pickle_file:
    responses = pickle.load(pickle_file)


In [166]:
# Print the last parsed entry of every article; the prompt asks for the verdict last
for parsed_entries in responses.values():
    print(parsed_entries[-1])

0 19
{'Verdict': '30%', 'Explanation': 'While the article does contain some propaganda techniques, such as an appeal to fear and loaded language, the majority of the text is focused on reporting allegations made by US Ambassador to the UN Bathsheba Crocker. The article provides quotes from both Crocker and Kremlin spokesman Dmitry Peskov, allowing readers to form their own opinions on the matter. However, the article\'s headline ("Russia has drawn up a list of Ukrainians to be killed or sent to camps, says US diplomat") could be seen as sensationalist and potentially misleading.'}
1 19
{'Verdict': '20%', 'Explanation': 'While the text does use some propaganda techniques, such as repetition and appeal to fear, the overall purpose of the text is to report on the statements made by US Secretary of State Antony Blinken regarding the situation in Ukraine and the potential response from the US. The text does not appear to have a clear agenda or bias towards a particular viewpoint.'}
2 19
{'V

# Building dataset from scratch

In [120]:
# Send a prompt and receive output for the first 10 articles
# Not necessary if .pickle file present
# Number of articles sent to the API (each request costs credits)
N_ARTICLES = 10
# Maps article[0] (first CSV column) to the raw text reply of the model
articles_outputs = {}

# Slice instead of testing a counter inside the loop: the previous `c <= 10`
# check sent 11 articles and kept iterating over every remaining article.
# NOTE: the prompt text (including the technique spellings) is intentionally
# left untouched, because the parsed dictionary keys are derived from it.
for article in articles[:N_ARTICLES]:
    article_prompt = f"""
The following are a list of propaganda techniques and their definitions:
                        
Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Casual oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with a yes or no if the technique is being used in the following text and with an example from the text if present. This should take the form of: "Propaganda technique - Yes or No - Explanation".

{article[4]}

Lastly, give a final verdict on whether the text is propaganda stating a percentage likelihood on the text being propaganda followed by a detailed explanation. This should take the form of: "Verdict - Number% - Explanation".
"""
    # temperature=0 is passed so that sampling randomness is minimised
    completion = openai.ChatCompletion.create(
        model=MODEL,
        messages=[
            {"role": "user", "content": article_prompt}
        ],
        temperature=0
    )
    # Only the text of the first returned choice is kept
    output = completion.choices[0].message.content
    print(output)
    articles_outputs[article[0]] = output

    
    

Name calling - No
Repetition - No
Slogans - No
Appeal to fear - Yes - The article suggests that Russia has plans to commit human rights violations and abuses in Ukraine, which could lead to "widespread human suffering."
Doubt - No
Exaggeration / minimization - No
Flag-Waving - No
Loaded Language - Yes - The article uses phrases such as "human rights catastrophe," "targeted killings," and "lethal measures" to appeal to emotions and create a sense of urgency.
Reduction ad hitlerum - No
Bandwagon - No
Casual oversimplification - No
Obfuscation, intentional vagueness - No
Appeal to authority - Yes - The article cites US Ambassador to the UN Bathsheba Crocker and UN High Commissioner for Human Rights Michelle Bachelet as sources of information.
Black & white fallacy - No
Thought terminating clichés - No
Red herring - No
Straw men - No
Whataboutism - No

Verdict - 30% - While the article does contain some propaganda techniques, such as an appeal to fear and loaded language, the majority of t

In [154]:
# Structure the formatted output into a list of dictionaries
# Example: {8: {'Appeal to fear': 'Yes', 'Explanation': 'The text mentions the fear of a Ukrainian invasion and the escalation of the situation in the region.'}}

# TO DO: handle edge cases e.g., 'Yes (...)'
responses = collections.defaultdict(list)

def output_to_dict(article_id, output):
    """Parse one raw model reply and append the result to ``responses[article_id]``.

    Each non-empty line of ``output`` is expected to look like
    ``"<label> - <answer>"`` or ``"<label> - <answer> - <explanation>"``.

    * A line whose label contains ``"Verdict"`` is stored as
      ``{"Verdict": <answer>, "Explanation": <explanation>}`` (the explanation
      is an empty string when the line has none).
    * Any other line is stored as ``{<label>: <answer>}``, plus an
      ``"Explanation"`` key when an explanation is present.
    * Lines without a ``" - "`` separator (blank lines, stray prose) are skipped.

    Args:
        article_id: Key under which the parsed entries are collected.
        output: Raw text of the model reply, one entry per line.

    Returns:
        None. The module-level ``responses`` mapping is updated in place.
    """
    for raw_line in output.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        # Split at most twice so an explanation that itself contains " - "
        # is kept whole instead of being truncated at its first dash
        parts = line.split(" - ", 2)
        if len(parts) < 2:
            # No separator at all: nothing to store, and indexing would fail
            continue
        label, answer = parts[0], parts[1]
        has_explanation = len(parts) == 3
        # Look for "Verdict" in the label only; the word may legitimately
        # appear inside the explanation of an ordinary technique line
        if "Verdict" in label:
            responses[article_id].append(
                {"Verdict": answer, "Explanation": parts[2] if has_explanation else ""}
            )
        elif has_explanation:
            responses[article_id].append({label: answer, "Explanation": parts[2]})
        else:
            responses[article_id].append({label: answer})

# Run every stored raw reply through output_to_dict so that `responses` gets filled
for article_key in articles_outputs:
    output_to_dict(article_key, articles_outputs[article_key])

In [None]:
# Persist `responses` to disk so later sessions can reload it without calling the API
with open("data/first10_article_outputs.pickle", "wb") as pickle_file:
    pickle.dump(responses, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)


# Token usage

In [3]:
# Get required tokens of example prompt
# Max tokens for prompt and response cannot exceed 4096 tokens
# Prompt without article text comes to 303 tokens
# NOTE: the two instruction sentences in this example are shorter than the ones
# used in the prompts that are sent to the API in the other cells, so the count
# computed here is for this example wording only.

# The text column (index 4) of the first loaded article is inserted into the prompt
EXAMPLE_PROMPT = f"""
The following are a list of propaganda techniques and their definitions:

Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Casual oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with a yes or no if the technique is being used in the following text and with an example from the text:

{articles[0][4]}

Lastly, give a final verdict on whether the text is propaganda.
"""
# Number of tokens the full example prompt encodes to with the model's tokenizer
prompt_tokens = len(ENC.encode(EXAMPLE_PROMPT))


In [4]:
# Example response comes to 602 tokens
# The sample reply below is a fixed string; it is only tokenised here so that
# the combined prompt + response size can be printed.
EXAMPLE_RESPONSE = """
Name calling: No. There is no use of insulting labels towards any object/subject in the text.
Repetition: No. The same message is not repeated over and over.
Slogans: No. There is no use of a brief and memorable phrase in the text.
Appeal to fear: Yes. The text uses fear as a means to support the idea of breaking away from Ukraine and seeking military and financial support from Russia. For example, the text states, "amid what they claim is a sharp spike in hostilities, and have ordered the mobilization of all able-bodied men to be ready to fight in a potential conflict."
Doubt: No. There is no questioning of the credibility of anyone or anything in the text.
Exaggeration/minimization: No. There is no exaggeration or minimization of anything in the text.
Flag-Waving: Yes. The text appeals to the patriotism of the breakaway republics and their desire for independence from Ukraine. For example, the text states, "In statements released simultaneously on Tuesday, the parliament of the Donetsk People’s Republic and Lugansk’s People's Council declared that the decision to ratify the ‘Treaty of Friendship and Cooperation with the Russian Federation’ had been passed unanimously by both assemblies."
Loaded Language: No. There is no use of language that appeals to emotions or stereotypes in the text.
Reduction ad Hitlerum: No. There is no disapproval of an idea by suggesting it is popular with groups hated by the audience in the text.
Bandwagon: No. There is no appeal to the popularity of an idea in the text.
Casual oversimplification: No. The text does not assume a simple cause for a complex event.
Obfuscation, intentional vagueness: No. The text is not deliberately unclear or obscure.
Appeal to authority: Yes. The text uses Putin's support as evidence for the breakaway republics' independence. For example, the text states, "The move, he said, was in response to years of fighting in Ukraine’s war-torn east and Kiev’s attempts to “drag foreign states into conflict with our country” with its efforts to join NATO."
Black & white fallacy: No. The text presents more than two options.
Thought terminating clichés: No. The text does not contain phrases that discourage critical thought or meaningful discussions.
Red herring: No. The text does not introduce irrelevant material to distract.
Straw men: No. There is no refutation of an argument that was not presented in the text.
Whataboutism: No. There is no charging of an opponent with hypocrisy in the text.
Verdict: The text does contain some propaganda techniques, such as an appeal to fear and an appeal to patriotism. However, the text primarily consists of factual reporting on recent events, and these propaganda techniques are used in a relatively mild and subtle way. Overall, the text is more informative than propagandistic.
"""
# Token count of the sample reply, using the same tokenizer as for the prompt
resp_tokens = len(ENC.encode(EXAMPLE_RESPONSE))
# Relies on prompt_tokens having been computed for EXAMPLE_PROMPT beforehand
print(f"Total tokens required: {prompt_tokens + resp_tokens}")


Total tokens required: 1381


# Example iteration using a single article from the dataset

In [66]:
# OpenAI API Example
# Builds the prompt for a single article (the first one in `articles`), sends it
# as one user message and prints the text of the reply.
# The prompt asks for "label - answer - explanation" lines followed by a
# "Verdict - Number% - Explanation" line.
article_prompt = f"""
The following are a list of propaganda techniques and their definitions:

Name calling - Attack an object/subject of the propaganda with an insulting label.
Repetition - Repeat the same message over and over.
Slogans - Use a brief and memorable phrase.
Appeal to fear - Support an idea by instilling fear against other alternatives.
Doubt - Questioning the credibility of someone/something.
Exaggeration / minimization - Exaggerate or minimize something.
Flag-Waving - Appeal to patriotism or identity.
Loaded Language - Appeal to emotions or stereotypes.
Reduction ad hitlerum - Disapprove an idea suggesting it is popular with groups hated by the audience.
Bandwagon - Appeal to the popularity of an idea.
Casual oversimplification - Assume a simple cause for a complex event.
Obfuscation, intentional vagueness - Use deliberately unclear and obscure expressions to confuse the audience.
Appeal to authority - Use authority’s support as evidence.
Black & white fallacy - Present only two options among many.
Thought terminating clichés - Phrases that discourage critical thought and meaningful discussions.
Red herring - Introduce irrelevant material to distract.
Straw men - Refute argument that was not presented.
Whataboutism - Charging an opponent with hypocrisy.

For each of the techniques and according to its definition, answer with a yes or no if the technique is being used in the following text and with an example from the text if present. This should take the form of: "Propaganda technique - Yes or No - Explanation".

{articles[0][4]}

Lastly, give a final verdict on whether the text is propaganda stating a percentage likelihood on the text being propaganda followed by a detailed explanation. This should take the form of: "Verdict - Number% - Explanation".
"""

# One chat request with the whole prompt as a single user message; temperature is set to 0
completion = openai.ChatCompletion.create(
    model=MODEL,
    messages=[
        {"role": "user", "content": article_prompt}
    ],
    temperature=0
)

# Keep only the message text of the first returned choice
output = completion.choices[0].message.content
print(output)

Name calling - No
Repetition - No
Slogans - No
Appeal to fear - Yes - The article suggests that Russia has plans to commit human rights violations and abuses in Ukraine, which could lead to "widespread human suffering."
Doubt - No
Exaggeration / minimization - No
Flag-Waving - No
Loaded Language - Yes - The article uses phrases such as "human rights catastrophe," "targeted killings," and "lethal measures" to appeal to emotions and create a sense of urgency.
Reduction ad hitlerum - No
Bandwagon - No
Casual oversimplification - No
Obfuscation, intentional vagueness - No
Appeal to authority - Yes - The article cites US Ambassador to the UN Bathsheba Crocker and UN High Commissioner for Human Rights Michelle Bachelet as sources of information.
Black & white fallacy - No
Thought terminating clichés - No
Red herring - No
Straw men - No
Whataboutism - No

Verdict - 30% - While the article does contain some propaganda techniques, such as an appeal to fear and the use of loaded language, the ma