In [1]:
import pandas as pd
import os

data_dir = ".."

clusters_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_clusters_with_weaponization_techniques.csv")
cluster_topics_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_cluster_topics_with_weaponization_techniques.csv")
general_topics_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_general_topics_with_weaponization_techniques.csv")

clusters_with_techniques = pd.read_csv(clusters_path)
cluster_topics_with_techniques = pd.read_csv(cluster_topics_path)
general_topics_with_techniques = pd.read_csv(general_topics_path)


## PART I: ANALYSIS W/ MORE FINEGRAINED CATEGORIES

#### 1. Clusters Exploration

In [21]:
# for clusters_with_techniques, for EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster
def count_unique_techniques(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        technique_counts = cluster_df['weaponization_technique'].value_counts()
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_techniques(clusters_with_techniques)


Cluster 0, 35 entries:
  Technique: Terminology Biasing, Count: 14
  Technique: Glorification & Vilification, Count: 9
  Technique: Selective Omission, Count: 7
  Technique: Selective Insertion, Count: 3
  Technique: Tag Manipulation, Count: 1
  Technique: Euphemism and Doublespeak, Count: 1

Cluster 1, 1004 entries:
  Technique: Terminology Biasing, Count: 423
  Technique: Selective Omission, Count: 286
  Technique: Glorification & Vilification, Count: 118
  Technique: Selective Insertion, Count: 92
  Technique: Tag Manipulation, Count: 31
  Technique: Euphemism and Doublespeak, Count: 25
  Technique: Source Biasing, Count: 15
  Technique: Timeline Rewriting, Count: 8
  Technique: Citation Deletion, Count: 4
  Technique: Select Insertion, Count: 1
  Technique: Selectively Omission, Count: 1

Cluster 2, 49 entries:
  Technique: Selective Omission, Count: 21
  Technique: Terminology Biasing, Count: 15
  Technique: Glorification & Vilification, Count: 8
  Technique: Selective Insertion, 

In [20]:
# explore any given cluster in clusters_with_techniques
def explore_cluster(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    print(f"Cluster {cluster_number}, {len(cluster_df)} entries:")
    technique_counts = cluster_df['weaponization_technique'].value_counts()
    for technique, count in technique_counts.items():
        print(f"  Technique: {technique}, Count: {count}")
    print()

explore_cluster(clusters_with_techniques, 0)

Cluster 0, 35 entries:
  Technique: Terminology Biasing, Count: 14
  Technique: Glorification & Vilification, Count: 9
  Technique: Selective Omission, Count: 7
  Technique: Selective Insertion, Count: 3
  Technique: Tag Manipulation, Count: 1
  Technique: Euphemism and Doublespeak, Count: 1



In [19]:
# explore all entries of any given cluster in clusters_with_techniques
def explore_cluster_entries(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    for index, row in cluster_df.iterrows():
        print("Chosen manipulation technique:", row['weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

explore_cluster_entries(clusters_with_techniques, 1)

Chosen manipulation technique: Terminology Biasing
Original text: The revision includes a significant shift in language regarding the recognition of the Armenian Genocide. The phrase "States that deny there was an Armenian genocide" has been changed to "States that explicitly deny the Armenian Genocide." The addition of the word "explicitly" serves to strengthen the assertion of denial, framing it as a more deliberate and conscious act. This subtle shift in wording can influence the reader's perception of the legitimacy of the denial, potentially weaponizing the narrative surrounding the Armenian Genocide by emphasizing the active choice of states to deny it. The removal of "there was" also simplifies the statement, making it more direct and assertive. Overall, these changes reflect a more confrontational stance towards the denial of the genocide, aligning with narratives that seek to affirm the historical reality of the Armenian Genocide.

Chosen manipulation technique: Terminology Bi

#### 2. Cluster Topics Exploration

In [18]:
# for cluster_topics_with_techniques, for EACH topic within EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_techniques_per_topic(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for topic in cluster_df['topic'].unique():
            topic_df = cluster_df[cluster_df['topic'] == topic]
            technique_counts = topic_df['weaponization_technique'].value_counts()
            print(f"  Topic: {topic}, {len(topic_df)} entries:")
            for technique, count in technique_counts.items():
                print(f"    Technique: {technique}, Count: {count}")
        print()

count_unique_techniques_per_topic(cluster_topics_with_techniques)


Cluster 1, 630 entries:
  Topic: 0, 98 entries:
    Technique: Terminology Biasing, Count: 37
    Technique: Selective Omission, Count: 31
    Technique: Selective Insertion, Count: 16
    Technique: Glorification & Vilification, Count: 10
    Technique: Source Biasing, Count: 3
    Technique: Euphemism and Doublespeak, Count: 1
  Topic: 1, 93 entries:
    Technique: Terminology Biasing, Count: 59
    Technique: Selective Omission, Count: 16
    Technique: Glorification & Vilification, Count: 7
    Technique: Selective Insertion, Count: 5
    Technique: Source Biasing, Count: 2
    Technique: Timeline Rewriting, Count: 2
    Technique: Tag Manipulation, Count: 1
    Technique: Euphemism and Doublespeak, Count: 1
  Topic: 2, 76 entries:
    Technique: Selective Omission, Count: 33
    Technique: Glorification & Vilification, Count: 23
    Technique: Terminology Biasing, Count: 11
    Technique: Selective Insertion, Count: 8
    Technique: Select Insertion, Count: 1
  Topic: 3, 64 entrie

#### 3. BERTopic exploration

In [17]:
# for general_topics_with_techniques, for EACH topic, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_techniques_per_general_topic(df):
    for topic in df['topic'].unique():
        topic_df = df[df['topic'] == topic]
        technique_counts = topic_df['weaponization_technique'].value_counts()
        print(f"Topic: {topic}, {len(topic_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()
        
count_unique_techniques_per_general_topic(general_topics_with_techniques)


Topic: 21, 36 entries:
  Technique: Terminology Biasing, Count: 15
  Technique: Glorification & Vilification, Count: 9
  Technique: Selective Omission, Count: 7
  Technique: Selective Insertion, Count: 3
  Technique: Tag Manipulation, Count: 1
  Technique: Euphemism and Doublespeak, Count: 1

Topic: 15, 53 entries:
  Technique: Terminology Biasing, Count: 38
  Technique: Euphemism and Doublespeak, Count: 4
  Technique: Glorification & Vilification, Count: 4
  Technique: Tag Manipulation, Count: 3
  Technique: Selective Insertion, Count: 2
  Technique: Selective Omission, Count: 2

Topic: 3, 92 entries:
  Technique: Terminology Biasing, Count: 57
  Technique: Selective Omission, Count: 17
  Technique: Glorification & Vilification, Count: 7
  Technique: Selective Insertion, Count: 5
  Technique: Source Biasing, Count: 2
  Technique: Timeline Rewriting, Count: 2
  Technique: Tag Manipulation, Count: 1
  Technique: Euphemism and Doublespeak, Count: 1

Topic: 5, 88 entries:
  Technique: Sel

In [16]:
# explore all entries of any given topic in general_topics_with_techniques

def explore_general_topic_entries(df, topic_name):    
    topic_df = df[df['topic'] == topic_name]
    for index, row in topic_df.iterrows():
        print("Chosen manipulation technique:", row['weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

explore_general_topic_entries(general_topics_with_techniques, 11)


Chosen manipulation technique: Glorification & Vilification
Original text: The revision includes the addition of the word "Fuck," which is a strong expletive that can convey anger, frustration, or contempt. This shift in language introduces a tone that is aggressive and dismissive, potentially undermining the seriousness of the cultural and historical context discussed in the article. The removal of the word "[[zh:亞美尼亞]]" (which translates to "Armenia" in Chinese) could also suggest an attempt to erase or diminish the representation of Armenia in a multilingual context, which aligns with narratives of cultural erasure. Overall, the changes reflect a shift towards a more hostile and dismissive tone regarding the subject matter, which can be seen as a form of weaponization of cultural heritage.

Chosen manipulation technique: Glorification & Vilification
Original text: The revision includes the addition of the phrase "FUCK YOU DIASPORA!" which is a direct and aggressive statement targeti

## PART II: ANALYSIS W/ MORE REDUCED CATEGORIES

#### 1. clusters

In [3]:
category_mapper = {
    "Terminology Biasing": "Linguistic Manipulation",
    "Euphemism and Doublespeak": "Linguistic Manipulation",
    "Selective Omission": "Factual Manipulation",
    "Selective Insertion": "Factual Manipulation",
    "Timeline Rewriting": "Factual Manipulation",
    "Source Biasing": "Factual Manipulation",
    "Citation Washing": "Factual Manipulation",
    "Citation Deletion": "Factual Manipulation",
    "Tag Manipulation": "Factual Manipulation",
    "Glorification & Vilification": "Linguistic Manipulation",
}

In [4]:
clusters_with_techniques_reduced = clusters_with_techniques.copy()
clusters_with_techniques_reduced['reduced_weaponization_technique'] = clusters_with_techniques_reduced['weaponization_technique'].map(category_mapper)

clusters_with_techniques_reduced.head()

Unnamed: 0,cluster,source,original_text,weaponization_technique,reduced_weaponization_technique
0,0,Nagorno-Karabakh_conflict_subsampled,"The revision introduces the term ""Armenian Rev...",Glorification & Vilification,Linguistic Manipulation
1,0,Armenian genocide_subsampled,"The revision introduces the term ""Dashnaks"" in...",Terminology Biasing,Linguistic Manipulation
2,0,Armenian_Revolutionary_Federation_subsampled,The removed lines contain several phrases that...,Terminology Biasing,Linguistic Manipulation
3,0,Armenian_Revolutionary_Federation_subsampled,The revision changes the description of the Ar...,Terminology Biasing,Linguistic Manipulation
4,0,Armenian_Revolutionary_Federation_subsampled,The added lines include significant historical...,Glorification & Vilification,Linguistic Manipulation


In [5]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_clusters_with_reduced_weaponization_techniques.csv")
clusters_with_techniques_reduced.to_csv(output_path, index=False)

In [6]:
def count_unique_reduced_techniques(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        technique_counts = cluster_df['reduced_weaponization_technique'].value_counts()
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques(clusters_with_techniques_reduced)

Cluster 0, 35 entries:
  Technique: Linguistic Manipulation, Count: 24
  Technique: Factual Manipulation, Count: 11

Cluster 1, 1004 entries:
  Technique: Linguistic Manipulation, Count: 566
  Technique: Factual Manipulation, Count: 436

Cluster 2, 49 entries:
  Technique: Factual Manipulation, Count: 26
  Technique: Linguistic Manipulation, Count: 23

Cluster 3, 33 entries:
  Technique: Linguistic Manipulation, Count: 25
  Technique: Factual Manipulation, Count: 8

Cluster 4, 83 entries:
  Technique: Linguistic Manipulation, Count: 59
  Technique: Factual Manipulation, Count: 24

Cluster 5, 50 entries:
  Technique: Linguistic Manipulation, Count: 38
  Technique: Factual Manipulation, Count: 12

Cluster 6, 60 entries:
  Technique: Linguistic Manipulation, Count: 54
  Technique: Factual Manipulation, Count: 6

Cluster 7, 79 entries:
  Technique: Linguistic Manipulation, Count: 51
  Technique: Factual Manipulation, Count: 28

Cluster 8, 46 entries:
  Technique: Linguistic Manipulation, C

In [15]:
# explore all entries of any given cluster in clusters_with_techniques
def explore_cluster_entries_reduced(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    for index, row in cluster_df.iterrows():
        print("Chosen manipulation technique:", row['reduced_weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

#### 2. cluster topics

In [7]:
cluster_topics_with_techniques_reduced = cluster_topics_with_techniques.copy()
cluster_topics_with_techniques_reduced['reduced_weaponization_technique'] = cluster_topics_with_techniques_reduced['weaponization_technique'].map(category_mapper)

In [14]:
# for cluster_topics_with_techniques_reduced, for EACH topic within EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_reduced_techniques_per_topic(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for topic in cluster_df['topic'].unique():
            topic_df = cluster_df[cluster_df['topic'] == topic]
            technique_counts = topic_df['reduced_weaponization_technique'].value_counts()
            print(f"  Topic: {topic}, {len(topic_df)} entries:")
            for technique, count in technique_counts.items():
                print(f"    Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques_per_topic(cluster_topics_with_techniques_reduced)

Cluster 1, 630 entries:
  Topic: 0, 98 entries:
    Technique: Factual Manipulation, Count: 50
    Technique: Linguistic Manipulation, Count: 48
  Topic: 1, 93 entries:
    Technique: Linguistic Manipulation, Count: 67
    Technique: Factual Manipulation, Count: 26
  Topic: 2, 76 entries:
    Technique: Factual Manipulation, Count: 41
    Technique: Linguistic Manipulation, Count: 34
  Topic: 3, 64 entries:
    Technique: Linguistic Manipulation, Count: 41
    Technique: Factual Manipulation, Count: 23
  Topic: 4, 40 entries:
    Technique: Linguistic Manipulation, Count: 36
    Technique: Factual Manipulation, Count: 4
  Topic: 5, 37 entries:
    Technique: Linguistic Manipulation, Count: 30
    Technique: Factual Manipulation, Count: 7
  Topic: 6, 31 entries:
    Technique: Linguistic Manipulation, Count: 20
    Technique: Factual Manipulation, Count: 10
  Topic: 7, 26 entries:
    Technique: Factual Manipulation, Count: 16
    Technique: Linguistic Manipulation, Count: 10
  Topic: 8

In [9]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_cluster_topics_with_reduced_weaponization_techniques.csv")
cluster_topics_with_techniques_reduced.to_csv(output_path, index=False)

#### 3. BERTopic

In [11]:
general_topics_with_techniques_reduced = general_topics_with_techniques.copy()
general_topics_with_techniques_reduced['reduced_weaponization_technique'] = general_topics_with_techniques_reduced['weaponization_technique'].map(category_mapper)


In [12]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "UPDATED_general_topics_with_reduced_weaponization_techniques.csv")
general_topics_with_techniques_reduced.to_csv(output_path, index=False)

In [13]:
# for general_topics_with_techniques, for EACH topic, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_reduced_techniques_per_general_topic(df):
    for topic in df['topic'].unique():
        topic_df = df[df['topic'] == topic]
        technique_counts = topic_df['reduced_weaponization_technique'].value_counts()
        print(f"Topic: {topic}, {len(topic_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques_per_general_topic(general_topics_with_techniques_reduced)

Topic: 21, 36 entries:
  Technique: Linguistic Manipulation, Count: 25
  Technique: Factual Manipulation, Count: 11

Topic: 15, 53 entries:
  Technique: Linguistic Manipulation, Count: 46
  Technique: Factual Manipulation, Count: 7

Topic: 3, 92 entries:
  Technique: Linguistic Manipulation, Count: 65
  Technique: Factual Manipulation, Count: 27

Topic: 5, 88 entries:
  Technique: Factual Manipulation, Count: 46
  Technique: Linguistic Manipulation, Count: 41

Topic: 0, 113 entries:
  Technique: Factual Manipulation, Count: 61
  Technique: Linguistic Manipulation, Count: 52

Topic: 53, 18 entries:
  Technique: Factual Manipulation, Count: 18

Topic: 44, 21 entries:
  Technique: Linguistic Manipulation, Count: 13
  Technique: Factual Manipulation, Count: 8

Topic: 47, 20 entries:
  Technique: Linguistic Manipulation, Count: 18
  Technique: Factual Manipulation, Count: 2

Topic: 45, 21 entries:
  Technique: Factual Manipulation, Count: 15
  Technique: Linguistic Manipulation, Count: 6

T

## PART III. Name the clusters

In [3]:
from openai import OpenAI
from utils import *

# subsample 15 entries (or if the number of entries is less than 15, simply the number of entries) from each cluster within clusters_with_techniques
# and concatenate the original_text of these subsampled entries to feed into OpenAI LLM to "name" the cluster based on these entries
def subsample_entries_per_cluster(df, sample_size=15):
    cluster_summaries = {}
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        sampled_df = cluster_df.sample(n=min(sample_size, len(cluster_df)), random_state=42)
        concatenated_text = "\n\n".join(sampled_df['original_text'].tolist())
        cluster_summaries[cluster] = name_topic_with_llm(concatenated_text)
    return cluster_summaries


client = OpenAI(api_key=OPENAI_API_KEY)

def name_topic_with_llm(keywords, model="gpt-5-mini"):
    """
    keywords: list or string of n-grams (top words defining the topic)
    model: choose any OpenAI model, default is fast & cheap
    
    Returns: short human-readable topic name.
    """
    if isinstance(keywords, list):
        keywords = ", ".join(keywords)

    prompt = f"""
    You are an expert in political narratives, conflict studies, and cultural heritage.
    Based only on the following analysis texts of 15 revisions from several wikipedia articles grouped together as a topic/theme, generate a SHORT descriptive name 
    (max 8-10 words) that summarizes the theme (subject matter and the characterstics of the revisions related to it, etc).

    Analyses: {keywords}

    Answer ONLY with the name.
    """

    response = client.responses.create(
        model=model,
        input=prompt
    )

    return response.output_text.strip()

cluster_summaries = subsample_entries_per_cluster(clusters_with_techniques)
for cluster, summary in cluster_summaries.items():
    print(f"Cluster {cluster} Summary:\n{summary}\n{'-'*80}\n")


Cluster 0 Summary:
Partisan edits weaponizing ARF history and legitimacy
--------------------------------------------------------------------------------

Cluster 1 Summary:
Weaponization of Armenian Genocide narratives in Wikipedia edits
--------------------------------------------------------------------------------

Cluster 2 Summary:
Weaponizing Armenian cultural heritage narratives in Armenia–Azerbaijan edits
--------------------------------------------------------------------------------

Cluster 3 Summary:
Wikipedia Revisionism: Hate Speech, Ethnic Blame, Historical Erasure
--------------------------------------------------------------------------------

Cluster 4 Summary:
Weaponized Wikipedia edits over dolma: cultural erasure and appropriation
--------------------------------------------------------------------------------

Cluster 5 Summary:
Contested Naming and Cultural Erasure of Shusha/Shushi
--------------------------------------------------------------------------------


In [4]:
# read existing keywords csv
keywords_path = os.path.join(data_dir, "keywords", "cluster_keywords_named.csv")
keywords_df = pd.read_csv(keywords_path)

# replace the 'Cluster_Name' column with the newly generated cluster_summaries
keywords_df['Cluster_Name'] = keywords_df['Cluster'].map(cluster_summaries)

# save updated keywords csv
output_keywords_path = os.path.join(data_dir, "keywords", "cluster_keywords_named_text_directly.csv")
keywords_df.to_csv(output_keywords_path, index=False)

In [None]:
# subsample 15 entries (or if the number of entries is less than 15, simply the number of entries) from each cluster-topic combo within cluster_topics_with_techniques
# and concatenate the original_text of these subsampled entries to feed into OpenAI LLM to "name" the cluster topic based on these entries
def subsample_entries_per_cluster_topic(df, sample_size=15):
    cluster_summaries = {}
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        for topic in cluster_df['topic'].unique():
            topic_df = cluster_df[cluster_df['topic'] == topic]
            sampled_df = topic_df.sample(n=min(sample_size, len(topic_df)), random_state=42)
            concatenated_text = "\n\n".join(sampled_df['original_text'].tolist())
            cluster_topic_key = f"{cluster}__{topic}"
            cluster_summaries[cluster_topic_key] = name_topic_with_llm(concatenated_text)
    return cluster_summaries

cluster_topic_summaries = subsample_entries_per_cluster_topic(cluster_topics_with_techniques)
for cluster_topic, summary in cluster_topic_summaries.items():
    print(f"Cluster-Topic {cluster_topic} Summary:\n{summary}\n{'-'*80}\n") 

# read existing keywords csv
keywords_path = os.path.join(data_dir, "keywords", "cluster_topics_named.csv")
keywords_df = pd.read_csv(keywords_path)



Cluster-Topic 1__0 Summary:
Contested Armenian Genocide narratives and editorial weaponization
--------------------------------------------------------------------------------

Cluster-Topic 1__1 Summary:
Contested Wikipedia edits on Armenian Genocide terminology and sourcing
--------------------------------------------------------------------------------

Cluster-Topic 1__2 Summary:
Contested Armenian Genocide narratives and weaponized Wikipedia edits
--------------------------------------------------------------------------------

Cluster-Topic 1__3 Summary:
Armenian Genocide recognition narratives weaponized via Wikipedia edits
--------------------------------------------------------------------------------

Cluster-Topic 1__4 Summary:
Politicized Wikipedia edits reshaping Armenian Genocide narrative
--------------------------------------------------------------------------------

Cluster-Topic 1__5 Summary:
Denialist Framing and Language Shifts on Armenian Genocide
----------------

In [11]:
# replace the 'Cluster_Topic_Name' column with the newly generated cluster_topic_summaries
def get_cluster_topic_key(row):
    return f"{row['Cluster']}__{row['Topic']}"
keywords_df['Cluster_Topic_Key'] = keywords_df.apply(get_cluster_topic_key, axis=1)
keywords_df['Cluster_Topic_Name'] = keywords_df['Cluster_Topic_Key'].map(cluster_topic_summaries)
keywords_df.drop(columns=['Cluster_Topic_Key'], inplace=True)
# save updated keywords csv
output_keywords_path = os.path.join(data_dir, "keywords", "cluster_topics_named_text_directly.csv")
keywords_df.to_csv(output_keywords_path, index=False)

In [13]:
# subsample 15 entries (or if the number of entries is less than 15, simply the number of entries) from each topic within general_topics_with_techniques
# and concatenate the original_text of these subsampled entries to feed into OpenAI LLM to "name" the topic based on these entries

def subsample_entries_per_general_topic(df, sample_size=15):
    topic_summaries = {}
    for topic in df['topic'].unique():
        topic_df = df[df['topic'] == topic]
        sampled_df = topic_df.sample(n=min(sample_size, len(topic_df)), random_state=42)
        concatenated_text = "\n\n".join(sampled_df['original_text'].tolist())
        topic_summaries[topic] = name_topic_with_llm(concatenated_text)
    return topic_summaries

general_topic_summaries = subsample_entries_per_general_topic(general_topics_with_techniques)
for topic, summary in general_topic_summaries.items():
    print(f"General Topic {topic} Summary:\n{summary}\n{'-'*80}\n") 


General Topic 21 Summary:
Weaponized Wikipedia edits about ARF and Armenian heritage
--------------------------------------------------------------------------------

General Topic 15 Summary:
Weaponized Language and Denial in Armenian Genocide Edits
--------------------------------------------------------------------------------

General Topic 3 Summary:
Contested Wikipedia narratives on the Armenian Genocide
--------------------------------------------------------------------------------

General Topic 5 Summary:
Contested Wikipedia edits: Armenian Genocide framing and denial
--------------------------------------------------------------------------------

General Topic 0 Summary:
Weaponized Wikipedia edits denying Armenian Genocide
--------------------------------------------------------------------------------

General Topic 53 Summary:
Tag edits weaponizing Armenian Genocide narrative on Wikipedia
--------------------------------------------------------------------------------

Ge

In [14]:
# read existing keywords csv
keywords_path = os.path.join(data_dir, "keywords", "general_topics_named.csv")
keywords_df = pd.read_csv(keywords_path)
# replace the 'Topic_Name' column with the newly generated general_topic_summaries
keywords_df['Topic_Name'] = keywords_df['Topic'].map(general_topic_summaries)
# save updated keywords csv
output_keywords_path = os.path.join(data_dir, "keywords", "general_topics_named_text_directly.csv")
keywords_df.to_csv(output_keywords_path, index=False)