In [None]:
import pandas as pd
import os

data_dir = ".."

clusters_path = os.path.join(data_dir, "data", "weaponization_analysis", "clusters_with_weaponization_techniques.csv")
cluster_topics_path = os.path.join(data_dir, "data", "weaponization_analysis", "cluster_topics_with_weaponization_techniques.csv")
general_topics_path = os.path.join(data_dir, "data", "weaponization_analysis", "general_topics_with_weaponization_techniques.csv")

clusters_with_techniques = pd.read_csv(clusters_path)
cluster_topics_with_techniques = pd.read_csv(cluster_topics_path)
general_topics_with_techniques = pd.read_csv(general_topics_path)


## PART I: ANALYSIS W/ MORE FINEGRAINED CATEGORIES

#### 1. Clusters Exploration

In [12]:
# for clusters_with_techniques, for EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster
def count_unique_techniques(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        technique_counts = cluster_df['weaponization_technique'].value_counts()
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_techniques(clusters_with_techniques)


Cluster 0, 35 entries:
  Technique: Terminology Manipulation, Count: 19
  Technique: Selective Omission, Count: 8
  Technique: Selective Insertion, Count: 4
  Technique: Villification, Count: 2
  Technique: Glorification, Count: 2

Cluster 1, 1004 entries:
  Technique: Terminology Manipulation, Count: 413
  Technique: Selective Omission, Count: 340
  Technique: Selective Insertion, Count: 163
  Technique: Glorification, Count: 42
  Technique: Villification, Count: 14
  Technique: Image and Media Manipulation, Count: 8
  Technique: Timeline Rewriting, Count: 8
  Technique: Euphemism and Doublespeak, Count: 6
  Technique: Article Structure Manipulation, Count: 4
  Technique: Source Biasing, Count: 3
  Technique: Citation Deletion, Count: 2
  Technique: Selectve Insertion, Count: 1

Cluster 2, 49 entries:
  Technique: Selective Omission, Count: 22
  Technique: Selective Insertion, Count: 14
  Technique: Terminology Manipulation, Count: 6
  Technique: Villification, Count: 4
  Technique: S

In [13]:
# explore any given cluster in clusters_with_techniques
def explore_cluster(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    print(f"Cluster {cluster_number}, {len(cluster_df)} entries:")
    technique_counts = cluster_df['weaponization_technique'].value_counts()
    for technique, count in technique_counts.items():
        print(f"  Technique: {technique}, Count: {count}")
    print()

explore_cluster(clusters_with_techniques, 0)

Cluster 0, 35 entries:
  Technique: Terminology Manipulation, Count: 19
  Technique: Selective Omission, Count: 8
  Technique: Selective Insertion, Count: 4
  Technique: Villification, Count: 2
  Technique: Glorification, Count: 2



In [None]:
# explore all entries of any given cluster in clusters_with_techniques
def explore_cluster_entries(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    for index, row in cluster_df.iterrows():
        print("Chosen manipulation technique:", row['weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

explore_cluster_entries(clusters_with_techniques, 0)

Chosen manipulation technique: Terminology Manipulation
Original text: The revision introduces the term "Armenian Revolutionary Federation (Dashnaks)" which is a specific political and cultural organization associated with the Armenian community. This addition emphasizes the organized nature of the Armenian diaspora's involvement in the conflict, framing it in a way that highlights their agency and historical significance. The phrase "the contribution of the volunteer-fighters from Diaspora into the military victory of the Artsakh struggle is invaluable" further underscores the valorization of the Armenian diaspora's role, which can be seen as a strategic narrative to bolster claims of legitimacy and historical rights over the contested region. The removal of the term "diasporan" in favor of "diaspora" also reflects a shift towards a more unified and collective identity, which can be leveraged in discussions about cultural heritage and rights. Overall, these changes reflect a deliberat

#### 2. Cluster Topics Exploration

In [22]:
# for cluster_topics_with_techniques, for EACH topic within EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_techniques_per_topic(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for topic in cluster_df['topic'].unique():
            topic_df = cluster_df[cluster_df['topic'] == topic]
            technique_counts = topic_df['weaponization_technique'].value_counts()
            print(f"  Topic: {topic}, {len(topic_df)} entries:")
            for technique, count in technique_counts.items():
                print(f"    Technique: {technique}, Count: {count}")
        print()

count_unique_techniques_per_topic(cluster_topics_with_techniques)


Cluster 1, 630 entries:
  Topic: 0, 98 entries:
    Technique: Selective Omission, Count: 40
    Technique: Terminology Manipulation, Count: 27
    Technique: Selective Insertion, Count: 23
    Technique: Glorification, Count: 3
    Technique: Villification, Count: 2
    Technique: Article Structure Manipulation, Count: 1
    Technique: Source Biasing, Count: 1
    Technique: Euphemism and Doublespeak, Count: 1
  Topic: 1, 93 entries:
    Technique: Terminology Manipulation, Count: 54
    Technique: Selective Omission, Count: 22
    Technique: Selective Insertion, Count: 11
    Technique: Glorification, Count: 3
    Technique: Timeline Rewriting, Count: 2
    Technique: Image and Media Manipulation, Count: 1
  Topic: 2, 76 entries:
    Technique: Selective Omission, Count: 37
    Technique: Terminology Manipulation, Count: 14
    Technique: Selective Insertion, Count: 12
    Technique: Glorification, Count: 10
    Technique: Villification, Count: 3
  Topic: 3, 64 entries:
    Technique

#### 3. BERTopic exploration

In [23]:
# for general_topics_with_techniques, for EACH topic, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_techniques_per_general_topic(df):
    for topic in df['topic'].unique():
        topic_df = df[df['topic'] == topic]
        technique_counts = topic_df['weaponization_technique'].value_counts()
        print(f"Topic: {topic}, {len(topic_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()
        
count_unique_techniques_per_general_topic(general_topics_with_techniques)


Topic: 21, 36 entries:
  Technique: Terminology Manipulation, Count: 20
  Technique: Selective Omission, Count: 8
  Technique: Selective Insertion, Count: 4
  Technique: Villification, Count: 2
  Technique: Glorification, Count: 2

Topic: 15, 53 entries:
  Technique: Terminology Manipulation, Count: 42
  Technique: Selective Omission, Count: 5
  Technique: Villification, Count: 3
  Technique: Selective Insertion, Count: 2
  Technique: Euphemism and Doublespeak, Count: 1

Topic: 3, 92 entries:
  Technique: Terminology Manipulation, Count: 52
  Technique: Selective Omission, Count: 23
  Technique: Selective Insertion, Count: 11
  Technique: Glorification, Count: 3
  Technique: Timeline Rewriting, Count: 2
  Technique: Image and Media Manipulation, Count: 1

Topic: 5, 88 entries:
  Technique: Selective Omission, Count: 43
  Technique: Terminology Manipulation, Count: 20
  Technique: Selective Insertion, Count: 11
  Technique: Glorification, Count: 9
  Technique: Villification, Count: 5

T

In [None]:
# explore all entries of any given topic in general_topics_with_techniques

def explore_general_topic_entries(df, topic_name):    
    topic_df = df[df['topic'] == topic_name]
    for index, row in topic_df.iterrows():
        print("Chosen manipulation technique:", row['weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

explore_general_topic_entries(general_topics_with_techniques, 11)


Chosen manipulation technique: Terminology Manipulation
Original text: The revision includes the addition of the word "Fuck," which is a strong expletive that can convey anger, frustration, or contempt. This shift in language introduces a tone that is aggressive and dismissive, potentially undermining the seriousness of the cultural and historical context discussed in the article. The removal of the word "[[zh:亞美尼亞]]" (which translates to "Armenia" in Chinese) could also suggest an attempt to erase or diminish the representation of Armenia in a multilingual context, which aligns with narratives of cultural erasure. Overall, the changes reflect a shift towards a more hostile and dismissive tone regarding the subject matter, which can be seen as a form of weaponization of cultural heritage.

Chosen manipulation technique: Villification
Original text: The revision includes the addition of the phrase "FUCK YOU DIASPORA!" which is a direct and aggressive statement targeting the Armenian dia

## PART II: ANALYSIS W/ MORE REDUCED CATEGORIES

#### 1. clusters

In [None]:



category_mapper = {
    "Terminology Manipulation": "Terminology Manipulation",
    "Euphemism and Doublespeak": "Terminology Manipulation",
    "Selective Omission": "Terminology Manipulation",
    "Selective Insertion": "Terminology Manipulation",
    "Article Structure Manipulation": "Article Structure Manipulation",
    "Timeline Rewriting": "Article Structure Manipulation",
    "Source Biasing": "Media Manipulation",
    "Citation Washing": "Media Manipulation",
    "Citation Deletion": "Media Manipulation",
    "Image and Media Manipulation": "Media Manipulation",
    "Glorification": "Appeal to Emotion",
    "Villification": "Appeal to Emotion"
}

In [None]:
clusters_with_techniques_reduced = clusters_with_techniques.copy()
clusters_with_techniques_reduced['reduced_weaponization_technique'] = clusters_with_techniques_reduced['weaponization_technique'].map(category_mapper)

clusters_with_techniques_reduced.head()

Unnamed: 0,cluster,source,original_text,weaponization_technique,reduced_weaponization_technique
0,0,Nagorno-Karabakh_conflict_subsampled,"The revision introduces the term ""Armenian Rev...",Terminology Manipulation,Terminology Manipulation
1,0,Armenian genocide_subsampled,"The revision introduces the term ""Dashnaks"" in...",Terminology Manipulation,Terminology Manipulation
2,0,Armenian_Revolutionary_Federation_subsampled,The removed lines contain several phrases that...,Terminology Manipulation,Terminology Manipulation
3,0,Armenian_Revolutionary_Federation_subsampled,The revision changes the description of the Ar...,Terminology Manipulation,Terminology Manipulation
4,0,Armenian_Revolutionary_Federation_subsampled,The added lines include significant historical...,Selective Insertion,Terminology Manipulation


In [12]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "clusters_with_reduced_weaponization_techniques.csv")
clusters_with_techniques_reduced.to_csv(output_path, index=False)

In [7]:
def count_unique_reduced_techniques(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        technique_counts = cluster_df['reduced_weaponization_technique'].value_counts()
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques(clusters_with_techniques_reduced)

Cluster 0, 35 entries:
  Technique: Terminology Manipulation, Count: 31
  Technique: Appeal to Emotion, Count: 4

Cluster 1, 1004 entries:
  Technique: Terminology Manipulation, Count: 922
  Technique: Appeal to Emotion, Count: 56
  Technique: Media Manipulation, Count: 13
  Technique: Article Structure Manipulation, Count: 12

Cluster 2, 49 entries:
  Technique: Terminology Manipulation, Count: 42
  Technique: Appeal to Emotion, Count: 5
  Technique: Article Structure Manipulation, Count: 1

Cluster 3, 33 entries:
  Technique: Terminology Manipulation, Count: 16
  Technique: Appeal to Emotion, Count: 16
  Technique: Article Structure Manipulation, Count: 1

Cluster 4, 83 entries:
  Technique: Terminology Manipulation, Count: 82
  Technique: Article Structure Manipulation, Count: 1

Cluster 5, 50 entries:
  Technique: Terminology Manipulation, Count: 50

Cluster 6, 60 entries:
  Technique: Terminology Manipulation, Count: 33
  Technique: Appeal to Emotion, Count: 26
  Technique: Articl

In [None]:
# explore all entries of any given cluster in clusters_with_techniques
def explore_cluster_entries_reduced(df, cluster_number):    
    cluster_df = df[df['cluster'] == cluster_number]
    for index, row in cluster_df.iterrows():
        print("Chosen manipulation technique:", row['reduced_weaponization_technique'])
        print("Original text:", row['original_text'])
        print()

#### 2. cluster topics

In [9]:
cluster_topics_with_techniques_reduced = cluster_topics_with_techniques.copy()
cluster_topics_with_techniques_reduced['reduced_weaponization_technique'] = cluster_topics_with_techniques_reduced['weaponization_technique'].map(category_mapper)

In [13]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "cluster_topics_with_reduced_weaponization_techniques.csv")
clusters_with_techniques_reduced.to_csv(output_path, index=False)

In [10]:
# for cluster_topics_with_techniques_reduced, for EACH topic within EACH cluster, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_reduced_techniques_per_topic(df):
    for cluster in df['cluster'].unique():
        cluster_df = df[df['cluster'] == cluster]
        print(f"Cluster {cluster}, {len(cluster_df)} entries:")
        for topic in cluster_df['topic'].unique():
            topic_df = cluster_df[cluster_df['topic'] == topic]
            technique_counts = topic_df['reduced_weaponization_technique'].value_counts()
            print(f"  Topic: {topic}, {len(topic_df)} entries:")
            for technique, count in technique_counts.items():
                print(f"    Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques_per_topic(cluster_topics_with_techniques_reduced)

Cluster 1, 630 entries:
  Topic: 0, 98 entries:
    Technique: Terminology Manipulation, Count: 91
    Technique: Appeal to Emotion, Count: 5
    Technique: Article Structure Manipulation, Count: 1
    Technique: Media Manipulation, Count: 1
  Topic: 1, 93 entries:
    Technique: Terminology Manipulation, Count: 87
    Technique: Appeal to Emotion, Count: 3
    Technique: Article Structure Manipulation, Count: 2
    Technique: Media Manipulation, Count: 1
  Topic: 2, 76 entries:
    Technique: Terminology Manipulation, Count: 63
    Technique: Appeal to Emotion, Count: 13
  Topic: 3, 64 entries:
    Technique: Terminology Manipulation, Count: 57
    Technique: Media Manipulation, Count: 3
    Technique: Appeal to Emotion, Count: 3
    Technique: Article Structure Manipulation, Count: 1
  Topic: 4, 40 entries:
    Technique: Terminology Manipulation, Count: 35
    Technique: Article Structure Manipulation, Count: 3
    Technique: Appeal to Emotion, Count: 2
  Topic: 5, 37 entries:
    T

#### 3. BERTopic

In [14]:
general_topics_with_techniques_reduced = general_topics_with_techniques.copy()
general_topics_with_techniques_reduced['reduced_weaponization_technique'] = general_topics_with_techniques_reduced['weaponization_technique'].map(category_mapper)


In [16]:
# save to csv
output_path = os.path.join(data_dir, "other_outputs", "weaponization_analysis", "general_topics_with_reduced_weaponization_techniques.csv")
general_topics_with_techniques_reduced.to_csv(output_path, index=False)

In [17]:
# for general_topics_with_techniques, for EACH topic, count and print out the occurences/number of entries of each unique weaponization technique in each cluster

def count_unique_reduced_techniques_per_general_topic(df):
    for topic in df['topic'].unique():
        topic_df = df[df['topic'] == topic]
        technique_counts = topic_df['reduced_weaponization_technique'].value_counts()
        print(f"Topic: {topic}, {len(topic_df)} entries:")
        for technique, count in technique_counts.items():
            print(f"  Technique: {technique}, Count: {count}")
        print()

count_unique_reduced_techniques_per_general_topic(general_topics_with_techniques_reduced)

Topic: 21, 36 entries:
  Technique: Terminology Manipulation, Count: 32
  Technique: Appeal to Emotion, Count: 4

Topic: 15, 53 entries:
  Technique: Terminology Manipulation, Count: 50
  Technique: Appeal to Emotion, Count: 3

Topic: 3, 92 entries:
  Technique: Terminology Manipulation, Count: 86
  Technique: Appeal to Emotion, Count: 3
  Technique: Article Structure Manipulation, Count: 2
  Technique: Media Manipulation, Count: 1

Topic: 5, 88 entries:
  Technique: Terminology Manipulation, Count: 74
  Technique: Appeal to Emotion, Count: 14

Topic: 0, 113 entries:
  Technique: Terminology Manipulation, Count: 107
  Technique: Appeal to Emotion, Count: 5
  Technique: Article Structure Manipulation, Count: 1

Topic: 53, 18 entries:
  Technique: Terminology Manipulation, Count: 17
  Technique: Article Structure Manipulation, Count: 1

Topic: 44, 21 entries:
  Technique: Terminology Manipulation, Count: 19
  Technique: Media Manipulation, Count: 1
  Technique: Appeal to Emotion, Count: 