In [13]:
from dataclasses import dataclass, field  # Import field
import csv
import os
from gensim.test.utils import common_texts
from gensim.models import Word2Vec
import random
import pandas as pd


CONCATENATE ALL FILES IN PARSED DATA FOLDER

In [14]:
@dataclass
class Influencer:
    name: str
    affiliation: str
    no_followers: int = 0
    before_corpus: list = field(default_factory=list) 
    before_likes: list = field(default_factory=list)
    before_retweets: list = field(default_factory=list)
    after_corpus: list = field(default_factory=list)
    after_likes: list = field(default_factory=list)
    after_retweets: list = field(default_factory=list)

data = []
account_list = []

#Loading File paths
supplementary_folder = "Supplementary Materials"
influencers_path = os.path.join(supplementary_folder, "Followers List & Categories - Accounts Kept.csv")

#Population the data file with initial data of the available influencers
with open(influencers_path, newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader) #skip header
    for line in reader:
        name = line[0]
        account_list.append(name[1:]) #Creating a list of influencers account names

        affiliation = line[1]
        if affiliation == " Libertarian Party":
            affiliation = affiliation[1:]

        followers = line[2]
        if affiliation or followers:
            data.append(Influencer(name[1:], affiliation, followers))

parsed_before_folder = os.path.join("Parsed Data", "Before")
parsed_after_folder = os.path.join("Parsed Data", "After")

before_files = [f for f in os.listdir(parsed_before_folder) if f.endswith('.csv')] #Getting all before files
after_files = [f for f in os.listdir(parsed_after_folder) if f.endswith('.csv')] #Getting all after files

def getting_values(files: list, path: str, after: bool):
    for f in files:
        with open(f"{path}/{f}", 'r') as csvfile:
            reader = csv.reader(csvfile)
            next(reader) #skip header
            
            for line in reader:
                if not line:  # Skip empty lines
                    continue
                if line[0] == "|RUN STATISTICS|": # End of file, move on to next file
                    break
            
                name = line[0].strip() if line else ""
                tweet = line[2].strip()
                date = line[1]
                like = int(line[3].strip())
                retweet = int(line[4].strip())

                for i in data:
                    if name == i.name:
                        if after and (tweet not in c for c in i.after_corpus):
                                i.after_corpus.append(tweet)
                                i.after_likes.append(like)
                                i.after_retweets.append(retweet)
                        else:
                            if (tweet not in c for c in i.before_corpus):
                                i.before_corpus.append(tweet)
                                i.before_likes.append(like)
                                i.before_retweets.append(retweet)

getting_values(before_files, parsed_before_folder, False)
getting_values(after_files, parsed_after_folder, True)


# Check results for each influencer
for i in data:
    print(f"\nInfluencer: {i.name}")
    print(f"Before corpus size: {len(i.before_corpus)}")
    # print(f"Before likes list: {(i.before_likes)}")
    # print(f"Before retweets list: {(i.before_retweets)}")
    print(f"After corpus size: {len(i.after_corpus)}")
    # print(f"After likes list: {(i.after_likes)}")
    # print(f"After retweets list: {(i.after_retweets)}")



Influencer: SabbySabs2
Before corpus size: 4
After corpus size: 26

Influencer: MsLaToshaBrown
Before corpus size: 0
After corpus size: 2

Influencer: RonFilipkowski
Before corpus size: 12
After corpus size: 24

Influencer: KyleKulinski
Before corpus size: 0
After corpus size: 17

Influencer: funder
Before corpus size: 0
After corpus size: 6

Influencer: mmpadellan
Before corpus size: 4
After corpus size: 15

Influencer: krystalball
Before corpus size: 7
After corpus size: 14

Influencer: SteveSchmidtSES
Before corpus size: 6
After corpus size: 15

Influencer: robreiner
Before corpus size: 1
After corpus size: 0

Influencer: marceelias
Before corpus size: 0
After corpus size: 0

Influencer: TheRickWilson
Before corpus size: 5
After corpus size: 11

Influencer: davidsirota
Before corpus size: 10
After corpus size: 10

Influencer: TristanSnell
Before corpus size: 6
After corpus size: 16

Influencer: KyleClark
Before corpus size: 1
After corpus size: 4

Influencer: PatrickSvitek
Before c

In [15]:
with open("Pre-processing.csv", 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Name','Affiliation', 'Number of Followers', "Before Corpus", "Before List of Likes", "Before List of Retweets", "After Corpus", "After List of Likes", "After List of Retweets"])
    
        for i in data:
            if i.before_corpus and i.after_corpus:
                writer.writerow([i.name, i.affiliation, i.no_followers, i.before_corpus, i.before_likes, i.before_retweets, i.after_corpus, i.after_likes, i.after_retweets])

SUBJECT ANONYMIZATION

In [16]:
df = pd.read_csv("Pre-processing.csv")

random_ids = random.sample(range(10, 99), 41)

df["Subject ID"] = random_ids

df.head()


Unnamed: 0,Name,Affiliation,Number of Followers,Before Corpus,Before List of Likes,Before List of Retweets,After Corpus,After List of Likes,After List of Retweets,Subject ID
0,SabbySabs2,Democratic Party,75700,"['Every resident in East Palestine, OH should ...","[934, 230, 259, 934]","[343, 91, 104, 343]",['This is what Ive been saying for months. Its...,"[694, 1275, 201, 638, 2264, 256, 366, 579, 271...","[223, 429, 14, 166, 681, 110, 24, 255, 161, 19...",97
1,RonFilipkowski,Democratic Party,1000000,['Dershowitz said Trump asked him at dinner wh...,"[1337, 7409, 1012, 25397, 7377, 1331, 1578, 25...","[303, 1887, 208, 6635, 1882, 305, 420, 6617, 1...","['Trump, 4 days after the Hamas attack on Isra...","[813, 1520, 2370, 1244, 1440, 2369, 34866, 305...","[317, 363, 703, 229, 476, 702, 18715, 597, 363...",92
2,mmpadellan,Democratic Party,1300000,['Even Fox News knows that the deregulation by...,"[1069, 24343, 1078, 24557]","[314, 16978, 315, 17047]",['While Democrats push for ceasefire and human...,"[3900, 892, 5003, 203, 29181, 3901, 5852, 1588...","[2146, 175, 1190, 73, 7332, 2149, 612, 2544, 1...",52
3,krystalball,Democratic Party,587700,['CNN had a literal lobbyist for Norfolk South...,"[4215, 1991, 14437, 358, 4236, 2000, 360]","[1211, 457, 3108, 61, 1220, 459, 62]","['“With Gods help, children in Gaza will die t...","[907, 1753, 10846, 238, 3408, 1704, 646, 8826,...","[482, 670, 3168, 46, 591, 520, 317, 2242, 317,...",87
4,SteveSchmidtSES,Democratic Party,1500000,['A direct threat against American Jews by a d...,"[1223, 1420, 1420, 340, 1238, 893]","[251, 459, 459, 117, 254, 259]",['More than 100 people participated in a flash...,"[742, 798, 887, 4262, 209, 888, 4259, 798, 742...","[119, 190, 256, 1588, 33, 255, 1586, 190, 119,...",62


In [19]:
df_nametoID = df[["Name", "Subject ID"]]
df_nametoID.to_csv('Ledger.csv', index=False) 

df_noName = df[["Subject ID", "Affiliation", "Number of Followers", "Before Corpus", "Before List of Likes", "Before List of Retweets", "After Corpus", "After List of Likes", "After List of Retweets"]]
df_noName.to_csv("DF_Cleaned", index=False)

CONTRAST CODING

In [20]:
df_noName["Contrast"] = 0.0

affiliation_dict = {
    "Republican Party": -0.5,
    "Democratic Party": 0.5,
    "Other": 0.0
}

df_noName["Contrast"] = df_noName["Affiliation"].map(affiliation_dict)

df_noName.head()

Unnamed: 0,Subject ID,Affiliation,Number of Followers,Before Corpus,Before List of Likes,Before List of Retweets,After Corpus,After List of Likes,After List of Retweets,Contrast
0,97,Democratic Party,75700,"['Every resident in East Palestine, OH should ...","[934, 230, 259, 934]","[343, 91, 104, 343]",['This is what Ive been saying for months. Its...,"[694, 1275, 201, 638, 2264, 256, 366, 579, 271...","[223, 429, 14, 166, 681, 110, 24, 255, 161, 19...",0.5
1,92,Democratic Party,1000000,['Dershowitz said Trump asked him at dinner wh...,"[1337, 7409, 1012, 25397, 7377, 1331, 1578, 25...","[303, 1887, 208, 6635, 1882, 305, 420, 6617, 1...","['Trump, 4 days after the Hamas attack on Isra...","[813, 1520, 2370, 1244, 1440, 2369, 34866, 305...","[317, 363, 703, 229, 476, 702, 18715, 597, 363...",0.5
2,52,Democratic Party,1300000,['Even Fox News knows that the deregulation by...,"[1069, 24343, 1078, 24557]","[314, 16978, 315, 17047]",['While Democrats push for ceasefire and human...,"[3900, 892, 5003, 203, 29181, 3901, 5852, 1588...","[2146, 175, 1190, 73, 7332, 2149, 612, 2544, 1...",0.5
3,87,Democratic Party,587700,['CNN had a literal lobbyist for Norfolk South...,"[4215, 1991, 14437, 358, 4236, 2000, 360]","[1211, 457, 3108, 61, 1220, 459, 62]","['“With Gods help, children in Gaza will die t...","[907, 1753, 10846, 238, 3408, 1704, 646, 8826,...","[482, 670, 3168, 46, 591, 520, 317, 2242, 317,...",0.5
4,62,Democratic Party,1500000,['A direct threat against American Jews by a d...,"[1223, 1420, 1420, 340, 1238, 893]","[251, 459, 459, 117, 254, 259]",['More than 100 people participated in a flash...,"[742, 798, 887, 4262, 209, 888, 4259, 798, 742...","[119, 190, 256, 1588, 33, 255, 1586, 190, 119,...",0.5
