In [1]:
import json
import csv
import re
import random
import ast
from ast import literal_eval
from itertools import combinations
from collections import OrderedDict
from gensim.models import KeyedVectors

import pandas as pd
import numpy as np

from scipy import spatial
from math import sqrt

"""
Test: Take out 20 randomly (same length in each bucket), 
"""

'\nTest: Take out 20 randomly (same length in each bucket), \n'

# WEAT

## Define functions

In [2]:
def weighted_std(values, weights):
    # values = numpy ndarray with the same shape as weights
    # weights = numpy ndarray with the same shape as values
    average = np.average(values, weights=weights)
    variance = np.average((values-average)**2, weights=weights)
    # Small sample size bias correction:
    variance_ddof1 = variance*len(values)/(len(values)-1)
    return sqrt(variance_ddof1)

def within_group_cohesion(X):
    # X = 2D numpy array of vectors for all words belonging to group X
    dist = spatial.distance.pdist(X, 'cosine')
    return dist.mean()

def sim(x, A, B):
    # x = ndarray for each word x in category X
    # A = ndarray for words in attribute A
    # B = ndarray for words in attribute B
    x = np.array(x)
    x_ = x.reshape(1, -1)
    A = np.array(A)
    B = np.array(B)
    # print(f"Shape of x: {x.shape}")
    # print(f"Shape of A: {A.shape}")
    # print(f"Shape of B: {B.shape}")
    results_A = spatial.distance.cdist(x_, A, 'cosine')
    sum_A = (1 - results_A).sum()
    results_B = spatial.distance.cdist(x_, B, 'cosine')
    sum_B = (1 - results_B).sum()
    difference = sum_A/len(A) - sum_B/len(B)

    return difference

def group_cohesion_test(X, Y, perm_n = 1000, permtype = 1):
    # X = 2D numpy array of vectors for all words belonging to group X
    # Y = 2D numpy array of vectors for all words belonging to group Y
    # perm_n = number of permutations
    # permtype = permutation type.
    test_statistic = np.average((within_group_cohesion(X), within_group_cohesion(Y)), weights = (len(X), len(Y)))
    jointlist = np.concatenate((X,Y))
    permutations = np.array([])
    if permtype == 1:
        count = 0
        cutpoint = len(X)
        while count < perm_n:
            np.random.shuffle(jointlist)
            set1 = jointlist[:cutpoint]
            set2 = jointlist[cutpoint:]
            permutations = np.append(permutations, 
                                     np.average([within_group_cohesion(set1), within_group_cohesion(set2)], 
                                                weights = [len(set1), len(set2)]))
            count += 1
    else:
        nums = list(range(len(jointlist)))
        for comb in combinations(nums, len(X)):
            set1 = [item for i, item in enumerate(jointlist) if i in comb]
            set2 = [item for i, item in enumerate(jointlist) if i not in comb]
            permutations = np.append(permutations, 
                                     np.average([within_group_cohesion(set1), within_group_cohesion(set2)], 
                                                weights = [len(set1), len(set2)]))
    P_val = (sum(i <= test_statistic for i in permutations)+1)/(len(permutations)+1)
    return P_val

def diff_sim(X, A, B, effect=1, Y=False):
    # X = ndarray for words in category X
    # A = ndarray for words in attribute A
    # B = ndarray for words in attribute B
    # effect = boolean for whether standard deviation & effect size need to be calculated
    # Y = optional. ndarray for words in category Y
    if Y:
        sum_X = 0
        sum_Y = 0

        # print(f"Len X: {len(X)}")
        # print(f"Len Y: {len(Y)}")
        for x in X:
            x = np.array([x])
            sum_X += sim(x, A, B)
        for y in Y:
            y = np.array([y])
            sum_Y += sim(y, A, B)
        # difference = sum_X/len(X) - sum_Y/len(Y)
        difference = sum_X/len(X) - sum_Y/len(Y)
        all_sims = []
        for w in (np.concatenate((X,Y))):
            all_sims.append(sim(w, A, B))
        # For SD calculation, assign weights based on frequency of opposite category
        weights = [len(Y) for num in range(len(X))] + [len(X) for num in range(len(Y))]
        standard_dev = weighted_std(all_sims, weights)
        if standard_dev == 0:
            effect_size = 0
        else:
            effect_size = difference/standard_dev
    else:
        sum_A = 0
        sum_B = 0
        all_sims = []
        for a in A:
            a_ = a.reshape(1, -1)
            results = spatial.distance.cdist(a_, X, 'cosine')
            sum_X = (1 - results).sum()
            val = sum_X/len(X)
            sum_A += val
            all_sims.append(val)
        ave_A = sum_A/len(A)
        for b in B:
            b_ = b.reshape(1, -1)
            results = spatial.distance.cdist(b_, X, 'cosine')
            sum_X = (1 - results).sum()
            val = sum_X/len(X)
            sum_B += val
            all_sims.append(val)
        ave_B = sum_B/len(B)
        difference = ave_A - ave_B
        standard_dev = np.std(all_sims, ddof=1)
        if standard_dev == 0:
            effect_size = 0
        else:
            effect_size = difference/standard_dev
    if effect == 1:
        return difference, standard_dev, effect_size
    else:
        return difference    

def permutation_test(X, A, B, Y=False):
    # X = ndarray for words in category X
    # Y = ndarray for words in category Y
    # A = ndarray for words in attribute A
    # B = ndarray for words in attribute B
    if Y:
        jointlist = np.array(list(X) + list(Y))
        permutations = []
        nums = list(range(len(jointlist)))
        for comb in combinations(nums, len(X)):
            set1 = [item for i, item in enumerate(jointlist) if i in comb]
            set2 = [item for i, item in enumerate(jointlist) if i not in comb]
            permutations.append(diff_sim(set1, set2, A, B))
    else:
        jointlist = np.array(list(A) + list(B))
        permutations = []
        nums = list(range(len(jointlist)))
        for comb in combinations(nums, len(A)):
            set1 = [item for i, item in enumerate(jointlist) if i in comb]
            set2 = [item for i, item in enumerate(jointlist) if i not in comb]
            permutations.append(diff_sim(X, set1, set2, effect=0))
    return permutations

def rand_test(X, A, B, perm_n, Y=False):
    # X = ndarray for words in category X
    # Y = ndarray for words in category Y
    # A = ndarray for words in attribute A
    # B = ndarray for words in attribute B
    # perm_n = number of permutations
    if Y:
        jointlist = np.array(list(X) + list(Y))
        np.random.shuffle(jointlist)
        permutations = []
        count = 0
        cutpoint = len(X)
        while count < perm_n:
            np.random.shuffle(jointlist)
            set1 = jointlist[:cutpoint]
            set2 = jointlist[cutpoint:]
            permutations.append(diff_sim(set1, set2, A, B))
            count += 1
    else:
        jointlist = np.array(list(A) + list(B))
        np.random.shuffle(jointlist)
        permutations = []
        count = 0
        cutpoint = len(A)
        while count < perm_n:
            np.random.shuffle(jointlist)
            set1 = jointlist[:cutpoint]
            set2 = jointlist[cutpoint:]
            permutations.append(diff_sim(X, set1, set2, effect=0))
            count += 1
    return permutations

### DOUBLE CATEGORY WEAT

def weat(X_name, X, Y_name, Y, A_name, A, B_name, B, 
         permt=0, perm_n=10000, cohesion_test=False, cohesion_permutations=1000, cohesion_type=2):
    # X_name = name of category 1. Will be used in result output.
    # X = category 1. Input should be iterable and contain numpy array(s) for words in category 1
    # Y_name = name of category 2. Will be used in result output.
    # Y = category 2. Input should be iterable and contain numpy array(s) for words in category 2
    # A_name = name of attribute 1. Will be used in result output.
    # A = attribute 1. Input should be iterable and contain numpy array(s) for words in attribute 1
    # B_name = name of attribute 2. Will be used in result output.
    # B = attribute 1. Input should be iterable and contain numpy array(s) for words in attribute 2
    # permt = do you want to perform a permutation test? 0 = no, 1 = yes, 2 = yes, with the perm_n specified
    # perm_n = number of permutations
    # cohesion_test = boolean for testing within-category cohesion
    # cohesion_permutations = number of permutations for cohesion test
    # cohesion_type = type of cohesion test. 1 = test cohesion of only one group, 2 = test cohesion of both groups
    
    # Calculate effect size
    difference, standard_dev, effect_size = diff_sim(X=X, Y=Y, A=A, B=B, effect=1)
    decimal = 6
    result_dict = OrderedDict({"categories": [X_name, Y_name],
                               "attributes": [A_name, B_name],
                               "difference": round(difference,decimal),
                               "standard_dev": round(standard_dev,decimal),
                               "effect_size": round(effect_size, decimal)})
    
    # Permutations if permt is not 0
    if permt == 1 or permt == 2:
        if permt == 1:
            permutations = np.array(permutation_test(X=X, Y=Y, A=A, B=B))
        elif permt == 2:
            permutations = np.array(rand_test(X=X, Y=Y, A=A, B=B, perm_n=perm_n))
        perm_mean = np.mean(permutations)
        permutations = permutations - perm_mean
        sum_c = effect_size - perm_mean
        Pleft = (sum(i <= sum_c for i in permutations)+1)/(len(permutations)+1)
        Pright = (sum(i >= sum_c for i in permutations)+1)/(len(permutations)+1)
        Ptot = (sum(abs(i) >= abs(sum_c) for i in permutations)+1)/(len(permutations)+1)
        se = np.std(permutations)
        result_dict["Pleft"] = round(Pleft,decimal)
        result_dict["Pright"] = round(Pright,decimal)
        result_dict["Ptot"] = round(Ptot,decimal)
        result_dict["se"] = round(se, decimal)
    
    # Cohesion test if cohesion_test is true
    if cohesion_test == True:
        cohesion_categories = group_cohesion_test(X=X, Y=Y, perm_n=cohesion_permutations, permtype=cohesion_type)
        cohesion_attributes = group_cohesion_test(X=A, Y=B, perm_n=cohesion_permutations, permtype=cohesion_type)
        result_dict["cohesion_categories"] = cohesion_categories
        result_dict["cohesion_attributes"] = cohesion_attributes
        
    return result_dict    

### SINGLE CATEGORY WEAT

def s_weat(X_name, X, A_name, A, B_name, B, permt = 0, perm_n = 10000):
    # X_name = name of category 1. Will be used in result output.
    # X = category 1. Input should be iterable and contain numpy array(s) for words in category 1
    # A_name = name of attribute 1. Will be used in result output.
    # A = attribute 1. Input should be iterable and contain numpy array(s) for words in attribute 1
    # B_name = name of attribute 2. Will be used in result output.
    # B = attribute 1. Input should be iterable and contain numpy array(s) for words in attribute 2
    # permt = do you want to perform a permutation test? 0 = no, 1 = yes, 2 = yes, with the perm_n specified
    # perm_n = number of permutations

    difference, standard_dev, effect_size = diff_sim(X=X, A=A, B=B)
    decimal = 6
    result_dict = OrderedDict({"category": [X_name],
                               "attributes": [A_name, B_name],
                               "difference": round(difference, decimal),
                               "standard_dev": round(standard_dev,6),
                               "effect_size": round(effect_size, decimal)})
    if permt == 1 or permt == 2:
        if permt == 1:
            permutations = np.array(permutation_test(X, A, B))
        elif permt == 2:
            permutations = np.array(rand_test(X, A, B, perm_n = perm_n))
        perm_mean = np.mean(permutations)
        permutations = permutations - perm_mean
        sum_c = difference - perm_mean
        Pleft = (sum(i <= sum_c for i in permutations)+1)/(len(permutations)+1)
        Pright = (sum(i >= sum_c for i in permutations)+1)/(len(permutations)+1)
        Ptot = (sum(abs(i) >= abs(sum_c) for i in permutations)+1)/(len(permutations)+1)
        result_dict["Pleft"] = round(Pleft, decimal)
        result_dict["Pright"] = round(Pright,decimal)
        result_dict["Ptot"] = round(Ptot, decimal)
    return result_dict


# Load the dataframes 

In [3]:
# Load the stereotype dictionary
dictionary = "Create Dictionary/Stereotype_Dictionary.json"
with open(dictionary, 'r') as f:
    stereotype_dict = json.load(f)

stereotype_df = pd.DataFrame(list(stereotype_dict.items()), columns=['category', 'word'])
stereotype_df = stereotype_df.explode("word")
stereotype_df["word"] = stereotype_df["word"].str.lower()

stereotype_list = stereotype_df.word.tolist()

stereotype_df.head()

def load_word2vec_model(File):
    print("Loading Glove Model")
    word2vec_model = {}
    with open(File,'r') as f:
        for line in f:
            split_line = line.split()
            word = split_line[0].lower().strip()
            if word in stereotype_list:
                embedding = np.array(split_line[1:], dtype=np.float64)
                embedding = np.round(embedding, 10)
                word2vec_model[word] = embedding
    print(f"{len(word2vec_model)} words loaded!")
    word2vec_df = pd.DataFrame(list(word2vec_model.items()), columns=['word', 'vector'])
    merged_df = word2vec_df.merge(stereotype_df, on="word")
    return merged_df

## Data quality checks

In [4]:
word2vec_after_vecs = load_word2vec_model("Tokenized Data/After_word2vec_vectors.txt")
word2vec_before_vecs = load_word2vec_model("Tokenized Data/Before_word2vec_vectors.txt")

BERT_after_vecs = pd.read_csv("Tokenized Data/BERT_After_word_vectors.csv")
BERT_before_vecs = pd.read_csv("Tokenized Data/BERT_Before_word_vectors.csv")

BERT_after_vecs = BERT_after_vecs.merge(stereotype_df, on="word")
BERT_before_vecs = BERT_before_vecs.merge(stereotype_df, on="word")

BERT_after_vecs.head()
# sterotypes_df = pd.read_csv("Create Dictionary/word2vec_Stereotype_word_embeddings.csv")
# sterotypes_df.head()

def fix_format(vector_str):
    # Add a comma between numbers using regex
    fixed_str = re.sub(r"(?<=[0-9])\s+(?=-?\d)", ", ", vector_str)
    return fixed_str


def safe_literal_eval(vector_str):
    try:
        return ast.literal_eval(vector_str)
    except Exception as e:
        print(f"Error evaluating: {vector_str} | Error: {e}")
        return np.nan
    
BERT_after_vecs["vector"] = BERT_after_vecs["vector"].apply(fix_format)
BERT_after_vecs["vector"] = BERT_after_vecs["vector"].apply(safe_literal_eval)
BERT_before_vecs["vector"] = BERT_before_vecs["vector"].apply(fix_format)
BERT_before_vecs["vector"] = BERT_before_vecs["vector"].apply(safe_literal_eval)

#Round the vector by 10 decimal
BERT_after_vecs["vector"] = BERT_after_vecs["vector"].apply(
    lambda x: np.round(np.array(x), decimals=10).tolist() if isinstance(x, list) else x
)
BERT_before_vecs["vector"] = BERT_before_vecs["vector"].apply(
    lambda x: np.round(np.array(x), decimals=10).tolist() if isinstance(x, list) else x
)

BERT_after_vecs.head()

Loading Glove Model
158 words loaded!
Loading Glove Model
28 words loaded!


Unnamed: 0,subject_id,word,vector,category
0,87,far,"[-0.48308292, -0.483188093, -0.0779211596, 0.2...",Cold
1,87,far,"[0.134767666, -0.435717076, 0.912166715, -0.47...",Cold
2,87,far,"[-0.167683467, -0.322603464, 0.395895481, 0.13...",Cold
3,87,far,"[-0.241728961, 0.0443804562, 0.278668374, -0.4...",Cold
4,87,far,"[0.0780541077, -0.379881799, 0.224797666, -0.1...",Cold


In [None]:
# Can you run the WEAT with the before and after embeddings?
# So in a double category weat your group A is Jewish/Israeli pre, 
# group B Jewish/Israeli post, attribute A warmth pre and attribute B warmth post. 
# Then you can do an average of the vectors for Israeli/Jewish and Arabic/Muslim from the pre and post periods. 
# Now run the double category WEAT with mean Israeli/Jewish as group A, 
# mean Arabic/Muslim as group B, warmth pre as attribute A and warmth post as attribute B. 
# Do these two tests for each of the qualities.

# Function to calculate average vector
def calculate_average_vector(categories, vectors_df):
    # Filter the dataframe for the specific category
    category_df = vectors_df[vectors_df['category'].isin(categories)]
    # Convert vectors to numpy arrays and calculate mean
    if len(category_df) > 0:
        # Ensure all vectors are numpy arrays
        vectors = np.array(category_df['vector'].tolist())
        avg_vector = np.mean(vectors, axis=0)
        return avg_vector
    else:
        return None

combined_df = pd.concat([BERT_before_vecs, BERT_after_vecs])

arabic_muslim_avg = calculate_average_vector(['Arabic', 'Muslim'], combined_df)
israeli_jewish_avg = calculate_average_vector(['Israeli', 'Jewish'], combined_df)
warm_avg = calculate_average_vector(['Warm'], combined_df)
cold_avg = calculate_average_vector(['Cold'], combined_df)
competence_avg = calculate_average_vector(['Competence'], combined_df)
incompetence_avg = calculate_average_vector(['Incompetence'], combined_df)



BERT_avg = pd.DataFrame({
    'category': ['Arabic/Muslim', 'Israeli/Jewish', 'Warm', 'Cold', 'Competence', 'Incompetence'],
    'vector': [arabic_muslim_avg, israeli_jewish_avg, warm_avg, cold_avg, competence_avg, incompetence_avg],
})

ValueError: All arrays must be of the same length

BERT

In [5]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
groups = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]

for x,y in groups:
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = BERT_after_vecs[BERT_after_vecs["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = BERT_after_vecs[BERT_after_vecs["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = BERT_after_vecs[BERT_after_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_after_vecs[BERT_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=x, X=X, 
                           Y_name=y, Y=Y, 
                           A_name=att1, A=A, 
                           B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_after_BERT.csv', index=False)

KeyboardInterrupt: 

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
groups = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]

for x,y in groups:
    if y == "Hamas":
        continue
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = BERT_before_vecs[BERT_before_vecs["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = BERT_before_vecs[BERT_before_vecs["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = BERT_before_vecs[BERT_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_before_vecs[BERT_before_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=x, X=X, 
                           Y_name=y, Y=Y, 
                           A_name=att1, A=A, 
                           B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_before_BERT.csv', index=False)

OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Warm', 'Cold']), ('difference', 0.093376), ('standard_dev', 0.070109), ('effect_size', 1.331872), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.036346)])
OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Competence', 'Incompetence']), ('difference', 0.021478), ('standard_dev', 0.037135), ('effect_size', 0.578397), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.027952)])
OrderedDict([('categories', ['Jewish', 'Muslim']), ('attributes', ['Warm', 'Cold']), ('difference', -0.02759), ('standard_dev', 0.040129), ('effect_size', -0.687531), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.057282)])
OrderedDict([('categories', ['Jewish', 'Muslim']), ('attributes', ['Competence', 'Incompetence']), ('difference', -0.000577), ('standard_dev', 0.0222), ('effect_size', -0.025995), ('Pleft', 0.157842), ('Pright', 0.843157), ('Ptot', 0.187812), ('se', 0.05

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Warm"),
         ("Cold", "Cold"), 
         ("Competence", "Competence"),
         ("Incompetence", "Incompetence")]
groups = [("Israeli", "Israeli"),
          ("Jewish", "Jewish"),
          ("Arabic", "Arabic"), 
          ("Muslim", "Muslim")]

for x,y in groups:
    if y == "Hamas":
        continue
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = BERT_before_vecs[BERT_before_vecs["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = BERT_after_vecs[BERT_after_vecs["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = BERT_before_vecs[BERT_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_after_vecs[BERT_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=f"{x}_Before", X=X, 
                           Y_name=f"{y}_After", Y=Y, 
                           A_name=f"{att1}_Before", A=A, 
                           B_name=f"{att2}_After", B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_before&after_single_BERT.csv', index=False)

OrderedDict([('categories', ['Israeli_Before', 'Israeli_After']), ('attributes', ['Warm_Before', 'Warm_After']), ('difference', 0.026992), ('standard_dev', 0.024558), ('effect_size', 1.099121), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.017202)])
OrderedDict([('categories', ['Israeli_Before', 'Israeli_After']), ('attributes', ['Cold_Before', 'Cold_After']), ('difference', -0.015183), ('standard_dev', 0.029924), ('effect_size', -0.507408), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.017245)])
OrderedDict([('categories', ['Israeli_Before', 'Israeli_After']), ('attributes', ['Competence_Before', 'Competence_After']), ('difference', -0.005884), ('standard_dev', 0.015108), ('effect_size', -0.38944), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.016952)])
OrderedDict([('categories', ['Israeli_Before', 'Israeli_After']), ('attributes', ['Incompetence_Before', 'Incompetence_After']), ('difference', -0.003019), ('standard_dev',

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Warm"),
         ("Cold", "Cold"), 
         ("Competence", "Competence"),
         ("Incompetence", "Incompetence")]
groups = [("Arabic/Muslim", "Israeli/Jewish")]

for x,y in groups:
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = BERT_avg[BERT_avg["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = BERT_avg[BERT_avg["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = BERT_before_vecs[BERT_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_after_vecs[BERT_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=f"{x}", X=X, 
                           Y_name=f"{y}", Y=Y, 
                           A_name=f"{att1}_Before", A=A, 
                           B_name=f"{att2}_After", B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_conflated_before&afterattributes_BERT.csv', index=False)

OrderedDict([('categories', ['Arabic/Muslim', 'Israeli/Jewish']), ('attributes', ['Warm_Before', 'Warm_After']), ('difference', -0.000818), ('standard_dev', 0.000578), ('effect_size', -1.414214), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.003635)])
OrderedDict([('categories', ['Arabic/Muslim', 'Israeli/Jewish']), ('attributes', ['Cold_Before', 'Cold_After']), ('difference', -0.008376), ('standard_dev', 0.005923), ('effect_size', -1.414214), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.006277)])
OrderedDict([('categories', ['Arabic/Muslim', 'Israeli/Jewish']), ('attributes', ['Competence_Before', 'Competence_After']), ('difference', -0.002263), ('standard_dev', 0.0016), ('effect_size', -1.414214), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.004504)])
OrderedDict([('categories', ['Arabic/Muslim', 'Israeli/Jewish']), ('attributes', ['Incompetence_Before', 'Incompetence_After']), ('difference', -0.010647), ('standard_dev'

word2vec

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
groups = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]

for x,y in groups:
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=x, X=X, 
                           Y_name=y, Y=Y, 
                           A_name=att1, A=A, 
                           B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_after_word2vec.csv', index=False)

OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Warm', 'Cold']), ('difference', 0.011488), ('standard_dev', 0.009914), ('effect_size', 1.158856), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.040816)])
OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Competence', 'Incompetence']), ('difference', 0.00363), ('standard_dev', 0.006322), ('effect_size', 0.574143), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.03427)])
OrderedDict([('categories', ['IDF', 'Hamas']), ('attributes', ['Warm', 'Cold']), ('difference', 0.01086), ('standard_dev', 0.009616), ('effect_size', 1.129419), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.017364)])
OrderedDict([('categories', ['IDF', 'Hamas']), ('attributes', ['Competence', 'Incompetence']), ('difference', -0.003002), ('standard_dev', 0.00855), ('effect_size', -0.351095), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.014789)])
OrderedD

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
groups = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]

for x,y in groups:
    if x == "IDF":
        continue
    group = f"{x, y}"
    for att1, att2 in tests:
        X = []
        X_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == x].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))
        Y = []
        Y_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == y].vector.tolist()
        for array in Y_raw:
            try:
                Y.append(np.array(literal_eval(array)))
            except Exception as e:
                Y.append(np.array((array)))

        A = []
        A_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = weat(X_name=x, X=X, 
                           Y_name=y, Y=Y, 
                           A_name=att1, A=A, 
                           B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, 
                           cohesion_permutations=100, cohesion_type=0)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
weat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
weat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot','se']

weat_df.to_csv('weat_before_word2vec.csv', index=False)

OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Warm', 'Cold']), ('difference', 0.044281), ('standard_dev', 0.027412), ('effect_size', 1.615384), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.011293)])
OrderedDict([('categories', ['Israeli', 'Arabic']), ('attributes', ['Competence', 'Incompetence']), ('difference', -0.039484), ('standard_dev', 0.031243), ('effect_size', -1.263766), ('Pleft', 0.000999), ('Pright', 1.0), ('Ptot', 0.000999), ('se', 0.004942)])
OrderedDict([('categories', ['Jewish', 'Muslim']), ('attributes', ['Warm', 'Cold']), ('difference', 0.031849), ('standard_dev', 0.039755), ('effect_size', 0.801116), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.00076)])
OrderedDict([('categories', ['Jewish', 'Muslim']), ('attributes', ['Competence', 'Incompetence']), ('difference', 0.085012), ('standard_dev', 0.054048), ('effect_size', 1.572896), ('Pleft', 1.0), ('Pright', 0.000999), ('Ptot', 0.000999), ('se', 0.002096)

## Single category WEAT

### Pre Post corpus

#### Compare opposing attributes

word2vec

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

# tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
# groups = ["Israeli", "Arabic", "IDF", "Hamas", "Jewish", "Muslim"]

tests = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]
groups = ["Warm", "Cold", "Competence", "Incompetence"]

for group in groups:
    # if group == "IDF":
    #     continue
    for att1, att2 in tests:
        if att1 == "IDF":
            continue
        X = []
        X_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == group].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))

        A = []
        A_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = word2vec_before_vecs[word2vec_before_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = s_weat(X_name=group, X=X, 
            A_name=att1, A=A, 
            B_name=att2, B=B, 
            permt=2, perm_n=1000)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
sweat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
sweat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat_before_word2vec.csv', index=False)
sweat_df.to_csv('sweat_before_word2vec_fixed_category.csv', index=False)

OrderedDict([('category', ['Warm']), ('attributes', ['Israeli', 'Arabic']), ('difference', 0.021141), ('standard_dev', 0.013747), ('effect_size', 1.537886), ('Pleft', 1.0), ('Pright', 0.336663), ('Ptot', 0.336663)])
OrderedDict([('category', ['Warm']), ('attributes', ['Jewish', 'Muslim']), ('difference', 0.048585), ('standard_dev', 0.041101), ('effect_size', 1.182072), ('Pleft', 1.0), ('Pright', 0.332667), ('Ptot', 0.653347)])
OrderedDict([('category', ['Cold']), ('attributes', ['Israeli', 'Arabic']), ('difference', -0.02314), ('standard_dev', 0.01728), ('effect_size', -1.339095), ('Pleft', 0.318681), ('Pright', 1.0), ('Ptot', 0.667333)])
OrderedDict([('category', ['Cold']), ('attributes', ['Jewish', 'Muslim']), ('difference', 0.016736), ('standard_dev', 0.013877), ('effect_size', 1.206045), ('Pleft', 1.0), ('Pright', 0.327672), ('Ptot', 0.672328)])
OrderedDict([('category', ['Competence']), ('attributes', ['Israeli', 'Arabic']), ('difference', -0.034481), ('standard_dev', 0.020906), (

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

# tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
# groups = ["Israeli", "Arabic", "IDF", "Hamas", "Jewish", "Muslim"]

tests = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]
groups = ["Warm", "Cold", "Competence", "Incompetence"]

for group in groups:
    for att1, att2 in tests:
        X = []
        X_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == group].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))

        A = []
        A_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = word2vec_after_vecs[word2vec_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = s_weat(X_name=group, X=X, 
            A_name=att1, A=A, 
            B_name=att2, B=B, 
            permt=2, perm_n=1000)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
sweat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
sweat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat_after_word2vec.csv', index=False)
sweat_df.to_csv('sweat_after_word2vec_fixed_category.csv', index=False)

OrderedDict([('category', ['Warm']), ('attributes', ['Israeli', 'Arabic']), ('difference', 0.140502), ('standard_dev', 0.119106), ('effect_size', 1.179639), ('Pleft', 0.975025), ('Pright', 0.036963), ('Ptot', 0.088911)])
OrderedDict([('category', ['Warm']), ('attributes', ['IDF', 'Hamas']), ('difference', -0.007888), ('standard_dev', 0.074402), ('effect_size', -0.106025), ('Pleft', 0.495504), ('Pright', 0.508492), ('Ptot', 0.925075)])
OrderedDict([('category', ['Warm']), ('attributes', ['Jewish', 'Muslim']), ('difference', -0.021922), ('standard_dev', 0.102882), ('effect_size', -0.213076), ('Pleft', 0.31968), ('Pright', 0.682318), ('Ptot', 0.677323)])
OrderedDict([('category', ['Cold']), ('attributes', ['Israeli', 'Arabic']), ('difference', 0.129014), ('standard_dev', 0.114126), ('effect_size', 1.130449), ('Pleft', 0.965035), ('Pright', 0.047952), ('Ptot', 0.085914)])
OrderedDict([('category', ['Cold']), ('attributes', ['IDF', 'Hamas']), ('difference', -0.018749), ('standard_dev', 0.07

BERT

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

# tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
# groups = ["Israeli", "Arabic", "IDF", "Hamas", "Jewish", "Muslim"]

tests = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]
groups = ["Warm", "Cold", "Competence", "Incompetence"]

for group in groups:
    for att1, att2 in tests:
        X = []
        X_raw = BERT_after_vecs[BERT_after_vecs["category"] == group].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))

        A = []
        A_raw = BERT_after_vecs[BERT_after_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_after_vecs[BERT_after_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = s_weat(X_name=group, X=X, 
            A_name=att1, A=A, 
            B_name=att2, B=B, 
            permt=2, perm_n=1000)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
sweat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
sweat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat_after_BERT.csv', index=False)
sweat_df.to_csv('sweat_after_BERT_fixed_category.csv', index=False)

OrderedDict([('category', ['Warm']), ('attributes', ['Israeli', 'Arabic']), ('difference', -0.006693), ('standard_dev', 0.044214), ('effect_size', -0.151372), ('Pleft', 0.068931), ('Pright', 0.932068), ('Ptot', 0.121878)])
OrderedDict([('category', ['Warm']), ('attributes', ['IDF', 'Hamas']), ('difference', -0.00363), ('standard_dev', 0.057487), ('effect_size', -0.063147), ('Pleft', 0.34965), ('Pright', 0.651349), ('Ptot', 0.719281)])
OrderedDict([('category', ['Warm']), ('attributes', ['Jewish', 'Muslim']), ('difference', -0.003066), ('standard_dev', 0.046636), ('effect_size', -0.065737), ('Pleft', 0.298701), ('Pright', 0.702298), ('Ptot', 0.585415)])
OrderedDict([('category', ['Cold']), ('attributes', ['Israeli', 'Arabic']), ('difference', -0.007243), ('standard_dev', 0.054495), ('effect_size', -0.132901), ('Pleft', 0.087912), ('Pright', 0.913087), ('Ptot', 0.173826)])
OrderedDict([('category', ['Cold']), ('attributes', ['IDF', 'Hamas']), ('difference', -0.002639), ('standard_dev', 0

In [None]:
np.random.seed(random.randint(1, 1000))
results = {}

# tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
# groups = ["Israeli", "Arabic", "IDF", "Hamas", "Jewish", "Muslim"]

tests = [("Israeli", "Arabic"), ("IDF", "Hamas"), ("Jewish", "Muslim")]
groups = ["Warm", "Cold", "Competence", "Incompetence"]

for group in groups:
    # if group == "IDF":
    #     continue
    for att1, att2 in tests:
        if att1 == "IDF":
            continue
        X = []
        X_raw = BERT_before_vecs[BERT_before_vecs["category"] == group].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))

        A = []
        A_raw = BERT_before_vecs[BERT_before_vecs["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_before_vecs[BERT_before_vecs["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = s_weat(X_name=group, X=X, 
            A_name=att1, A=A, 
            B_name=att2, B=B, 
            permt=2, perm_n=1000)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
sweat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
sweat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat_before_BERT.csv', index=False)
sweat_df.to_csv('sweat_before_BERT_fixed_category.csv', index=False)

OrderedDict([('category', ['Warm']), ('attributes', ['Israeli', 'Arabic']), ('difference', 0.082848), ('standard_dev', 0.098549), ('effect_size', 0.840679), ('Pleft', 0.866134), ('Pright', 0.145854), ('Ptot', 0.301698)])
OrderedDict([('category', ['Warm']), ('attributes', ['Jewish', 'Muslim']), ('difference', -0.068867), ('standard_dev', 0.086205), ('effect_size', -0.798878), ('Pleft', 0.053946), ('Pright', 0.947053), ('Ptot', 0.17982)])
OrderedDict([('category', ['Cold']), ('attributes', ['Israeli', 'Arabic']), ('difference', -0.010528), ('standard_dev', 0.048576), ('effect_size', -0.216721), ('Pleft', 0.407592), ('Pright', 0.611389), ('Ptot', 0.851149)])
OrderedDict([('category', ['Cold']), ('attributes', ['Jewish', 'Muslim']), ('difference', -0.041277), ('standard_dev', 0.055535), ('effect_size', -0.743266), ('Pleft', 0.155844), ('Pright', 0.845155), ('Ptot', 0.320679)])
OrderedDict([('category', ['Competence']), ('attributes', ['Israeli', 'Arabic']), ('difference', 0.036647), ('sta

In [8]:
np.random.seed(random.randint(1, 1000))
results = {}

# tests = [("Warm", "Cold"), ("Competence", "Incompetence")]
# groups = ["Israeli", "Arabic", "IDF", "Hamas", "Jewish", "Muslim"]

tests = [("Israeli/Jewish", "Arabic/Muslim")]
groups = ["Warm", "Cold", "Competence", "Incompetence"]

for group in groups:
    # if group == "IDF":
    #     continue
    for att1, att2 in tests:
        if att1 == "IDF":
            continue
        X = []
        X_raw = BERT_before_vecs[BERT_before_vecs["category"] == group].vector.tolist()
        for array in X_raw:
            try:
                X.append(np.array(literal_eval(array)))
            except Exception as e:
                X.append(np.array((array)))

        A = []
        A_raw = BERT_avg[BERT_avg["category"] == att1].vector.tolist()
        for array in A_raw:
            try:
                A.append(np.array(literal_eval(array)))
            except Exception as e:
                A.append(np.array((array)))

        B = []
        B_raw = BERT_avg[BERT_avg["category"] == att2].vector.tolist()
        for array in B_raw:
            try:
                B.append(np.array(literal_eval(array)))
            except Exception as e:
                B.append(np.array((array)))

        result_dict = s_weat(X_name=group, X=X, 
            A_name=att1, A=A, 
            B_name=att2, B=B, 
            permt=2, perm_n=1000)
        print(result_dict)
        results[(group, f"{att1}_{att2}")] = result_dict
        
# Convert results dictionary to DataFrame
results_dict = { (group_attr): details for group_attr, details in results.items() }

# Create DataFrame
sweat_df = pd.DataFrame.from_dict(results_dict, orient='index')

# Update column names to match the number of columns in the DataFrame
sweat_df.columns = ["category", 'group_attributes', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat_before_BERT.csv', index=False)
sweat_df.to_csv('sweat_average_BERT_fixed_category.csv', index=False)

OrderedDict([('category', ['Warm']), ('attributes', ['Israeli/Jewish', 'Arabic/Muslim']), ('difference', 0.007286), ('standard_dev', 0.005152), ('effect_size', 1.414214), ('Pleft', 1.0), ('Pright', 0.47952), ('Ptot', 0.47952)])
OrderedDict([('category', ['Cold']), ('attributes', ['Israeli/Jewish', 'Arabic/Muslim']), ('difference', 0.012555), ('standard_dev', 0.008878), ('effect_size', 1.414214), ('Pleft', 1.0), ('Pright', 0.523477), ('Ptot', 1.0)])
OrderedDict([('category', ['Competence']), ('attributes', ['Israeli/Jewish', 'Arabic/Muslim']), ('difference', 0.009009), ('standard_dev', 0.006371), ('effect_size', 1.414214), ('Pleft', 1.0), ('Pright', 0.492507), ('Ptot', 0.492507)])
OrderedDict([('category', ['Incompetence']), ('attributes', ['Israeli/Jewish', 'Arabic/Muslim']), ('difference', 0.015173), ('standard_dev', 0.010729), ('effect_size', 1.414214), ('Pleft', 1.0), ('Pright', 0.503497), ('Ptot', 1.0)])


#### Compare same attribute

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = ['pre', 'post']

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for i in np.arange(len(dates)-1):
#     if dates[i] not in results.keys():
#         print(dates[i])
#         print(dates[i+1])
#         result_dict_prepost = {}
#         for att1 in tests:
#             X_name="Asians"
#             X = []
#             A_name=att1
#             A = []
#             B_name=att1
#             B = []
#             for category in dict_prepost[dates[i]].keys():
#                 if category in X_name:
#                     for word in dict_prepost[dates[i]][category].keys():   
#                         x = literal_eval(dict_prepost[dates[i]][category][word])
#                         x = np.array(x)
#                         X.append(x)
#             for attribute in dict_prepost[dates[i]].keys():
#                 if attribute in A_name:
#                     for word in dict_prepost[dates[i]][attribute].keys():
#                         a = literal_eval(dict_prepost[dates[i]][attribute][word])
#                         a = np.array(a)
#                         A.append(a)    
#             for attribute in dict_prepost[dates[i+1]].keys():
#                 if attribute in B_name:
#                     for word in dict_prepost[dates[i+1]][attribute].keys():
#                         b = literal_eval(dict_prepost[dates[i+1]][attribute][word])
#                         b = np.array(b)
#                         B.append(b)
#             result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_post", B=B, 
#                              permt=2, perm_n=1000)
#             result_dict['dates'] = [dates[i], dates[i+1]]
#             result_dict_prepost[att1] = result_dict
            
#         results[dates[i]] = result_dict_prepost

# sweat_prepost_consecutive = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_prepost_consecutive = sweat_prepost_consecutive.reset_index()
# sweat_prepost_consecutive.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_prepost_consecutive.to_csv('sweat_prepost_consecutive.csv', index = False)


### Full corpus

#### Compare opposing attributes

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}

# tests = [("Warm", "Cold"), ("Competent", "Incompetent")]

# for date, data in dict_full.items():
#     print(date)
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in data.keys():
#             if category in X_name:
#                 for word in data[category].keys():   
#                     x = literal_eval(data[category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for attribute in data.keys():
#             if attribute in A_name:
#                 for word in data[attribute].keys():
#                     a = literal_eval(data[attribute][word])
#                     a = np.array(a)
#                     A.append(a)
#         for attribute in data.keys():
#             if attribute in B_name:
#                 for word in data[attribute].keys():
#                     b = literal_eval(data[attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000)
#         result_dict_full[f"{att1}_{att2}"] = result_dict
        
#     results[date] = result_dict_full

# sweat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_df.head()
# sweat_df = sweat_df.reset_index()
# sweat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat.csv', index = False)


#### Compare same attribute on consecutive days

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for i in np.arange(len(dates)-1):
#     if dates[i] not in results.keys():
#         print(dates[i])
#         print(dates[i+1])
#         result_dict_full = {}
#         for att1 in tests:
#             X_name="Asians"
#             X = []
#             A_name=att1
#             A = []
#             B_name=att1
#             B = []
#             for category in dict_full[dates[i]].keys():
#                 if category in X_name:
#                     for word in dict_full[dates[i]][category].keys():   
#                         x = literal_eval(dict_full[dates[i]][category][word])
#                         x = np.array(x)
#                         X.append(x)
#             for attribute in dict_full[dates[i]].keys():
#                 if attribute in A_name:
#                     for word in dict_full[dates[i]][attribute].keys():
#                         a = literal_eval(dict_full[dates[i]][attribute][word])
#                         a = np.array(a)
#                         A.append(a)    
#             for attribute in dict_full[dates[i+1]].keys():
#                 if attribute in B_name:
#                     for word in dict_full[dates[i+1]][attribute].keys():
#                         b = literal_eval(dict_full[dates[i+1]][attribute][word])
#                         b = np.array(b)
#                         B.append(b)
#             result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                              permt=2, perm_n=1000)
#             result_dict['dates'] = [dates[i], dates[i+1]]
#             result_dict_full[att1] = result_dict
            
#         results[dates[i]] = result_dict_full

# sweat_consecutivedays = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_consecutivedays = sweat_consecutivedays.reset_index()
# sweat_consecutivedays.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_consecutivedays.to_csv('sweat_consecutivedays.csv', index = False)


#### Compare same attribute on days of subsequent years

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for date in dates:
#     if date not in results.keys():
#         result_dict_full = {}
#         if date[:4] == '2019':
#             t2 = re.sub('2019-', '2020-', date)
#             if t2 in dict_full.keys():
#                 if t2 not in result_dict_full.keys():
#                     result_dict_full[t2] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t2].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  
#                         for attribute in dict_full[t2].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t2][attribute].keys():
#                                     b = literal_eval(dict_full[t2][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t2]
#                         result_dict_full[t2][att1] = result_dict
            
#             t3 = re.sub('2019-', '2021-', date)
#             if t3 in dict_full.keys():
#                 if t3 not in result_dict_full.keys():
#                     result_dict_full[t3] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t3].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  
#                         for attribute in dict_full[t3].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t3][attribute].keys():
#                                     b = literal_eval(dict_full[t3][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t3]
#                         result_dict_full[t3][att1] = result_dict
#         elif date[:4] == '2020':
#             t2 = False
#             t3 = re.sub('2020-', '2021-', date)
#             if t3 in dict_full.keys():
#                 if t3 not in result_dict_full.keys():
#                     result_dict_full[t3] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t3].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  

#                         for attribute in dict_full[t3].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t3][attribute].keys():
#                                     b = literal_eval(dict_full[t3][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextyear", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t3]
#                         result_dict_full[t3][att1] = result_dict
#             else:
#                 pass
            
#             results[date] = result_dict_full

# sweat_consecutiveyears = pd.DataFrame.from_dict({(i,j,k): results[i][j][k]
#                                                  for i in results.keys()
#                                                  for j in results[i].keys()
#                                                  for k in results[i][j].keys()},
#                                                 orient='index')
# sweat_consecutiveyears = sweat_consecutiveyears.reset_index()
# sweat_consecutiveyears.columns = ['date', 'comparison_date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_consecutiveyears.to_csv('sweat_consecutiveyears.csv', index = False)


### Asian corpus

#### Compare opposing attributes

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}

# tests = [("Warm", "Cold"), ("Competent", "Incompetent")]

# for date, data in dict_asian.items():
#     print(date)
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in data.keys():
#             if category in X_name:
#                 for word in data[category].keys():   
#                     x = literal_eval(data[category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for attribute in data.keys():
#             if attribute in A_name:
#                 for word in data[attribute].keys():
#                     a = literal_eval(data[attribute][word])
#                     a = np.array(a)
#                     A.append(a)
#         for attribute in data.keys():
#             if attribute in B_name:
#                 for word in data[attribute].keys():
#                     b = literal_eval(data[attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000)
#         result_dict_full[f"{att1}_{att2}"] = result_dict
        
#     results[date] = result_dict_full

# with open("s_weat_asian.json", "w") as outfile:
#     json.dump(results, outfile)

# sweat_asian = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian = sweat_asian.reset_index()
# sweat_asian.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_asian.to_csv('sweat_asian.csv', index = False)


#### Compare same attribute on consecutive days

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_asian.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for i in np.arange(len(dates)-1):
#     if dates[i] not in results.keys():
#         print(dates[i])
#         print(dates[i+1])
#         result_dict_full = {}
#         for att1 in tests:
#             X_name="Asians"
#             X = []
#             A_name=att1
#             A = []
#             B_name=att1
#             B = []
#             for category in dict_asian[dates[i]].keys():
#                 if category in X_name:
#                     for word in dict_asian[dates[i]][category].keys():   
#                         x = literal_eval(dict_asian[dates[i]][category][word])
#                         x = np.array(x)
#                         X.append(x)
#             for attribute in dict_asian[dates[i]].keys():
#                 if attribute in A_name:
#                     for word in dict_asian[dates[i]][attribute].keys():
#                         a = literal_eval(dict_asian[dates[i]][attribute][word])
#                         a = np.array(a)
#                         A.append(a)    
#             for attribute in dict_asian[dates[i+1]].keys():
#                 if attribute in B_name:
#                     for word in dict_asian[dates[i+1]][attribute].keys():
#                         b = literal_eval(dict_asian[dates[i+1]][attribute][word])
#                         b = np.array(b)
#                         B.append(b)
#             result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                              permt=2, perm_n=1000)
#             result_dict['dates'] = [dates[i], dates[i+1]]
#             result_dict_full[att1] = result_dict
            
#         results[dates[i]] = result_dict_full

# sweat_asian_consecutivedays = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian_consecutivedays = sweat_asian_consecutivedays.reset_index()
# sweat_asian_consecutivedays.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_asian_consecutivedays.to_csv('sweat_asian_consecutivedays.csv', index = False)


#### Compare same attribute on days of subsequent years

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_asian.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for date in dates:
#     if date not in results.keys():
#         result_dict_full = {}
#         if date[:4] == '2019':
#             t2 = re.sub('2019-', '2020-', date)
#             if t2 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  
#                     for attribute in dict_asian[t2].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t2][attribute].keys():
#                                 b = literal_eval(dict_asian[t2][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t2]
#                     result_dict_full[t2] = {}
#                     result_dict_full[t2][att1] = result_dict
#             t3 = re.sub('2019-', '2021-', date)
#             if t3 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  

#                     for attribute in dict_asian[t3].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t3][attribute].keys():
#                                 b = literal_eval(dict_asian[t3][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t3]
#                     result_dict_full[t3] = {}
#                     result_dict_full[t3][att1] = result_dict
#         elif date[:4] == '2020':
#             t2 = False
#             t3 = re.sub('2020-', '2021-', date)
#             if t3 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  

#                     for attribute in dict_asian[t3].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t3][attribute].keys():
#                                 b = literal_eval(dict_asian[t3][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextyear", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t3]
#                     result_dict_full[t3] = {}
#                     result_dict_full[t3][att1] = result_dict
#             else:
#                 pass
            
#             results[date] = result_dict_full

# sweat_asian_consecutiveyears = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian_consecutiveyears = sweat_asian_consecutiveyears.reset_index()
# sweat_asian_consecutiveyears.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_asian_consecutiveyears.to_csv('sweat_asian_consecutiveyears.csv', index = False)


# Merge with NYT df

In [None]:
# df_nyt = pd.read_csv('df_nyt.csv')
# df_nyt = df_nyt.sort_values(by=['date'], ignore_index=True)
# df_asian = pd.read_csv('s_weat_asian.csv')
# df_asian = df_asian.sort_values(by=['date'], ignore_index=True)

In [None]:
# df = pd.merge(attribute_weat_asian_df, df_nyt, on="date")
# df.to_csv('attribute_weat_asian_df.csv', index = False)