In [None]:
import json
import csv
import re
import random
import ast
from ast import literal_eval
from itertools import combinations
from collections import OrderedDict

import pandas as pd
import numpy as np

from scipy import spatial
from math import sqrt

"""
Test: Take out 20 randomly (same length in each bucket), 
"""

# WEAT

## Define functions

In [None]:
def weighted_std(values, weights):
    """Return the weighted standard deviation of ``values`` with an n/(n-1) correction.

    Parameters
    ----------
    values : array-like
        Observations. Lists are accepted and converted to a float ndarray.
    weights : array-like
        One weight per observation (same shape as ``values``).

    Returns
    -------
    float
        ``sqrt(weighted_variance * n / (n - 1))`` where ``n = len(values)``.
        NaN when there is exactly one observation, because the correction
        factor is undefined in that case.
    """
    # Coerce explicitly: the caller in this file passes plain Python lists.
    values = np.asarray(values, dtype=float)
    weights = np.asarray(weights, dtype=float)
    n_obs = len(values)
    if n_obs == 1:
        # 0/0 below would yield NaN anyway, but with a RuntimeWarning.
        return float("nan")
    average = np.average(values, weights=weights)
    variance = np.average((values - average) ** 2, weights=weights)
    # Small sample size bias correction:
    variance_ddof1 = variance * n_obs / (n_obs - 1)
    return sqrt(variance_ddof1)

def within_group_cohesion(X):
    """Return the mean pairwise cosine distance between the vectors in ``X``.

    ``X`` is a 2D collection with one word vector per row. Smaller values
    mean the vectors point in more similar directions.
    """
    pairwise_distances = spatial.distance.pdist(X, metric='cosine')
    return pairwise_distances.mean()

def sim(x, A, B):
    """Return how much closer ``x`` is, on average, to ``A`` than to ``B``.

    x : ndarray, the vector of one word from category X
    A : vectors of the words in attribute A (one per row)
    B : vectors of the words in attribute B (one per row)

    The result is mean cosine similarity of ``x`` to ``A`` minus mean cosine
    similarity of ``x`` to ``B``.
    """
    query = x.reshape(1, -1)

    def mean_cosine_similarity(targets):
        # cdist returns cosine *distance*; 1 - distance is the similarity.
        distances = spatial.distance.cdist(query, targets, 'cosine')
        return (1 - distances).sum() / len(targets)

    return mean_cosine_similarity(A) - mean_cosine_similarity(B)

def group_cohesion_test(X, Y, perm_n = 1000, permtype = 1):
    """Permutation test of how cohesive groups ``X`` and ``Y`` are.

    The test statistic is the size-weighted average of the two groups' mean
    pairwise cosine distances. It is compared against the same statistic
    computed on re-partitions of the pooled vectors into groups of the
    original sizes.

    Parameters
    ----------
    X, Y : 2D arrays, one word vector per row
    perm_n : int
        Number of random re-partitions (only used when ``permtype == 1``).
    permtype : int
        1 = ``perm_n`` random shuffles; any other value = exhaustive
        enumeration of every way to choose ``len(X)`` rows from the pool
        (grows combinatorially).

    Returns
    -------
    float
        (count of permuted statistics <= observed statistic, plus 1)
        divided by (number of permutations plus 1).
    """
    def weighted_cohesion(group1, group2):
        # Average of the two within-group distances, weighted by group size.
        return np.average([within_group_cohesion(group1), within_group_cohesion(group2)],
                          weights=[len(group1), len(group2)])

    test_statistic = weighted_cohesion(X, Y)
    # np.concatenate returns a new array, so shuffling never touches X or Y.
    jointlist = np.concatenate((X, Y))
    cutpoint = len(X)
    # Collect in a list; np.append inside the loop would copy the array each time.
    permuted = []
    if permtype == 1:
        for _ in range(perm_n):
            np.random.shuffle(jointlist)
            permuted.append(weighted_cohesion(jointlist[:cutpoint], jointlist[cutpoint:]))
    else:
        n_total = len(jointlist)
        for comb in combinations(range(n_total), cutpoint):
            # Boolean row mask keeps the original row order in both halves.
            in_first = np.zeros(n_total, dtype=bool)
            in_first[list(comb)] = True
            permuted.append(weighted_cohesion(jointlist[in_first], jointlist[~in_first]))
    permuted = np.asarray(permuted, dtype=float)
    P_val = (np.count_nonzero(permuted <= test_statistic) + 1) / (len(permuted) + 1)
    return P_val

def diff_sim(X, A, B, effect=1, Y=False):
    """Compute the WEAT association difference (and optionally its effect size).

    Parameters
    ----------
    X : vectors of the words in category X (one per row)
    A : vectors of the words in attribute A
    B : vectors of the words in attribute B
    effect : 1 to return ``(difference, standard_dev, effect_size)``;
        any other value returns only ``difference``.
    Y : optional vectors of the words in category Y. When given (a non-empty
        list or ndarray) the two-category statistic is computed; when left as
        ``False``/``None``/empty the single-category statistic is computed.

    Two-category: ``difference`` is mean ``sim(x, A, B)`` over X minus mean
    ``sim(y, A, B)`` over Y, and the standard deviation is a weighted one over
    all words, each word weighted by the size of the *other* category.

    Single-category: ``difference`` is the mean (over A) of each attribute
    word's mean cosine similarity to X, minus the same over B; the standard
    deviation is the sample SD (ddof=1) of those per-attribute-word means.

    ``effect_size`` is ``difference / standard_dev``, or 0 when the standard
    deviation is exactly 0.
    """
    # `if Y:` raises ValueError for a multi-element ndarray, so test presence
    # by length and fall back to truthiness for non-sized values such as False.
    try:
        has_Y = len(Y) > 0
    except TypeError:
        has_Y = bool(Y)

    if has_Y:
        sims_X = [sim(np.array(x), A, B) for x in X]
        sims_Y = [sim(np.array(y), A, B) for y in Y]
        difference = sum(sims_X)/len(X) - sum(sims_Y)/len(Y)
        all_sims = sims_X + sims_Y
        # For SD calculation, assign weights based on frequency of opposite category
        weights = [len(Y)] * len(X) + [len(X)] * len(Y)
        standard_dev = weighted_std(all_sims, weights)
    else:
        def mean_similarity_to_X(vector):
            # Mean cosine similarity between one attribute word and all of X.
            distances = spatial.distance.cdist(np.asarray(vector).reshape(1, -1), X, 'cosine')
            return (1 - distances).sum() / len(X)

        sims_A = [mean_similarity_to_X(a) for a in A]
        sims_B = [mean_similarity_to_X(b) for b in B]
        difference = sum(sims_A)/len(A) - sum(sims_B)/len(B)
        all_sims = sims_A + sims_B
        standard_dev = np.std(all_sims, ddof=1)

    if standard_dev == 0:
        effect_size = 0
    else:
        effect_size = difference/standard_dev

    if effect == 1:
        return difference, standard_dev, effect_size
    else:
        return difference

def permutation_test(X, A, B, Y=False):
    """Exhaustive permutation distribution for the WEAT statistic.

    Parameters
    ----------
    X : vectors of the words in category X
    A : vectors of the words in attribute A
    B : vectors of the words in attribute B
    Y : optional vectors of the words in category Y

    With ``Y`` given, the pooled category words (X + Y) are split in every
    possible way into groups of the original sizes, and the two-category
    *effect size* of each split is recorded (the quantity ``weat`` compares
    its observed effect size against).

    Without ``Y``, the pooled attribute words (A + B) are re-split instead and
    the single-category *difference* of each split is recorded.

    Returns a list with one statistic per split. The number of splits grows
    combinatorially with the pool size.
    """
    # Array-safe presence check (`if Y:` is ambiguous for ndarrays).
    try:
        two_categories = len(Y) > 0
    except TypeError:
        two_categories = bool(Y)

    if two_categories:
        jointlist = np.array(list(X) + list(Y))
        first_size = len(X)
    else:
        jointlist = np.array(list(A) + list(B))
        first_size = len(A)

    permutations = []
    for comb in combinations(range(len(jointlist)), first_size):
        chosen = set(comb)  # O(1) membership instead of scanning the tuple
        set1 = [item for i, item in enumerate(jointlist) if i in chosen]
        set2 = [item for i, item in enumerate(jointlist) if i not in chosen]
        if two_categories:
            # Keyword arguments are essential: diff_sim's positional order is
            # (X, A, B, effect), so passing (set1, set2, A, B) positionally
            # would treat set2 as attribute A and B as the `effect` flag.
            permutations.append(diff_sim(X=set1, Y=set2, A=A, B=B, effect=1)[2])
        else:
            permutations.append(diff_sim(X, set1, set2, effect=0))
    return permutations

def rand_test(X, A, B, perm_n, Y=False):
    """Random-permutation distribution for the WEAT statistic.

    Parameters
    ----------
    X : vectors of the words in category X
    A : vectors of the words in attribute A
    B : vectors of the words in attribute B
    perm_n : int, number of random permutations
    Y : optional vectors of the words in category Y

    With ``Y`` given, the pooled category words (X + Y) are shuffled and cut
    into groups of the original sizes ``perm_n`` times, and the two-category
    *effect size* of each split is recorded (the quantity ``weat`` compares
    its observed effect size against).

    Without ``Y``, the pooled attribute words (A + B) are shuffled instead and
    the single-category *difference* of each split is recorded.

    Uses NumPy's global random state (``np.random.shuffle``). Returns a list
    of ``perm_n`` statistics.
    """
    # Array-safe presence check (`if Y:` is ambiguous for ndarrays).
    try:
        two_categories = len(Y) > 0
    except TypeError:
        two_categories = bool(Y)

    if two_categories:
        jointlist = np.array(list(X) + list(Y))
        cutpoint = len(X)
    else:
        jointlist = np.array(list(A) + list(B))
        cutpoint = len(A)

    # Initial shuffle kept so the global random stream is consumed exactly as before.
    np.random.shuffle(jointlist)
    permutations = []
    for _ in range(perm_n):
        np.random.shuffle(jointlist)
        set1 = jointlist[:cutpoint]
        set2 = jointlist[cutpoint:]
        if two_categories:
            # Keyword arguments are essential: diff_sim's positional order is
            # (X, A, B, effect), so passing (set1, set2, A, B) positionally
            # would treat set2 as attribute A and B as the `effect` flag.
            # Y is handed over as a plain list so a truthiness test on it is unambiguous.
            permutations.append(diff_sim(X=set1, Y=list(set2), A=A, B=B, effect=1)[2])
        else:
            permutations.append(diff_sim(X, set1, set2, effect=0))
    return permutations

### DOUBLE CATEGORY WEAT

def weat(X_name, X, Y_name, Y, A_name, A, B_name, B, 
         permt=0, perm_n=10000, cohesion_test=False, cohesion_permutations=1000, cohesion_type=2):
    """Run a double-category WEAT and return the results as an ordered dict.

    Parameters
    ----------
    X_name, Y_name : names of category 1 and 2, used in the result output.
    X, Y : iterables of numpy arrays, one per word in category 1 / category 2.
    A_name, B_name : names of attribute 1 and 2, used in the result output.
    A, B : iterables of numpy arrays, one per word in attribute 1 / attribute 2.
    permt : 0 = no permutation test, 1 = exhaustive permutation test
        (``permutation_test``), 2 = random permutation test with ``perm_n``
        permutations (``rand_test``). Any other value skips the test.
    perm_n : number of random permutations when ``permt == 2``.
    cohesion_test : whether to also run the within-group cohesion tests.
    cohesion_permutations : number of permutations for the cohesion tests.
    cohesion_type : forwarded to ``group_cohesion_test`` as ``permtype``:
        1 = random shuffles, any other value (including the default 2) =
        exhaustive enumeration, which grows combinatorially with group size.

    Returns
    -------
    OrderedDict with keys, in order: "categories", "attributes", "difference",
    "standard_dev", "effect_size"; then "Pleft", "Pright", "Ptot", "se" when a
    permutation test ran; then "cohesion_categories", "cohesion_attributes"
    when the cohesion test ran.
    """
    # Calculate effect size
    difference, standard_dev, effect_size = diff_sim(X=X, Y=Y, A=A, B=B, effect=1)

    result_dict = OrderedDict({"categories": [X_name, Y_name],
                               "attributes": [A_name, B_name],
                               "difference": difference,
                               "standard_dev": standard_dev,
                               "effect_size": effect_size})

    # Permutations if permt is not 0
    if permt == 1 or permt == 2:
        if permt == 1:
            permutations = np.array(permutation_test(X=X, Y=Y, A=A, B=B))
        else:
            permutations = np.array(rand_test(X=X, Y=Y, A=A, B=B, perm_n=perm_n))
        # Centre the permutation distribution and the observed effect size on
        # the permutation mean before comparing them.
        perm_mean = np.mean(permutations)
        permutations = permutations - perm_mean
        sum_c = effect_size - perm_mean
        denominator = len(permutations) + 1
        # "+1" in numerator and denominator counts the observed value itself.
        result_dict["Pleft"] = (np.count_nonzero(permutations <= sum_c) + 1) / denominator
        result_dict["Pright"] = (np.count_nonzero(permutations >= sum_c) + 1) / denominator
        result_dict["Ptot"] = (np.count_nonzero(np.abs(permutations) >= abs(sum_c)) + 1) / denominator
        result_dict["se"] = np.std(permutations)

    # Cohesion test if cohesion_test is true
    if cohesion_test == True:
        result_dict["cohesion_categories"] = group_cohesion_test(
            X=X, Y=Y, perm_n=cohesion_permutations, permtype=cohesion_type)
        result_dict["cohesion_attributes"] = group_cohesion_test(
            X=A, Y=B, perm_n=cohesion_permutations, permtype=cohesion_type)

    return result_dict    

### SINGLE CATEGORY WEAT

def s_weat(X_name, X, A_name, A, B_name, B, permt = 0, perm_n = 10000):
    """Run a single-category WEAT and return the results as an ordered dict.

    Parameters
    ----------
    X_name : name of the category, used in the result output.
    X : iterable of numpy arrays, one per word in the category.
    A_name, B_name : names of attribute 1 and 2, used in the result output.
    A, B : iterables of numpy arrays, one per word in attribute 1 / attribute 2.
    permt : 0 = no permutation test, 1 = exhaustive permutation test
        (``permutation_test``), 2 = random permutation test with ``perm_n``
        permutations (``rand_test``). Any other value skips the test.
    perm_n : number of random permutations when ``permt == 2``.

    Returns
    -------
    OrderedDict with keys, in order: "category", "attributes", "difference",
    "standard_dev", "effect_size"; then "Pleft", "Pright", "Ptot" when a
    permutation test ran. The p-values compare the observed *difference*
    with the permuted differences.
    """
    difference, standard_dev, effect_size = diff_sim(X=X, A=A, B=B)

    result_dict = OrderedDict({"category": [X_name],
                               "attributes": [A_name, B_name],
                               "difference": difference,
                               "standard_dev": standard_dev,
                               "effect_size": effect_size})
    if permt == 1 or permt == 2:
        if permt == 1:
            permutations = np.array(permutation_test(X, A, B))
        else:
            permutations = np.array(rand_test(X, A, B, perm_n = perm_n))
        # Centre both the permutation distribution and the observed difference
        # on the permutation mean before comparing them.
        perm_mean = np.mean(permutations)
        permutations = permutations - perm_mean
        sum_c = difference - perm_mean
        denominator = len(permutations) + 1
        # "+1" in numerator and denominator counts the observed value itself.
        result_dict["Pleft"] = (np.count_nonzero(permutations <= sum_c) + 1) / denominator
        result_dict["Pright"] = (np.count_nonzero(permutations >= sum_c) + 1) / denominator
        result_dict["Ptot"] = (np.count_nonzero(np.abs(permutations) >= abs(sum_c)) + 1) / denominator
    return result_dict


# Load the dataframes 

In [None]:
def _nested_embedding_dict(df):
    """Turn an embeddings frame into ``{day: {category: {word: vector_string}}}``.

    Expects the columns 'day', 'category', 'word' and 'vectors'. The vector
    values are left exactly as read (the analysis cells parse them with
    ``literal_eval``).
    """
    nested = df.groupby('day').apply(lambda a: dict(a.groupby('category').apply(lambda x: dict(zip(x['word'], x['vectors'])))))
    return nested.to_dict()


# df_asian = pd.read_csv('embeddings_asian.csv')
# dict_asian = _nested_embedding_dict(df_asian)

df_prepost = pd.read_csv('embeddings_prepost.csv')
df_prepost = df_prepost.sort_values(by="day")
dict_prepost = _nested_embedding_dict(df_prepost)

# dict_full is used by the per-date WEAT check further down, so it has to be
# loaded for the notebook to run from a fresh kernel. It is loaded after the
# pre/post corpus so those names exist even if this file is unavailable.
df_full = pd.read_csv('embeddings_full.csv')
df_full = df_full.sort_values(by="day")
dict_full = _nested_embedding_dict(df_full)


## Data quality checks

In [None]:
# Vectors from Kurdi, Mann, Charlesworth, & Banaji (2018). The checks below
# read `charlesworth_df`, so it must be loaded here for a fresh-kernel run.
charlesworth_df = pd.read_csv("../../Charlesworth/Study 3/Kurdi, Mann, Charlesworth, & Banaji (2018) Vectors.csv")
# charlesworth_df.head()
charlesworth_df.category.unique()

In [None]:
groups = ['Asians', 'Whites']

# For each word set: pull the stored vector strings for that category label,
# then parse every string into a numpy array.
X_raw = charlesworth_df.loc[charlesworth_df.category == "Asians", "vector"].tolist()
X = [np.array(literal_eval(raw_vector)) for raw_vector in X_raw]

Y_raw = charlesworth_df.loc[charlesworth_df.category == "Whites", "vector"].tolist()
Y = [np.array(literal_eval(raw_vector)) for raw_vector in Y_raw]

A_raw = charlesworth_df.loc[charlesworth_df.category == "Warm", "vector"].tolist()
A = [np.array(literal_eval(raw_vector)) for raw_vector in A_raw]

B_raw = charlesworth_df.loc[charlesworth_df.category == "Cold", "vector"].tolist()
B = [np.array(literal_eval(raw_vector)) for raw_vector in B_raw]

# Asians vs. Whites on the Warm/Cold axis, 1000 random permutations, no cohesion test.
weat(X_name="Asians", X=X,
     Y_name="Whites", Y=Y,
     A_name="Warm", A=A,
     B_name="Cold", B=B,
     permt=2, perm_n=1000,
     cohesion_test=False, cohesion_permutations=100, cohesion_type=0)



In [None]:
groups = ['Asians', 'Whites']

# For each word set: pull the stored vector strings for that category label,
# then parse every string into a numpy array.
X_raw = charlesworth_df.loc[charlesworth_df.category == "Asians", "vector"].tolist()
X = [np.array(literal_eval(raw_vector)) for raw_vector in X_raw]

Y_raw = charlesworth_df.loc[charlesworth_df.category == "Whites", "vector"].tolist()
Y = [np.array(literal_eval(raw_vector)) for raw_vector in Y_raw]

A_raw = charlesworth_df.loc[charlesworth_df.category == "Competence", "vector"].tolist()
A = [np.array(literal_eval(raw_vector)) for raw_vector in A_raw]

B_raw = charlesworth_df.loc[charlesworth_df.category == "Incompetence", "vector"].tolist()
B = [np.array(literal_eval(raw_vector)) for raw_vector in B_raw]

# Asians vs. Whites on the Competence/Incompetence axis, 1000 random permutations, no cohesion test.
weat(X_name="Asians", X=X,
     Y_name="Whites", Y=Y,
     A_name="Competence", A=A,
     B_name="Incompetence", B=B,
     permt=2, perm_n=1000,
     cohesion_test=False, cohesion_permutations=100, cohesion_type=0)



In [None]:
# Per-date double-category WEAT (Asians vs. Whites) on the full corpus.
# Requires `dict_full` (day -> category -> word -> vector string) to be defined.
np.random.seed(random.randint(1, 1000))
results = {}
dates = sorted(dict_full.keys())
tests = [("Warm", "Cold"),("Competent", "Incompetent")]

# Every date is analysed on its own (no look-ahead to the next date), so the
# loop covers all dates, including the last one.
for date in dates:
    day_data = dict_full[date]
    result_dict_full = {}
    for att1, att2 in tests:
        X_name = "Asians"
        Y_name = "Whites"
        A_name = att1
        B_name = att2
        # NOTE(review): `category in X_name` is a substring test (kept as-is),
        # so a label such as "Asian" would also match "Asians" — confirm that
        # the category labels make this equivalent to an exact match.
        X = [np.array(literal_eval(vector))
             for category, words in day_data.items() if category in X_name
             for vector in words.values()]
        Y = [np.array(literal_eval(vector))
             for category, words in day_data.items() if category in Y_name
             for vector in words.values()]
        A = [np.array(literal_eval(vector))
             for attribute, words in day_data.items() if attribute in A_name
             for vector in words.values()]
        B = [np.array(literal_eval(vector))
             for attribute, words in day_data.items() if attribute in B_name
             for vector in words.values()]
        result_dict = weat(X_name="Asians", X=X, Y_name="Whites", Y=Y, A_name=att1, A=A, B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, cohesion_permutations=100, cohesion_type=0)
        result_dict_full[f"{att1}_{att2}"] = result_dict

    results[date] = result_dict_full

weat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
                                  for i in results.keys()
                                  for j in results[i].keys()},
                                 orient='index')
weat_df = weat_df.reset_index()
# The date is already the first index level; it is not stored a second time in
# each result, which previously produced two columns both labelled 'date'.
weat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'se']
weat_df.head()



## Double category WEAT

## Pre Post corpus

In [None]:
# Double-category WEAT comparing the Asians word set between consecutive
# periods of the pre/post corpus.
np.random.seed(random.randint(1, 1000))
results = {}
# Order periods chronologically. A plain alphabetical sort puts 'post' before
# 'pre', which would load the post vectors into X while labelling them
# "Asians_pre". Any other keys keep their sorted order after these two.
phase_rank = {"pre": 0, "post": 1}
dates = sorted(dict_prepost.keys(),
               key=lambda day: (phase_rank.get(day, len(phase_rank)), day))
tests = [("Warm", "Cold"),("Competent", "Incompetent")]

for earlier, later in zip(dates, dates[1:]):
    print(earlier)
    print(later)
    earlier_data = dict_prepost[earlier]
    later_data = dict_prepost[later]
    result_dict_prepost = {}
    for att1, att2 in tests:
        X_name = "Asians"
        Y_name = "Asians"
        A_name = att1
        B_name = att2
        # X: category words in the earlier period; Y: the same category in the later period.
        X = [np.array(literal_eval(vector))
             for category, words in earlier_data.items() if category in X_name
             for vector in words.values()]
        Y = [np.array(literal_eval(vector))
             for category, words in later_data.items() if category in Y_name
             for vector in words.values()]
        # NOTE(review): both attribute sets are taken from the earlier period
        # only, while Y comes from the later one — confirm this is intended.
        A = [np.array(literal_eval(vector))
             for attribute, words in earlier_data.items() if attribute in A_name
             for vector in words.values()]
        B = [np.array(literal_eval(vector))
             for attribute, words in earlier_data.items() if attribute in B_name
             for vector in words.values()]
        result_dict = weat(X_name="Asians_pre", X=X, Y_name="Asian_post", Y=Y, A_name=att1, A=A, B_name=att2, B=B, 
                           permt=2, perm_n=1000, cohesion_test=False, cohesion_permutations=100, cohesion_type=0)
        result_dict['dates'] = [earlier, later]
        result_dict_prepost[f"{att1}_{att2}"] = result_dict

    if earlier not in results.keys():
        results[earlier] = result_dict_prepost

weat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
                                  for i in results.keys()
                                  for j in results[i].keys()},
                                 orient='index')
weat_df = weat_df.reset_index()
weat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'se', 'dates']
weat_df.head()

# weat_df.to_csv('weat_prepost.csv', index = False)


## Full corpus

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()
# tests = [("Warm", "Cold"),("Competent", "Incompetent")]

# for i in np.arange(len(dates)-1):
#     print(dates[i])
#     print(dates[i+1])
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         Y_name="Asians"
#         Y = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in dict_full[dates[i]].keys():
#             if category in X_name:
#                 for word in dict_full[dates[i]][category].keys():   
#                     x = literal_eval(dict_full[dates[i]][category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for category in dict_full[dates[i+1]].keys():
#             if category in Y_name:
#                 for word in dict_full[dates[i+1]][category].keys():
#                     y = literal_eval(dict_full[dates[i+1]][category][word])
#                     y = np.array(y)
#                     Y.append(y)
#         for attribute in dict_full[dates[i]].keys():
#             if attribute in A_name:
#                 for word in dict_full[dates[i]][attribute].keys():
#                     a = literal_eval(dict_full[dates[i]][attribute][word])
#                     a = np.array(a)
#                     A.append(a)    
#         for attribute in dict_full[dates[i]].keys():
#             if attribute in B_name:
#                 for word in dict_full[dates[i]][attribute].keys():
#                     b = literal_eval(dict_full[dates[i]][attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = weat(X_name="Asians", X=X, Y_name="Asian_nextday", Y=Y, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000, cohesion_test=False, cohesion_permutations=100, cohesion_type=0)
#         result_dict['dates'] = [dates[i], dates[i+1]]
#         result_dict_full[f"{att1}_{att2}"] = result_dict
    
#     if dates[i] not in results.keys():
#         results[dates[i]] = result_dict_full

# weat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                   for i in results.keys()
#                                   for j in results[i].keys()},
#                                  orient='index')
# weat_df = weat_df.reset_index()
# weat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'se', 'dates']
# weat_df.head()
# weat_df.to_csv('weat_consecutivedays.csv', index = False)



In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()
# tests = [("Warm", "Cold"),("Competent", "Incompetent")]

# for i in np.arange(len(dates)-1):
#     print(dates[i])
#     print(dates[i+1])
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         Y_name="Asians"
#         Y = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in dict_full[dates[i]].keys():
#             if category in X_name:
#                 for word in dict_full[dates[i]][category].keys():   
#                     x = literal_eval(dict_full[dates[i]][category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for category in dict_full[dates[i+1]].keys():
#             if category in Y_name:
#                 for word in dict_full[dates[i+1]][category].keys():
#                     y = literal_eval(dict_full[dates[i+1]][category][word])
#                     y = np.array(y)
#                     Y.append(y)
#         for attribute in dict_full[dates[i]].keys():
#             if attribute in A_name:
#                 for word in dict_full[dates[i]][attribute].keys():
#                     a = literal_eval(dict_full[dates[i]][attribute][word])
#                     a = np.array(a)
#                     A.append(a)    
#         for attribute in dict_full[dates[i]].keys():
#             if attribute in B_name:
#                 for word in dict_full[dates[i]][attribute].keys():
#                     b = literal_eval(dict_full[dates[i]][attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = weat(X_name="Asians", X=X, Y_name="Asian_nextday", Y=Y, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000, cohesion_test=False, cohesion_permutations=100, cohesion_type=0)
#         result_dict['dates'] = [dates[i], dates[i+1]]
#         result_dict_full[f"{att1}_{att2}"] = result_dict
    
#     if dates[i] not in results.keys():
#         results[dates[i]] = result_dict_full

# weat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                   for i in results.keys()
#                                   for j in results[i].keys()},
#                                  orient='index')
# weat_df = weat_df.reset_index()
# weat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'se', 'dates']
# weat_df.head()
# weat_df.to_csv('weat_consecutivedays.csv', index = False)


## Asian corpus

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_asian.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()
# tests = [("Warm", "Cold"),("Competent", "Incompetent")]

# for i in np.arange(len(dates)-1):
#     print(dates[i])
#     print(dates[i+1])
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         Y_name="Asians"
#         Y = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in dict_asian[dates[i]].keys():
#             if category in X_name:
#                 for word in dict_asian[dates[i]][category].keys():   
#                     x = literal_eval(dict_asian[dates[i]][category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for category in dict_asian[dates[i+1]].keys():
#             if category in Y_name:
#                 for word in dict_asian[dates[i+1]][category].keys():
#                     y = literal_eval(dict_asian[dates[i+1]][category][word])
#                     y = np.array(y)
#                     Y.append(y)
#         for attribute in dict_asian[dates[i]].keys():
#             if attribute in A_name:
#                 for word in dict_asian[dates[i]][attribute].keys():
#                     a = literal_eval(dict_asian[dates[i]][attribute][word])
#                     a = np.array(a)
#                     A.append(a)    
#         for attribute in dict_asian[dates[i]].keys():
#             if attribute in B_name:
#                 for word in dict_asian[dates[i]][attribute].keys():
#                     b = literal_eval(dict_asian[dates[i]][attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = weat(X_name="Asians", X=X, Y_name="Asian_nextday", Y=Y, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000, cohesion_test=False, cohesion_permutations=100, cohesion_type=0)
#         result_dict['dates'] = [dates[i], dates[i+1]]
#         result_dict_full[f"{att1}_{att2}"] = result_dict
    
#     if dates[i] not in results.keys():
#         results[dates[i]] = result_dict_full

# weat_asian_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                         for i in results.keys()
#                                         for j in results[i].keys()},
#                                        orient='index')
# weat_asian_df = weat_asian_df.reset_index()
# weat_asian_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'se', 'dates']
# weat_asian_df.to_csv('weat_asian.csv', index = False)


## Single category WEAT

### Pre Post corpus

#### Compare opposing attributes

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}

# tests = [("Warm", "Cold"), ("Competent", "Incompetent")]

# for date, data in dict_prepost.items():
#     print(date)
#     result_dict_prepost = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in data.keys():
#             if category in X_name:
#                 for word in data[category].keys():   
#                     x = literal_eval(data[category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for attribute in data.keys():
#             if attribute in A_name:
#                 for word in data[attribute].keys():
#                     a = literal_eval(data[attribute][word])
#                     a = np.array(a)
#                     A.append(a)
#         for attribute in data.keys():
#             if attribute in B_name:
#                 for word in data[attribute].keys():
#                     b = literal_eval(data[attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000)
#         result_dict_prepost[f"{att1}_{att2}"] = result_dict
        
#     results[date] = result_dict_prepost

# sweat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')

# sweat_df = sweat_df.reset_index()
# sweat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']
# sweat_df.head()

# sweat_df.to_csv('sweat_prepost.csv', index = False)


#### Compare same attribute

In [None]:
# Single-category WEAT on the pre/post corpus: for each attribute, compare the
# attribute's vectors in the earlier period (A) against the same attribute's
# vectors in the later period (B), relative to the Asians words of the earlier period.
np.random.seed(random.randint(1, 1000))
results = {}
dates = ['pre', 'post']

tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

for earlier, later in zip(dates[:-1], dates[1:]):
    if earlier in results.keys():
        continue
    print(earlier)
    print(later)
    earlier_data = dict_prepost[earlier]
    later_data = dict_prepost[later]
    result_dict_prepost = {}
    for att1 in tests:
        X_name = "Asians"
        A_name = att1
        B_name = att1
        # Labels are matched with the substring test `label in name`.
        X = [np.array(literal_eval(vector))
             for category, words in earlier_data.items() if category in X_name
             for vector in words.values()]
        A = [np.array(literal_eval(vector))
             for attribute, words in earlier_data.items() if attribute in A_name
             for vector in words.values()]
        B = [np.array(literal_eval(vector))
             for attribute, words in later_data.items() if attribute in B_name
             for vector in words.values()]
        result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_post", B=B, 
                         permt=2, perm_n=1000)
        result_dict['dates'] = [earlier, later]
        result_dict_prepost[att1] = result_dict

    results[earlier] = result_dict_prepost

# Flatten {period: {attribute: result}} into one row per (period, attribute).
flat_results = {(period, attribute): attribute_results[attribute]
                for period, attribute_results in results.items()
                for attribute in attribute_results.keys()}
sweat_prepost_consecutive = pd.DataFrame.from_dict(flat_results, orient='index')
sweat_prepost_consecutive = sweat_prepost_consecutive.reset_index()
sweat_prepost_consecutive.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

sweat_prepost_consecutive.to_csv('sweat_prepost_consecutive.csv', index = False)


### Full corpus

#### Compare opposing attributes

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}

# tests = [("Warm", "Cold"), ("Competent", "Incompetent")]

# for date, data in dict_full.items():
#     print(date)
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in data.keys():
#             if category in X_name:
#                 for word in data[category].keys():   
#                     x = literal_eval(data[category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for attribute in data.keys():
#             if attribute in A_name:
#                 for word in data[attribute].keys():
#                     a = literal_eval(data[attribute][word])
#                     a = np.array(a)
#                     A.append(a)
#         for attribute in data.keys():
#             if attribute in B_name:
#                 for word in data[attribute].keys():
#                     b = literal_eval(data[attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000)
#         result_dict_full[f"{att1}_{att2}"] = result_dict
        
#     results[date] = result_dict_full

# sweat_df = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_df.head()
# sweat_df = sweat_df.reset_index()
# sweat_df.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_df.to_csv('sweat.csv', index = False)


#### Compare same attribute on consecutive days

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for i in np.arange(len(dates)-1):
#     if dates[i] not in results.keys():
#         print(dates[i])
#         print(dates[i+1])
#         result_dict_full = {}
#         for att1 in tests:
#             X_name="Asians"
#             X = []
#             A_name=att1
#             A = []
#             B_name=att1
#             B = []
#             for category in dict_full[dates[i]].keys():
#                 if category in X_name:
#                     for word in dict_full[dates[i]][category].keys():   
#                         x = literal_eval(dict_full[dates[i]][category][word])
#                         x = np.array(x)
#                         X.append(x)
#             for attribute in dict_full[dates[i]].keys():
#                 if attribute in A_name:
#                     for word in dict_full[dates[i]][attribute].keys():
#                         a = literal_eval(dict_full[dates[i]][attribute][word])
#                         a = np.array(a)
#                         A.append(a)    
#             for attribute in dict_full[dates[i+1]].keys():
#                 if attribute in B_name:
#                     for word in dict_full[dates[i+1]][attribute].keys():
#                         b = literal_eval(dict_full[dates[i+1]][attribute][word])
#                         b = np.array(b)
#                         B.append(b)
#             result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                              permt=2, perm_n=1000)
#             result_dict['dates'] = [dates[i], dates[i+1]]
#             result_dict_full[att1] = result_dict
            
#         results[dates[i]] = result_dict_full

# sweat_consecutivedays = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_consecutivedays = sweat_consecutivedays.reset_index()
# sweat_consecutivedays.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_consecutivedays.to_csv('sweat_consecutivedays.csv', index = False)


#### Compare same attribute on days of subsequent years

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_full.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for date in dates:
#     if date not in results.keys():
#         result_dict_full = {}
#         if date[:4] == '2019':
#             t2 = re.sub('2019-', '2020-', date)
#             if t2 in dict_full.keys():
#                 if t2 not in result_dict_full.keys():
#                     result_dict_full[t2] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t2].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  
#                         for attribute in dict_full[t2].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t2][attribute].keys():
#                                     b = literal_eval(dict_full[t2][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t2]
#                         result_dict_full[t2][att1] = result_dict
            
#             t3 = re.sub('2019-', '2021-', date)
#             if t3 in dict_full.keys():
#                 if t3 not in result_dict_full.keys():
#                     result_dict_full[t3] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t3].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  
#                         for attribute in dict_full[t3].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t3][attribute].keys():
#                                     b = literal_eval(dict_full[t3][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t3]
#                         result_dict_full[t3][att1] = result_dict
#         elif date[:4] == '2020':
#             t2 = False
#             t3 = re.sub('2020-', '2021-', date)
#             if t3 in dict_full.keys():
#                 if t3 not in result_dict_full.keys():
#                     result_dict_full[t3] = {}
#                 for att1 in tests:
#                     if att1 not in result_dict_full[t3].keys():
#                         X_name="Asians"
#                         X = []
#                         A_name=att1
#                         A = []
#                         B_name=att1
#                         B = []
#                         for category in dict_full[date].keys():
#                             if category in X_name:
#                                 for word in dict_full[date][category].keys():   
#                                     x = literal_eval(dict_full[date][category][word])
#                                     x = np.array(x)
#                                     X.append(x)
#                         for attribute in dict_full[date].keys():
#                             if attribute in A_name:
#                                 for word in dict_full[date][attribute].keys():
#                                     a = literal_eval(dict_full[date][attribute][word])
#                                     a = np.array(a)
#                                     A.append(a)  

#                         for attribute in dict_full[t3].keys():
#                             if attribute in B_name:
#                                 for word in dict_full[t3][attribute].keys():
#                                     b = literal_eval(dict_full[t3][attribute][word])
#                                     b = np.array(b)
#                                     B.append(b)
#                         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextyear", B=B, 
#                                          permt=2, perm_n=1000)
#                         result_dict['dates'] = [date, t3]
#                         result_dict_full[t3][att1] = result_dict
#             else:
#                 pass
            
#             results[date] = result_dict_full

# sweat_consecutiveyears = pd.DataFrame.from_dict({(i,j,k): results[i][j][k]
#                                                  for i in results.keys()
#                                                  for j in results[i].keys()
#                                                  for k in results[i][j].keys()},
#                                                 orient='index')
# sweat_consecutiveyears = sweat_consecutiveyears.reset_index()
# sweat_consecutiveyears.columns = ['date', 'comparison_date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_consecutiveyears.to_csv('sweat_consecutiveyears.csv', index = False)


### Asian corpus

#### Compare opposing attributes

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}

# tests = [("Warm", "Cold"), ("Competent", "Incompetent")]

# for date, data in dict_asian.items():
#     print(date)
#     result_dict_full = {}
#     for att1, att2 in tests:
#         X_name="Asians"
#         X = []
#         A_name=att1
#         A = []
#         B_name=att2
#         B = []
#         for category in data.keys():
#             if category in X_name:
#                 for word in data[category].keys():   
#                     x = literal_eval(data[category][word])
#                     x = np.array(x)
#                     X.append(x)
#         for attribute in data.keys():
#             if attribute in A_name:
#                 for word in data[attribute].keys():
#                     a = literal_eval(data[attribute][word])
#                     a = np.array(a)
#                     A.append(a)
#         for attribute in data.keys():
#             if attribute in B_name:
#                 for word in data[attribute].keys():
#                     b = literal_eval(data[attribute][word])
#                     b = np.array(b)
#                     B.append(b)
#         result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=att2, B=B, 
#                            permt=2, perm_n=1000)
#         result_dict_full[f"{att1}_{att2}"] = result_dict
        
#     results[date] = result_dict_full

# with open("s_weat_asian.json", "w") as outfile:
#     json.dump(results, outfile)

# sweat_asian = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian = sweat_asian.reset_index()
# sweat_asian.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot']

# sweat_asian.to_csv('sweat_asian.csv', index = False)


#### Compare same attribute on consecutive days

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_asian.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for i in np.arange(len(dates)-1):
#     if dates[i] not in results.keys():
#         print(dates[i])
#         print(dates[i+1])
#         result_dict_full = {}
#         for att1 in tests:
#             X_name="Asians"
#             X = []
#             A_name=att1
#             A = []
#             B_name=att1
#             B = []
#             for category in dict_asian[dates[i]].keys():
#                 if category in X_name:
#                     for word in dict_asian[dates[i]][category].keys():   
#                         x = literal_eval(dict_asian[dates[i]][category][word])
#                         x = np.array(x)
#                         X.append(x)
#             for attribute in dict_asian[dates[i]].keys():
#                 if attribute in A_name:
#                     for word in dict_asian[dates[i]][attribute].keys():
#                         a = literal_eval(dict_asian[dates[i]][attribute][word])
#                         a = np.array(a)
#                         A.append(a)    
#             for attribute in dict_asian[dates[i+1]].keys():
#                 if attribute in B_name:
#                     for word in dict_asian[dates[i+1]][attribute].keys():
#                         b = literal_eval(dict_asian[dates[i+1]][attribute][word])
#                         b = np.array(b)
#                         B.append(b)
#             result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                              permt=2, perm_n=1000)
#             result_dict['dates'] = [dates[i], dates[i+1]]
#             result_dict_full[att1] = result_dict
            
#         results[dates[i]] = result_dict_full

# sweat_asian_consecutivedays = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian_consecutivedays = sweat_asian_consecutivedays.reset_index()
# sweat_asian_consecutivedays.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_asian_consecutivedays.to_csv('sweat_asian_consecutivedays.csv', index = False)


#### Compare same attribute on days of subsequent years

In [None]:
# np.random.seed(random.randint(1, 1000))
# results = {}
# dates = []
# for date in dict_asian.keys():
#     if date not in dates:
#         dates.append(date)
# dates.sort()

# tests = ["Warm", "Cold", "Competent", "Incompetent", "Foreign", "Diseased"]

# for date in dates:
#     if date not in results.keys():
#         result_dict_full = {}
#         if date[:4] == '2019':
#             t2 = re.sub('2019-', '2020-', date)
#             if t2 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  
#                     for attribute in dict_asian[t2].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t2][attribute].keys():
#                                 b = literal_eval(dict_asian[t2][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t2]
#                     result_dict_full[t2] = {}
#                     result_dict_full[t2][att1] = result_dict
#             t3 = re.sub('2019-', '2021-', date)
#             if t3 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  

#                     for attribute in dict_asian[t3].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t3][attribute].keys():
#                                 b = literal_eval(dict_asian[t3][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextday", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t3]
#                     result_dict_full[t3] = {}
#                     result_dict_full[t3][att1] = result_dict
#         elif date[:4] == '2020':
#             t2 = False
#             t3 = re.sub('2020-', '2021-', date)
#             if t3 in dict_asian.keys():
#                 for att1 in tests:
#                     X_name="Asians"
#                     X = []
#                     A_name=att1
#                     A = []
#                     B_name=att1
#                     B = []
#                     for category in dict_asian[date].keys():
#                         if category in X_name:
#                             for word in dict_asian[date][category].keys():   
#                                 x = literal_eval(dict_asian[date][category][word])
#                                 x = np.array(x)
#                                 X.append(x)
#                     for attribute in dict_asian[date].keys():
#                         if attribute in A_name:
#                             for word in dict_asian[date][attribute].keys():
#                                 a = literal_eval(dict_asian[date][attribute][word])
#                                 a = np.array(a)
#                                 A.append(a)  

#                     for attribute in dict_asian[t3].keys():
#                         if attribute in B_name:
#                             for word in dict_asian[t3][attribute].keys():
#                                 b = literal_eval(dict_asian[t3][attribute][word])
#                                 b = np.array(b)
#                                 B.append(b)
#                     result_dict = s_weat(X_name="Asians", X=X, A_name=att1, A=A, B_name=f"{att1}_nextyear", B=B, 
#                                      permt=2, perm_n=1000)
#                     result_dict['dates'] = [date, t3]
#                     result_dict_full[t3] = {}
#                     result_dict_full[t3][att1] = result_dict
#             else:
#                 pass
            
#             results[date] = result_dict_full

# sweat_asian_consecutiveyears = pd.DataFrame.from_dict({(i,j): results[i][j]
#                                           for i in results.keys()
#                                           for j in results[i].keys()},
#                                          orient='index')
# sweat_asian_consecutiveyears = sweat_asian_consecutiveyears.reset_index()
# sweat_asian_consecutiveyears.columns = ['date', 'axis', 'category', 'attribute', 'difference', 'standard_dev', 'effect_size', 'Pleft', 'Pright', 'Ptot', 'dates']

# sweat_asian_consecutiveyears.to_csv('sweat_asian_consecutiveyears.csv', index = False)


# Merge with NYT df

In [None]:
# df_nyt = pd.read_csv('df_nyt.csv')
# df_nyt = df_nyt.sort_values(by=['date'], ignore_index=True)
# df_asian = pd.read_csv('s_weat_asian.csv')
# df_asian = df_asian.sort_values(by=['date'], ignore_index=True)

In [None]:
# df = pd.merge(attribute_weat_asian_df, df_nyt, on="date")
# df.to_csv('attribute_weat_asian_df.csv', index = False)