In [1]:
import csv
import matplotlib as mpl
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import scipy

from sklearn.cluster import SpectralClustering

from constants import (
    BAT_SIGNAL_FILE,
    BAT_SIGNAL_AND_STEPHAN_FILE,
    ALL_FILE,
)
from util import (
    cognitive_dissonance_score,
    Strength,
    ONEOFFS_TO_TOKEN,
    TOKEN_TO_STRENGTH,
)
%load_ext autoreload
%autoreload 2

In [4]:
def map_to_strength(strength_str):
    lowercase = strength_str.lower()
    if lowercase in ONEOFFS_TO_TOKEN:
        token = ONEOFFS_TO_TOKEN[lowercase]
    else:
        token = lowercase
    
    return TOKEN_TO_STRENGTH[token]
    
def preprocess(strengths):
    return [map_to_strength(strength_str.strip()) for strength_str in strengths]

with open(BAT_SIGNAL_FILE) as csvfile:
    reader = csv.reader(csvfile)
    user_strengths = {}
    for row in reader:
        if len(row) != 26:
            print("{} has issue".format(row[0]))
        name = row[0]
        evaluator = row[1].lower().strip()
        strengths_in_order = row[2:]
        strengths = preprocess(strengths_in_order)
        
        if evaluator == "self":
            user_strengths[name] = strengths
print("Done")

Done


In [5]:
users = list(user_strengths.keys())

d = {}
index = {}
users.sort() # Needed so that we can create a diagonal matrix
for i, user in enumerate(users):
    d[user] = [0 for j in range(len(users))]
    index[i] = user

df = pd.DataFrame(d, dtype=np.float64)
df = df.rename(index=index)

for user in users:
    for other_user in users:
        top3 = set(user_strengths[user][:3])
        other_top3 = set(user_strengths[other_user][:3])
        shared_top3 = (top3 & other_top3)
        if shared_top3:
            df[user][other_user] = 1
            df[other_user][user] = 1
        else:
            df[user][other_user] = 0
            df[other_user][user] = 0

In [6]:
G = nx.Graph()
users = list(user_strengths.keys())
node_sizes = []
for name in users:
#     first, last = name.split(' ')
#     G.add_node(first)
    G.add_node(name)
    node_sizes.append(1200)
#     node_colors.append(["blue", "green", "red"][random.randint(0,2)])
#     node_sizes.append(random.randint(1, 10)*150)

for i, user in enumerate(users):
    for j, other_user in enumerate(users[i+1:]):
        top3 = set(user_strengths[user][:3])
        other_top3 = set(user_strengths[other_user][:3])
        shared_top3 = (top3 & other_top3)
        if shared_top3:
            G.add_edge(user, other_user)

adj_sizes = []
for i, n in enumerate(G.nodes()):
    adj_list_size = len(G.adj[n])
    node_sizes[i] *= adj_list_size
    adj_sizes.append(adj_list_size)

adj_sizes = np.array(adj_sizes)

mean = adj_sizes.mean()
std = adj_sizes.std()

lower_band = mean - std
upper_band = mean + std

node_colors = []
for i, n in enumerate(G.nodes()):
    size = adj_sizes[i]
    if size < lower_band:
        node_colors.append("lightblue")
    elif size > upper_band:
        node_colors.append("lime")
    else:
        node_colors.append("lightgreen")

def create_user_vec(single_user_strengths):
    user_vec = np.empty(24)
    for i,strength in enumerate(single_user_strengths):
        user_vec[strength.value - 1] = i
    return user_vec

users = list(user_strengths.keys())

d = {}
index = {}
users.sort() # Needed so that we can create a diagonal matrix
for i, user in enumerate(users):
    d[user] = [0 for j in range(len(users))]
    index[i] = user

df = pd.DataFrame(d, dtype=np.float64)
df = df.rename(index=index)

for i, user in enumerate(users):
    for j, other_user in enumerate(users):
        if user == other_user:
            df[user][other_user] = 0
            continue
        user_vec = create_user_vec(user_strengths[user])
        other_user_vec = create_user_vec(user_strengths[other_user])
        corrmatrix = np.corrcoef(user_vec, other_user_vec)
        coefficient = corrmatrix[0][1]
        df[user][other_user] = coefficient
# df

edge_colors = []
edge_widths = []
for i, edge in enumerate(G.edges()):
    u1, u2 = edge
    coefficient = df[u1][u2]
    random_color = ["turquoise", "teal", "navy"]
    if coefficient < 0:
        edge_colors.append("salmon")
        edge_widths.append(1)
    elif coefficient > 0.5:
        edge_colors.append("navy")
        edge_widths.append(4)
    elif coefficient > 0.25:
        edge_colors.append("teal")
        edge_widths.append(2)
    else:
        edge_colors.append("turquoise")
        edge_widths.append(1)

# pos = nx.circular_layout(G)
# nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, edge_color=edge_colors, width=edge_widths, font_weight='bold')
# # nx.draw(G, pos, with_labels=False, node_size=node_sizes, font_weight='bold')
# # for p in pos:  # raise text positions
# #     pos[p][1] += 0.08
# nx.draw_networkx_labels(G, pos, with_labels=True, font_color='black', node_size=node_sizes)
# plt.savefig('graph.png', figsize=(200,200))

In [76]:
from collections import defaultdict
strength_to_top3_users = defaultdict(list)

for user in user_strengths:
    strengths = user_strengths[user]
    top3_strengths = strengths[:3]
    for strength in top3_strengths:
        strength_to_top3_users[strength.name].append(user)

with open("top3_result.txt", "w") as top3_file:
    for strength in strength_to_top3_users:
        num_users_with_strength = len(strength_to_top3_users[strength])
        userlist_str = ", ".join(strength_to_top3_users[strength])
        top3_file.write("(%d) %+22s: %s\n" % (num_users_with_strength, strength, userlist_str))

In [77]:
from collections import defaultdict
strength_to_bot3_users = defaultdict(list)

for user in user_strengths:
    strengths = user_strengths[user]
    bot3_strengths = strengths[-3:]
    for strength in bot3_strengths:
        strength_to_bot3_users[strength.name].append(user)

with open("bot3_result.txt", "w") as bot3_file:
    for strength in strength_to_bot3_users:
        num_users_with_strength = len(strength_to_bot3_users[strength])
        userlist_str = ", ".join(strength_to_bot3_users[strength])
        bot3_file.write("(%d) %+22s: %s\n" % (num_users_with_strength, strength, userlist_str))
print('done')

done


In [7]:
def create_user_vec(single_user_strengths):
    user_vec = np.empty(24)
    for i,strength in enumerate(single_user_strengths):
        user_vec[strength.value - 1] = i
    return user_vec

users = list(user_strengths.keys())

d = {}
index = {}
users.sort() # Needed so that we can create a diagonal matrix
for i, user in enumerate(users):
    d[user] = [0 for j in range(len(users))]
    index[i] = user

df = pd.DataFrame(d, dtype=np.float64)
df = df.rename(index=index)

for i, user in enumerate(users):
    for j, other_user in enumerate(users):
        if user == other_user:
            df[user][other_user] = 0
            continue
        user_vec = create_user_vec(user_strengths[user])
        other_user_vec = create_user_vec(user_strengths[other_user])
        corrmatrix = np.corrcoef(user_vec, other_user_vec)
        coefficient = corrmatrix[0][1]
        df[user][other_user] = coefficient
# for user in users:
#     print("%20s ---> %-20s" % (user, df[user].idxmax()))

# plt.rcParams["figure.figsize"] = [20,20]
# users = list(user_strengths.keys())
# DG = nx.DiGraph()
# # DG.add_weighted_edges_from([ ("Angelina Huang", "Allen Wu", 0.249565), ("Angelina Huang", "Anh Tran", 0.034783)])
# DG.add_weighted_edges_from([ ("Angelina Huang", "Allen Wu", 1), ("Angelina Huang", "Anh Tran", 0.034783)])
# pos = nx.circular_layout(DG)
# nx.draw(DG, pos, with_labels=False, node_size=node_sizes, font_weight='bold')
# df["Angelina Huang"]

# DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75)])
# pos = nx.circular_layout(DG)
# nx.draw(DG, pos, with_labels=False, node_size=node_sizes, font_weight='bold')
    

In [81]:
firstrow = None
secondrow = None
thirdrow = None
fourthrow = None
with open('carson.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if firstrow is None:
            firstrow = row
        elif secondrow is None:
            secondrow = row
        elif thirdrow is None:
            thirdrow = row
        elif fourthrow is None:
            fourthrow = row
    

self_perception_scores = np.empty(24)
for i, token in enumerate(firstrow[2:]):
    strength_str = token.strip()
    self_perception_scores[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_scores1 = np.empty(24)
for i, token in enumerate(secondrow[2:]):
    strength_str = token.strip()
    external_perception_scores1[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_scores2 = np.empty(24)
for i, token in enumerate(thirdrow[2:]):
    strength_str = token.strip()
    external_perception_scores2[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_scores3 = np.empty(24)
for i, token in enumerate(fourthrow[2:]):
    strength_str = token.strip()
    external_perception_scores3[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_score = (external_perception_scores1 + external_perception_scores2 + external_perception_scores3)/3.0
# print(self_perception_scores)
# print(external_perception_scores1)
# print(external_perception_scores2)
# print(external_perception_scores3)

cog_diss = cognitive_dissonance_score(external_perception_score, self_perception_scores)
s = scipy.spatial.distance.cosine(external_perception_score, self_perception_scores)
print(s)
print(cog_diss)

0.10438157954858163
4.279596331234476


In [82]:
firstrow = None
secondrow = None
thirdrow = None
fourthrow = None
with open('jessica.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if firstrow is None:
            firstrow = row
        elif secondrow is None:
            secondrow = row
        elif thirdrow is None:
            thirdrow = row
        elif fourthrow is None:
            fourthrow = row
    

self_perception_scores = np.empty(24)
for i, token in enumerate(firstrow[2:]):
    strength_str = token.strip()
    self_perception_scores[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_scores1 = np.empty(24)
for i, token in enumerate(secondrow[2:]):
    strength_str = token.strip()
    external_perception_scores1[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

s = scipy.spatial.distance.cosine(external_perception_scores1, self_perception_scores)
print(s)
print(cog_diss)

0.08117483811285842
4.279596331234476


In [86]:
firstrow = None
secondrow = None
thirdrow = None
fourthrow = None
with open('anh.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if firstrow is None:
            firstrow = row
        elif secondrow is None:
            secondrow = row
        elif thirdrow is None:
            thirdrow = row
        elif fourthrow is None:
            fourthrow = row
    

self_perception_scores = np.empty(24)
for i, token in enumerate(firstrow[2:]):
    strength_str = token.strip()
    self_perception_scores[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

external_perception_scores1 = np.empty(24)
for i, token in enumerate(secondrow[2:]):
    strength_str = token.strip()
    external_perception_scores1[TOKEN_TO_STRENGTH[strength_str].value - 1] = i

s = scipy.spatial.distance.cosine(self_perception_scores, external_perception_scores1)

s = scipy.spatial.distance.cosine(external_perception_scores1, self_perception_scores)
print(s)
print(cog_diss)

0.1302035152636447
4.279596331234476
