In [1]:
import seaborn as sns
import matplotlib.pyplot as plt

import os
import numpy as np
import pandas as pd
import random



from utils.statistical_test_utils import *



experiment_folder = 'datasets/llm_measurements/experiments'

# Single Speaker, Single Speaker Variable Measurements

In [2]:
n_bootstrap_samples = 300

In [3]:
data = pd.read_pickle(f'{experiment_folder}/all_variables_150_slices_single_speaker_single_variable_candidates_only_averaged.pkl')
# data = pd.read_pickle(f'{experiment_folder}/all_variables_150_slices_single_speaker_single_variable_candidates_only.pkl')

data = data.drop(columns=['slice_id', 'speaker', 'slice_size', 'speaker_is_candidate'])

corr = data.corr()
corr.dropna(axis=0, how='all', inplace=True)
corr.dropna(axis=1, how='all', inplace=True)

print(f'Dropping columns: {set(data.columns) - set(corr.columns)}')
data.drop(columns=set(data.columns) - set(corr.columns), inplace=True)

# calculate the correlation matrix
def get_corr_matrix(data):
    return data.corr()

# run bootstrap
bootstrap_std, _, bootstrap_df_of_lists = bootstrap_estimation(data, get_corr_matrix, gen_bootstrap_data, n_experiments=n_bootstrap_samples)
bootstrap_std.max().max(), bootstrap_std.min().min(), bootstrap_std.mean().mean(), bootstrap_std.std().std()

Dropping columns: set()


(0.12335490086864971, 0.0, 0.05091180884918469, 0.0023682813446116537)

In [4]:
# print top 10 stds
bootstrap_std_list = []
for i in range(bootstrap_std.shape[0]):
    for j in range(i+1, bootstrap_std.shape[1]):
        bootstrap_std_list.append((bootstrap_std.iloc[i, j], bootstrap_std.columns[i], bootstrap_std.columns[j]))

bootstrap_std_list.sort(reverse=True)
for i in range(20):
    # print(bootstrap_std_list[i])
    source = bootstrap_std_list[i][1]
    target = bootstrap_std_list[i][2]
    value = bootstrap_std_list[i][0]
    min_value = bootstrap_df_of_lists[source][target].min()
    max_value = bootstrap_df_of_lists[source][target].max()
    min_5_percentile = np.percentile(bootstrap_df_of_lists[source][target], 5)
    max_95_percentile = np.percentile(bootstrap_df_of_lists[source][target], 95)
    print(f'std={value:.3f} corr=[{min_value:.3f}; {max_value:.3f}]\t95% confidence interval=[{min_5_percentile:.3f}; {max_95_percentile:.3f}]    \t{source:<20} -> {target:<20}')

std=0.123 corr=[-0.029; 0.551]	95% confidence interval=[0.071; 0.482]    	engagement           -> confidence          
std=0.101 corr=[0.063; 0.567]	95% confidence interval=[0.172; 0.506]    	objectivity          -> preparation         
std=0.098 corr=[-0.201; 0.287]	95% confidence interval=[-0.134; 0.206]    	emotional appeal     -> coherence           
std=0.094 corr=[-0.281; 0.233]	95% confidence interval=[-0.126; 0.185]    	objectivity          -> positive impact on Russia
std=0.092 corr=[-0.623; -0.098]	95% confidence interval=[-0.527; -0.223]    	bias                 -> objectivity         
std=0.092 corr=[0.115; 0.665]	95% confidence interval=[0.242; 0.541]    	objectivity          -> adherence to rules  
std=0.092 corr=[-0.089; 0.393]	95% confidence interval=[0.011; 0.308]    	quality of sources   -> engagement          
std=0.090 corr=[-0.407; 0.200]	95% confidence interval=[-0.309; -0.018]    	bias                 -> impact on environment
std=0.090 corr=[-0.567; -0.091]	95% c

# ADN Connections

In [5]:
from utils.network_utils import *

variables_of_interest = ['speaker_party_IS_DEMOCRAT', 'general score', 'outreach',
    'persuasiveness', 'empathy', 'authenticity', 'decorum',
    'tone is professional', 'resonance', 'preparation', 'logical',
    'clarity', 'egotistical', 'relevant', 'contextual awareness',
    'listening skills', 'civil discourse', 'coherence', 'respectfulness',
    'responsiveness', 'pro neutral', 'language appropriateness',
    'evasiveness', 'adherence to rules', 'factuality', 'contribution',
    'manipulation', 'completeness', 'respect for diverse opinions',
    'sensationalism', 'use of evidence', 'objectivity', 'interruptions',
    'consistency', 'conciseness', 'venue respect', 'accessibility',
    'innovation', 'relevance', 'fair play', 'time management',
    'speaker_num_parts', 'bias', 'truthfulness', 'tone is academic',
    'num_parts', 'content quality', 'speaker_num_parts_ratio', 'balance',
    'quality of sources', 'speaker_popular_votes_ratio', 'controversiality',
    'confidence', 'engagement']

data = pd.read_pickle(f'{experiment_folder}/all_variables_150_slices_single_speaker_single_variable_candidates_only_averaged.pkl')

data = data[variables_of_interest]

In [27]:
source = 'Speaker Party'
target = 'general score'

sub_data = data.copy()
sub_data = sub_data.rename(columns={'speaker_party_IS_DEMOCRAT': 'Speaker Party'})

corr = sub_data.corr()
sub_columns = corr.loc[:, [source, target]].abs().max(axis=1)
sub_columns = sub_columns[(sub_columns > 0.2) & (sub_columns < 0.8)]
sub_columns = [source, target] + list(sub_columns.index)

sub_data = sub_data[sub_columns]

for n_connections in [1, 2, 3, 5, 7, 9, 20, 50, 100]:
    dis_matrix, _, _ = compute_distance_matrix_from_df(sub_data)
    dm = DistanceMatrix(dis_matrix)
    dm.keep_strongest_outgoing_influence(n_connections, 'Speaker Party')
    fig = dm.to_go_figure()
    fig.show()

In [None]:
n_bootstrap_samples = 2000

def get_dis_matrix(data):
    dis_matrix, _, _ = compute_distance_matrix_from_df(data)
    return dis_matrix

# run bootstrap
bootstrap_std, bootstrap_raw_measurements, _ = bootstrap_estimation(data, get_dis_matrix, gen_bootstrap_data, n_experiments=n_bootstrap_samples)
bootstrap_std.max().max(), bootstrap_std.min().min(), bootstrap_std.mean().mean(), bootstrap_std.std().std()

(0.041647683739242505, 0.0, 0.016345568892927993, 0.0011174992191413276)

In [None]:
# we check in how many cases a connection is present in the bootstrap samples
def convert_to_flags(data):
    data = data.copy()
    data[data > 0] = 1
    data[data < 0] = 0
    return data

bootstrap_connection_values = [
    convert_to_flags(bootstrap_raw_measurements[i])
    for i in range(n_bootstrap_samples)
]
# group into a new df
def list_of_dfs_to_df_of_lists(list_of_dfs):
    df = pd.DataFrame(index=list_of_dfs[0].index, columns=list_of_dfs[0].columns, dtype=object)
    for row in df.index:
        for col in df.columns:
            vals = [list_of_dfs[i].loc[row, col] for i in range(n_bootstrap_samples)]
            vals = [v for v in vals if v is not None]
            df.loc[row, col] = vals
            n_nan = n_bootstrap_samples - len(df.loc[row, col])
            if n_nan > 0:
                print(f"[WARNING]: {n_nan} of {n_bootstrap_samples} values in the results are NAN for {row}, {col}. Removing them.")
    return df

bootstrap_connection_values = list_of_dfs_to_df_of_lists(bootstrap_connection_values)
bootstrap_connection_percentage = bootstrap_connection_values.applymap(np.mean)
bootstrap_connection_percentage.max().max(), bootstrap_connection_percentage.min().min(), bootstrap_connection_percentage.mean().mean(), bootstrap_connection_percentage.std().std()

(1.0, 0.0, 0.9512431412894377, 0.04633601320566756)

In [None]:
# we check in how often consistent the connections that are kept for plotting are in the bootstrap samples
# num_edges_to_check = [10, 20, 50, 100, 200, 500, 1000] # max is 54*54, but for plotting we only keep very few
num_edges_to_check = [10, 50, 100, 1000] # max is 54*54, but for plotting we only keep very few

def keep_only_strongest_connections(dis_matrix, num_edges):
    assert(list(dis_matrix.columns) == list(dis_matrix.index))
    dis_matrix = dis_matrix.copy()
    # links = []
    edges = sorted([(s, t, dis_matrix.loc[s, t]) for i, s in enumerate(dis_matrix.columns) for j, t in enumerate(dis_matrix.columns) if s != t and dis_matrix.loc[s][t] > 0.], key=lambda x: x[2], reverse=True)
    # for s, t, d in edges[:num_edges]:
    #     links.append(dict(source=s, target=j, value=d))
    
    for s, t, _ in edges[num_edges:]:
        dis_matrix.loc[s, t] = 0.
    
    # set remaining values to 0
    dis_matrix = dis_matrix.applymap(lambda x: 0. if x <= 0. else x)

    assert(np.sum(dis_matrix.applymap(lambda x: 1 if x > 0. else 0).values) <= num_edges)

    return dis_matrix

for num_edges in num_edges_to_check:
    bootstrap_all_connection_values = list_of_dfs_to_df_of_lists(bootstrap_raw_measurements)

    bootstrap_connection_values = [
        keep_only_strongest_connections(bootstrap_raw_measurements[i], num_edges)
        for i in range(n_bootstrap_samples)
    ]

    average_dis_matrix = bootstrap_all_connection_values.applymap(np.mean)
    average_dis_matrix = keep_only_strongest_connections(average_dis_matrix, num_edges)
    top_average_connections = []
    for i, s in enumerate(average_dis_matrix.columns):
        for j, t in enumerate(average_dis_matrix.columns):
            if i != j and average_dis_matrix.loc[s, t] > 0.:
                top_average_connections.append((s, t, average_dis_matrix.loc[s, t]))
    assert(len(top_average_connections) <= num_edges)

    bootstrap_connection_values = list_of_dfs_to_df_of_lists(bootstrap_connection_values)
    bootstrap_connection_flags = bootstrap_connection_values.applymap(lambda x: [1. if v > 0. else 0. for v in x])
    bootstrap_connection_percentage = bootstrap_connection_flags.applymap(np.mean)
    # bootstrap_connection_consistency = bootstrap_connection_percentage.applymap(lambda x: max(np.mean(x), 1-np.mean(x)))
    bootstrap_connection_consistency = bootstrap_connection_percentage.applymap(lambda x: max(x, 1-x))
    bootstrap_active_connection_consistency_values = []
    # go through all connections with percentage > 0
    for i, s in enumerate(bootstrap_connection_percentage.columns):
        for j, t in enumerate(bootstrap_connection_percentage.columns):
            if i != j and bootstrap_connection_percentage.loc[s, t] > 0.:
                bootstrap_active_connection_consistency_values.append(bootstrap_connection_consistency.loc[s, t])
    bootstrap_active_connection_consistency_values = np.array(bootstrap_active_connection_consistency_values)

    bootstrap_connection_strength = bootstrap_all_connection_values.applymap(np.mean) # lambda x: np.mean([v for v in x if v >= 0.]))

    bootstrap_connection_stds = bootstrap_all_connection_values.applymap(np.std)
    # assert no nan values
    assert not bootstrap_connection_stds.isna().any().any()

    bootstrap_active_connection_stds = []
    # go through all connections with percentage > 0
    for i, s in enumerate(bootstrap_connection_percentage.columns):
        for j, t in enumerate(bootstrap_connection_percentage.columns):
            if i != j and bootstrap_connection_percentage.loc[s, t] > 0.:
                bootstrap_active_connection_stds.append(bootstrap_connection_stds.loc[s, t])
    bootstrap_active_connection_stds = np.array(bootstrap_active_connection_stds)

    bootstrap_active_connection_means = []
    # go through all connections with percentage > 0
    for i, s in enumerate(bootstrap_connection_percentage.columns):
        for j, t in enumerate(bootstrap_connection_percentage.columns):
            if i != j and bootstrap_connection_percentage.loc[s, t] > 0.:
                bootstrap_active_connection_means.append(bootstrap_connection_strength.loc[s, t])
    bootstrap_active_connection_means = np.array(bootstrap_active_connection_means)

    bootstrap_top_connections_consistencies = []
    bootstrap_top_connections_stds = []
    bootstrap_top_connections_strengths = []
    for s, t, d in top_average_connections:
        bootstrap_top_connections_stds.append(bootstrap_connection_stds.loc[s, t])
        bootstrap_top_connections_consistencies.append(bootstrap_connection_consistency.loc[s,t])
        bootstrap_top_connections_strengths.append(bootstrap_connection_strength.loc[s,t])
    bootstrap_top_connections_strengths = np.array(bootstrap_top_connections_strengths)
    bootstrap_top_connections_consistencies = np.array(bootstrap_top_connections_consistencies)
    bootstrap_top_connections_stds = np.array(bootstrap_top_connections_stds)

    print(f'Connections: {num_edges}\tActive Connections: {len(bootstrap_active_connection_consistency_values)}\n\tconsistency: max: {bootstrap_connection_consistency.max().max():.5f}, min: {bootstrap_connection_consistency.min().min():.5f}, mean: {bootstrap_connection_consistency.mean().mean():.5f}, std: {bootstrap_connection_consistency.std().std():.5f}')
    print(f'\tConsistency of at least once active connections: max: {bootstrap_active_connection_consistency_values.max():.5f}, min: {bootstrap_active_connection_consistency_values.min():.5f}, mean: {bootstrap_active_connection_consistency_values.mean():.5f}, std: {bootstrap_active_connection_consistency_values.std():.5f}')
    print(f'\tConnection Strength: max: {bootstrap_connection_strength.max().max():.5f}, min: {bootstrap_connection_strength.min().min():.5f}, mean: {bootstrap_connection_strength.mean().mean():.5f}, std: {bootstrap_connection_strength.std().std():.5f}')
    print(f'\tMean of connection strength for at least once active connections: {bootstrap_active_connection_means.mean():.5f}')
    print(f'\tStd of connection strength for at least once active connections: max std: {bootstrap_active_connection_stds.max():.5f}, min std: {bootstrap_active_connection_stds.min():.5f}, mean std: {bootstrap_active_connection_stds.mean():.5f}, std of stds: {bootstrap_active_connection_stds.std():.5f}')
    print(f'\tTop Connections: Mean Strengths: {bootstrap_top_connections_strengths.mean()}, Mean STDS: {bootstrap_top_connections_stds.mean()}, Mean Consisntency: {bootstrap_top_connections_consistencies.mean()}')
    # print(f'Connections: {num_edges}, max: {bootstrap_connection_percentage.max().max():.5f}, min: {bootstrap_connection_percentage.min().min():.5f}, mean: {bootstrap_connection_percentage.mean().mean():.5f}, std: {bootstrap_connection_percentage.std().std():.5f}')

Connections: 10	Active Connections: 79
	consistency: max: 1.00000, min: 0.57000, mean: 0.99885, std: 0.01466
	Consistency of at least once active connections: max: 0.99950, min: 0.57000, mean: 0.95758, std: 0.08564
	Connection Strength: max: 0.32399, min: -0.03260, mean: 0.07853, std: 0.01365
	Mean of connection strength for at least once active connections: 0.23140
	Std of connection strength for at least once active connections: max std: 0.03850, min std: 0.01744, mean std: 0.02464, std of stds: 0.00409
	Top Connections: Mean Strengths: 0.29820920543661533, Mean STDS: 0.02585657571193852, Mean Consisntency: 0.8503000000000001
Connections: 50	Active Connections: 276
	consistency: max: 1.00000, min: 0.50150, mean: 0.99223, std: 0.03953
	Consistency of at least once active connections: max: 1.00000, min: 0.50150, mean: 0.91791, std: 0.13211
	Connection Strength: max: 0.32399, min: -0.03260, mean: 0.07853, std: 0.01365
	Mean of connection strength for at least once active connections: 0.