# Current Embeddings of Arguments and Counterarguments

In [1]:
# General imports
import os
import re
from enum import Enum
from typing import Optional
from ctypes import Union
import numpy as np
import pandas as pd

### OpenAI Setup

In [2]:
pip install openai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import openai
from openai import OpenAI

In [4]:
client = OpenAI()

### Class Declarations

In [5]:
# Enum for categories

class Category(Enum):
    CULTURE = "culture"
    DIGITAL_FREEDOMS = "digital-freedoms"
    ECONOMY = "economy"
    EDUCATION = "education"
    ENVIRONMENT = "environment"
    FREE_SPEECH_DEBATE = "free-speech-debate"
    HEALTH = "health"
    INTERNATIONAL = "international"
    LAW = "law"
    PHILOSOPHY = "philosophy"
    POLITICS = "politics"
    RELIGION = "religion"
    SCIENCE = "science"
    SOCIETY = "society"
    SPORT = "sport" 

In [6]:
# Enum for analysis types

class AnalysisType(Enum):
    TSNE = "tsne"
    PCA = "pca"

In [7]:
# Enum for processing unit

class ProcessingUnit(Enum):
    GLOBAL = "global"
    CATEGORY = "category"
    DEBATE = "debate"

## Extract Arguments from File

#### [Debate] Arguments dict

In [8]:
""" Extract arguments from category file: debate_topic.txt -> full.txt """

def debate_extract_arguments(
    category: Category,
    file_path: str,
    start_re: str = "# PRO",
    end_re: str = "# LITERATURE",
    pro_point_re: str = "# PRO\w+-POINT",
    pro_counter_re: str = "# PRO\w+-COUNTER",
    con_point_re: str = "# CON\w+-POINT",
    con_counter_re: str = "# CON\w+-COUNTER"
    ) -> {}:
    
    # try to open file from path
    try:
        with open(f'../arguana-counterargs-corpus/02-extracted-arguments/training/{category.value}/{file_path}/full.txt', 'r') as file:
            file_contents = file.read()
    except FileNotFoundError:
        print(f"File not found: {file_path + '.txt'}")
        return None
        
    # parse file contents
    lines: [] = re.split(r'\n', file_contents)

    # Enum for argument section
    class ArgumentSection(Enum):
        PRO = "pro"
        CON = "con"
        
    # Enum for argument type
    class ArgumentType(Enum):
        POINT = "point"
        COUNTER = "counter"
        
    # holds the extracted arguments for the debate topic
    debate_arguments = {}
    
    # holds the argument pairs data for the debate topic
    arguments = {
        'pro': [],
        'con': []
    }
    
    # Start looping through lines
    current_argument: str = ""
    start: bool = False
    current_argument_section = ArgumentSection.PRO
    current_argument_type = ArgumentType.POINT
    cur_pair = {}

    for line in lines:
        # skip to start line
        if (not start):
            if re.match(r'\s*' + start_re, line):
                start = True
                continue
            continue
        
        # special case when we reach # LITERATURE we append the last argument and return
        if re.match(r'\s*' + end_re, line):
            _append_argument_to_cur_pair(current_argument, current_argument_type, cur_pair)
            _append_cur_pair_to_arguments(current_argument_section, arguments, cur_pair)
            if len(arguments['pro']) and len(arguments['con']):
                debate_arguments[file_path] = arguments
            return debate_arguments

        # skip citations
        if re.match(r'\s*\[', line):
            continue 

        # Append an argument to current pair
        def _append_argument_to_cur_pair(current_argument: str, current_argument_type: ArgumentType, cur_pair: {}):
            if len(current_argument):
                if current_argument_type == ArgumentType.POINT:
                    cur_pair['point'] = current_argument
                else:
                    cur_pair['counter'] = current_argument

        # Append current argument pair to arguments
        def _append_cur_pair_to_arguments(current_argument_section: ArgumentSection, arguments: {}, cur_pair: {}):
            if len(cur_pair):
                if current_argument_section == ArgumentSection.PRO:
                    arguments["pro"].append(cur_pair)
                else:
                    arguments["con"].append(cur_pair)

        # case where we meet a pro point
        if re.match(r'\s*' + pro_point_re, line):
            _append_argument_to_cur_pair(current_argument, current_argument_type, cur_pair)
            _append_cur_pair_to_arguments(current_argument_section, arguments, cur_pair)
            current_argument_section = ArgumentSection.PRO
            current_argument_type = ArgumentType.POINT
            current_argument = ""
            cur_pair = {}
            continue

        # case where we meet a pro counter
        elif re.match(r'\s*' + pro_counter_re, line):
            _append_argument_to_cur_pair(current_argument, current_argument_type, cur_pair)
            current_argument_section = ArgumentSection.PRO
            current_argument_type = ArgumentType.COUNTER
            current_argument = ""
            continue

         # case where we meet a con point
        elif re.match(r'\s*' + con_point_re, line):
            _append_argument_to_cur_pair(current_argument, current_argument_type, cur_pair)
            _append_cur_pair_to_arguments(current_argument_section, arguments, cur_pair)
            current_argument_section = ArgumentSection.CON
            current_argument_type = ArgumentType.POINT
            current_argument = ""
            cur_pair = {}
            continue
            
        # case where we meet a con counter
        elif re.match(r'\s*' + con_counter_re, line):
            _append_argument_to_cur_pair(current_argument, current_argument_type, cur_pair)
            current_argument_section = ArgumentSection.CON
            current_argument_type = ArgumentType.COUNTER
            current_argument = ""
            continue
        
        # remove in-text citations
        line = re.sub(r'\[\w+\]', '', line)
        line = re.sub(r'\s\s+', '', line)
        current_argument += line.strip()
        
    # this should never actually be reached
    debate_arguments[file_path] = arguments
    return debate_arguments

#### [Category] Arguments dict

In [9]:
""" Extract all debates from a category: list_of_<category_path>_debates.txt -> <debate_topic>.txt """

def category_extract_arguments(category: Category) -> {}:
    # convert category.value to path syntax
    category_path = category.value.replace('-', '_')
    
    # try to open file from path
    try:
        with open(f'./file_paths/list_of_{category_path}_debates.txt', 'r') as file:
            file_contents = file.read()
    except FileNotFoundError:
        print(f"File not found: {f'list_of_{category_path}_debates.txt'}")
        return None
        
    # parse file contents
    debates: [] = re.split(r'\n', file_contents)
    
    # grab arguments for each debate in the category
    category_arguments = {}
    for i, debate in enumerate(debates):
        # add topic and arguments to category_arguments
        debate_arguments = debate_extract_arguments(category, debate)
        if debate_arguments:
            category_arguments.update(debate_extract_arguments(category, debate))
        else:
            _write_invalid_debate_to_file(category, debate)
    category_arguments = {f'{category.value}': category_arguments}
    return category_arguments

In [10]:
def _write_invalid_debate_to_file(category: Category, file_path: str):
    output_folder = f'../data_dump/data_valid_tally/'
    output_file_path = f'{output_folder}{category.value}.txt'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    file = open(output_file_path, "a")
    file.write(file_path)

#### [Global] Arguments dict

In [11]:
""" Extract all debates across all categories: all_categories.txt -> list_of_<category>_debates.txt """

def global_extract_arguments() -> {}:
    # open global file from path
    with open('./file_paths/all_categories.txt', 'r') as global_file:
        global_file_contents = global_file.read()
        
    # parse file contents
    category_pattern = re.compile(r'list_of_(\w+)_debates')
    lines: [] = re.split(r'\n', global_file_contents)
    category_paths = [line for line in lines if category_pattern.search(line)]
    category_names = [category_pattern.search(category).group(1).upper() for category in category_paths]
    
    # key: category: Category.value
    # value: dictionary of dictionaries where key = topic and value is {'pro: [{'point':, 'counter':}, ...], 'con': []}
    global_arguments = {} 
    # add valid topics as keys to extracted_categories and grab their arguments
    for index, category_str in enumerate(zip(category_paths, category_names)):
        try:
            category = Category[category_str[1]]
            global_arguments.update(category_extract_arguments(category))
        except KeyError as e:
            print(f"Category: {category_str[1]}, Category not found in Category enum and is removed.")
            category_paths.pop(index)
            category_names.pop(index)
    return global_arguments

## Convert to df

#### [Write to File] Arguments df

In [12]:
""" Write arguments df to pickle file """

def _arguments_df_write_to_file(
        arguments_data: pd.DataFrame,
        category: Optional[str] = None,
        topic: Optional[str] = None
    ):

    # Debate case
    if topic and category:
        topic_path = topic.replace('-', '_')
        output_folder = f'../data_dump/arguments_dump/{category}/'
        output_file_path = f'{output_folder}{topic_path}_arguments.pkl'
    
    # Category case
    elif category:
        output_folder = f'../data_dump/arguments_dump/{category}/'
        output_file_path = f'{output_folder}{category}_arguments.pkl'
    
    # Global case
    else:
        output_folder = f'../data_dump/arguments_dump/'
        output_file_path = f'{output_folder}global_arguments.pkl'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    arguments_data.to_pickle(output_file_path)

#### [Debate] Arguments df

In [13]:
""" Convert arguments dict into df """

def debate_convert_to_df(debate_arguments: {}, category: str) -> pd.DataFrame:
    debate_arguments_df = pd.DataFrame()
    debate_topic = next(iter(debate_arguments))

    # loop through all argument pairs in the # PRO section
    for i, pro_argument in enumerate(debate_arguments[debate_topic]["pro"]):
        point_argument = {
            'argument': pro_argument['point'],
            'pair_id': str(i),
            'type': 'point',
            'stance': 'PRO'
        }
        debate_arguments_df = pd.concat([debate_arguments_df, pd.DataFrame([point_argument])], axis=0)
        debate_arguments_df = debate_arguments_df.reset_index(drop=True)

        if 'counter' in pro_argument.keys():
            counter_argument = {
                'argument': pro_argument['counter'],
                'pair_id': str(i),
                'type': 'counter',
                'stance': 'CON'
            }
            debate_arguments_df = pd.concat([debate_arguments_df, pd.DataFrame([counter_argument])], axis=0)
            debate_arguments_df = debate_arguments_df.reset_index(drop=True)
    
    offset = len(debate_arguments[debate_topic]["pro"])
    
    # loop through all argument pairs in the # CON section
    for j, con_argument in enumerate(debate_arguments[debate_topic]["con"]):
        point_argument = {
            'argument': con_argument['point'],
            'pair_id': str(j+offset),
            'type': 'point',
            'stance': 'CON'
        }
        debate_arguments_df = pd.concat([debate_arguments_df, pd.DataFrame([point_argument])], axis=0)
        debate_arguments_df = debate_arguments_df.reset_index(drop=True)

        if 'counter' in con_argument.keys():
            counter_argument = {
                'argument': con_argument['counter'],
                'pair_id': str(j+offset),
                'type': 'counter',
                'stance': 'PRO'
            }
            debate_arguments_df = pd.concat([debate_arguments_df, pd.DataFrame([counter_argument])], axis=0)
            debate_arguments_df = debate_arguments_df.reset_index(drop=True)
            
    debate_arguments_df['topic'] = debate_topic
    debate_arguments_df = debate_arguments_df.dropna()
    _arguments_df_write_to_file(debate_arguments_df, category, debate_topic)
    return debate_arguments_df

#### [Category] Arguments df

In [14]:
""" Convert category arguments dict into df """

def category_convert_to_df(category_arguments: {}) -> pd.DataFrame:
    category_arguments_df = pd.DataFrame()
    category = next(iter(category_arguments))

    # Loop through debates in category
    debates = category_arguments[category]
    for debate in debates:
        debate_dict = category_arguments[category][debate]
        debate_df = debate_convert_to_df({debate: debate_dict}, category)
        category_arguments_df = pd.concat([category_arguments_df, debate_df], axis = 0)
        category_arguments_df = category_arguments_df.reset_index(drop=True)

    category_arguments_df['category'] = category
    category_arguments_df = category_arguments_df.dropna()
    _arguments_df_write_to_file(category_arguments_df, category)
    return category_arguments_df

#### [Global] Arguments df

In [15]:
""" Convert global arguments dict into df """

def global_convert_to_df(global_arguments: {}) -> pd.DataFrame:
    global_arguments_df = pd.DataFrame()

    # Loop through categories in global arguments
    for category in global_arguments.keys():
        global_arguments_df = pd.concat([global_arguments_df, category_convert_to_df({category: global_arguments[category]})], axis=0)
        global_arguments_df = global_arguments_df.reset_index(drop=True)

    global_arguments_df = global_arguments_df.dropna()
    _arguments_df_write_to_file(global_arguments_df)
    return global_arguments_df

## Get Embeddings

#### [Imports] Get Embeddings

In [16]:
pip install tenacity


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:
import time
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential
)

#### [Write] Embeddings df

In [18]:
""" Write extracted embeddings to pickle file """

def _embeddings_write_to_file(
    embeddings_data: pd.DataFrame,
    category: Optional[str] = None,
    topic: Optional[str] = None
    ):

    # Debate case
    if topic and category:
        topic_path = topic.replace('-', '_')
        output_folder = f'../data_dump/embeddings_dump/{category}/'
        output_file_path = f'{output_folder}{topic_path}_embeddings.pkl'
    
    # Category case
    elif category:
        output_folder = f'../data_dump/embeddings_dump/{category}/'
        output_file_path = f'{output_folder}{category}_embeddings.pkl'
    
    # Global case
    else:
        output_folder = f'../data_dump/embeddings_dump/'
        output_file_path = f'{output_folder}global_embeddings.pkl'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    embeddings_data.to_pickle(output_file_path)

#### [All] Embeddings df

In [19]:
""" Convert an argument into a (1 x 1536) embedding df """

DIM_EMBEDDING = 1536

@retry(wait=wait_random_exponential(min=60, max=500), stop=stop_after_attempt(10))
def _get_embeddings(arguments: []) -> []:
    embeddings = client.embeddings.create(input=arguments, model="text-embedding-ada-002")
    embeddings_data = [embedding_data.embedding for embedding_data in embeddings.data]
    embeddings_df = pd.DataFrame(embeddings_data, columns=[f"{str(i)}" for i in range(DIM_EMBEDDING)])
    return embeddings_df.reset_index(drop=True)

In [20]:
""" Add embeddings column to a df """

API_LIMIT = 1000

def get_embeddings_df(arguments_df: pd.DataFrame, processing_unit: ProcessingUnit, debate_category: Optional[Category] = None) -> pd.DataFrame:
    embeddings_df = pd.DataFrame()
    arguments_list = list(arguments_df['argument'])
    total_len = len(arguments_list)
    i = 0

    # Grab embeddings from arguments column in chunks
    while i < total_len:
        embeddings = _get_embeddings(arguments_list[i:min(total_len, i+API_LIMIT)])
        embeddings_df = pd.concat([embeddings_df, embeddings], axis=0, ignore_index=True)
        i = i + API_LIMIT
    arguments_embeddings_df = pd.concat([arguments_df, embeddings_df], axis=1)
    
    # Write embeddings df to file
    if processing_unit == ProcessingUnit.GLOBAL:
        _embeddings_write_to_file(arguments_embeddings_df, None, None)
    elif processing_unit == ProcessingUnit.CATEGORY:
        _embeddings_write_to_file(arguments_embeddings_df, arguments_embeddings_df['category'].iloc[0], None)
    elif processing_unit == ProcessingUnit.DEBATE:
        _embeddings_write_to_file(arguments_embeddings_df, debate_category.value, arguments_embeddings_df['topic'].iloc[0])
    return arguments_embeddings_df

## Analyze Embeddings

#### [Imports] Analysis df

In [21]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import StandardScaler

#### [Write] Analysis df

In [22]:
""" Write analysis results to pickle file """

def _analysis_write_to_file(
    analysis_type: AnalysisType,
    processing_unit: ProcessingUnit, # Does the df contain 1 debate / 1 category / global
    analysis_data: pd.DataFrame,
    category: Optional[str] = None,
    topic: Optional[str] = None
    ):
    processing_level = processing_unit.value
    
    # Debate facet
    if topic and category:
        topic_path = topic.replace('-', '_')
        if processing_unit == ProcessingUnit.DEBATE:
            output_folder = f'../data_dump/{analysis_type.value}_dump/{category}/debates/'
        elif processing_unit == ProcessingUnit.CATEGORY:
            output_folder = f'../data_dump/{analysis_type.value}_dump/{category}/category-facet-debates/'
        elif processing_unit == ProcessingUnit.GLOBAL:
            output_folder = f'../data_dump/{analysis_type.value}_dump/{category}/global-facet-debates/'
        output_file_path = f'{output_folder}{topic_path}_{analysis_type.value}.pkl'
    
    # Category facet
    elif category:
        output_folder = f'../data_dump/{analysis_type.value}_dump/{category}/'
        if processing_unit == ProcessingUnit.CATEGORY:
            output_file_path = f'{output_folder}{category}_{analysis_type.value}.pkl'
        elif processing_unit == ProcessingUnit.GLOBAL:
            output_file_path = f'{output_folder}global_category_facet_{category}_{analysis_type.value}.pkl'
            
    # Global facet
    else:
        if processing_unit == ProcessingUnit.GLOBAL:
            output_folder = f'../data_dump/{analysis_type.value}_dump/'
            output_file_path = f'{output_folder}global_{analysis_type.value}.pkl'
        else:
            print(f"Invalid processing unit: {processing_unit}.")
   
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    analysis_data.to_pickle(output_file_path)

#### [Debate] Analysis df

In [23]:
""" TSNE argument embeddings from a df """
def tsne_embeddings(
        embeddings_df: pd.DataFrame,
        processing_unit: ProcessingUnit = None, # Does the df contain 1 debate / 1 category / global
        facet: ProcessingUnit = None,           # Analyze in groups of debate / category / global
        debate_category: Optional[str] = None
    ):
    numeric_columns = embeddings_df.select_dtypes(include=[np.number]).columns
    non_numeric_columns = embeddings_df.select_dtypes(exclude=[np.number]).columns
    embeddings_data = embeddings_df[numeric_columns].values
    tsne = TSNE(n_components=2, perplexity=len(embeddings_data) // 2, random_state=42, init='random', learning_rate=200)
    embeddings_tsne = tsne.fit_transform(embeddings_data)
    embeddings_tsne_data = (
        pd.DataFrame(embeddings_tsne, columns=['x','y'])
        .join(embeddings_df[non_numeric_columns].reset_index(drop=True))
    )
    if facet == ProcessingUnit.DEBATE:
        _analysis_write_to_file(AnalysisType.TSNE, processing_unit, embeddings_tsne_data, debate_category, embeddings_tsne_data['topic'].iloc[0])
    elif facet == ProcessingUnit.CATEGORY:
        _analysis_write_to_file(AnalysisType.TSNE, processing_unit, embeddings_tsne_data, embeddings_tsne_data['category'].iloc[0])
    elif facet == ProcessingUnit.GLOBAL:
        _analysis_write_to_file(AnalysisType.TSNE, processing_unit, embeddings_tsne_data)
    else:
        pass
    return embeddings_tsne_data

In [269]:
def pca_normalization(pair_df: pd.DataFrame) -> pd.DataFrame:
    """ Normalize PCA argument embeddings from a df
    """
    ret_df = pd.DataFrame()
    
    point_row = pair_df[pair_df['type'] == 'point']
    counter_row = pair_df[pair_df['type'] == 'counter']
    
    point_nonnum_row = point_row.select_dtypes(exclude=[np.number])
    counter_nonnum_row = counter_row.select_dtypes(exclude=[np.number])
    new_nonnum_rows = pd.concat([point_nonnum_row, counter_nonnum_row])
    new_nonnum_rows = new_nonnum_rows.reset_index(drop=True)
    
    point_vec = point_row.select_dtypes(include=[np.number]).values.flatten()
    counter_vec = counter_row.select_dtypes(include=[np.number]).values.flatten()

    center = (point_vec.copy() + counter_vec.copy()) / 2
    point_vec -= center
    counter_vec -= center
    point_vec = point_vec.flatten()
    counter_vec = counter_vec.flatten()

    point_new_num_row = pd.DataFrame(point_vec.reshape(1,-1), columns=['{}'.format(i) for i in range(len(point_vec))])
    counter_new_num_row = pd.DataFrame(counter_vec.reshape(1,-1), columns=['{}'.format(i) for i in range(len(counter_vec))])
    new_num_rows = pd.concat([point_new_num_row, counter_new_num_row])
    new_num_rows = new_num_rows.reset_index(drop=True)
    return new_nonnum_rows.join(new_num_rows)

In [305]:
def pca_preprocessing(embeddings_df: pd.DataFrame) -> pd.DataFrame:
    if len(embeddings_df) % 2 != 0:
        print("Warning: embeddings_df not in pairs")
    embeddings_ret_df = pd.DataFrame()
    topics = list(embeddings_df['topic'].unique())

    for topic in topics:
        topic_rows = embeddings_df[embeddings_df['topic'] == topic]
        if len(topic_rows) % 2 != 0:
            print(f"Warning: Topic '{topic}' has {len(topic_rows)} rows.")
        pair_ids = list(topic_rows['pair_id'].unique())
        
        for pair_id in pair_ids:
            pair_df = topic_rows[topic_rows['pair_id'] == pair_id]
            if len(pair_df) != 2:
                print(f"Warning: Pair {pair_id} at topic '{topic}' has {len(pair_df)} rows.")
                continue
            embeddings_ret_df = pd.concat([embeddings_ret_df, pca_normalization(pair_df)])
            embeddings_ret_df = embeddings_ret_df.reset_index(drop=True)
    return embeddings_ret_df
    

def doPCA(embeddings_df: pd.DataFrame, embedding, num_components = 10):
    matrix = []
    paired_embeddings_df = embeddings_df.copy()
    

    for a, b in pairs:
        center = (embedding.v(a) + embedding.v(b))/2
        matrix.append(embedding.v(a) - center)
        matrix.append(embedding.v(b) - center)
    matrix = np.array(matrix)
    pca = PCA(n_components = num_components)
    pca.fit(matrix)
    # bar(range(num_components), pca.explained_variance_ratio_)
    return pca


def pca_embeddings(
        embeddings_df: pd.DataFrame,
        num_components: int,
        processing_unit: ProcessingUnit=None,
        facet: ProcessingUnit=None,
        debate_category: Optional[str] = None
    ):
    embeddings_processed = pca_preprocessing(embeddings_df)
    numeric_columns = embeddings_processed.select_dtypes(include=[np.number]).columns
    non_numeric_columns = embeddings_processed.select_dtypes(exclude=[np.number]).columns
    embeddings_data = embeddings_processed[numeric_columns].values
    
    # scaler = StandardScaler()
    # embedding_vectors_scaled = scaler.fit_transform(embeddings_data)
    num_components = min(num_components, embeddings_data.shape[0], embeddings_data.shape[1])
    pca = PCA(n_components=num_components)
    embeddings_pca = pca.fit_transform(embeddings_data)
    embeddings_pca_data = (
        pd.DataFrame(embeddings_pca, columns=['pca_{}'.format(i) for i in range(num_components)])
        .join(embeddings_processed[non_numeric_columns].reset_index(drop=True))
    )

    # Printing PCA attributes
    components = pca.components_
    explained_variance = pca.explained_variance_
    explained_variance_ratio = pca.explained_variance_ratio_
    singular_values = pca.singular_values_
    mean_value = pca.mean_
    n_components = pca.n_components_
    n_features = pca.n_features_in_
    n_samples = pca.n_samples_
    
    # Displaying the attributes
    print("Components:")
    print(components)
    print("\nExplained Variance:")
    print(explained_variance)
    print("\nExplained Variance Ratio:")
    print(explained_variance_ratio)
    print("\nSingular Values:")
    print(singular_values)
    print("\nMean:")
    print(mean_value)
    print("\nNumber of Components:")
    print(n_components)
    print("\nNumber of Features:")
    print(n_features)
    print("\nNumber of Samples:")
    print(n_samples)

    # Write to file
    if facet == ProcessingUnit.DEBATE:
        _analysis_write_to_file(AnalysisType.PCA, processing_unit, embeddings_pca_data, debate_category, embeddings_pca_data['topic'].iloc[0])
    elif facet == ProcessingUnit.CATEGORY:
        _analysis_write_to_file(AnalysisType.PCA, processing_unit, embeddings_pca_data, embeddings_pca_data['category'].iloc[0])
    elif facet == ProcessingUnit.GLOBAL:
        _analysis_write_to_file(AnalysisType.PCA, processing_unit, embeddings_pca_data)
    else:
        pass
    return embeddings_pca_data

In [274]:
""" Analyze argument embeddings from a debate df """

def debate_analyze_embeddings(
        analysis_type: AnalysisType,
        num_components,
        debate_embeddings_df: pd.DataFrame,
        debate_category: str
    ):
    if analysis_type == AnalysisType.TSNE:
        debate_embeddings_analysis = tsne_embeddings(debate_embeddings_df, ProcessingUnit.DEBATE, ProcessingUnit.DEBATE, debate_category)
    elif analysis_type == AnalysisType.PCA:
        debate_embeddings_analysis = pca_embeddings(debate_embeddings_df, num_components, ProcessingUnit.DEBATE, ProcessingUnit.DEBATE, debate_category)
    return debate_embeddings_analysis

#### [Category] Analysis df

In [275]:
""" Analyze argument embeddings from a category df """

def category_analyze_embeddings(
        analysis_type: AnalysisType,
        num_components,
        category_embeddings_df: pd.DataFrame,
        facet: ProcessingUnit
    ):
    
    # Analyze Embeddings
    category = category_embeddings_df['category'].iloc[0]
    if analysis_type == AnalysisType.TSNE:
        if facet == ProcessingUnit.CATEGORY:
            category_embeddings_analysis = tsne_embeddings(category_embeddings_df, ProcessingUnit.CATEGORY, facet)
        elif facet == ProcessingUnit.DEBATE:
            category_embeddings_analysis = category_embeddings_df.groupby('topic').apply(lambda group: tsne_embeddings(group, ProcessingUnit.CATEGORY, facet, category))
            category_embeddings_analysis = category_embeddings_analysis.reset_index(drop=True)
        else:
            print(f"Inappropriate facet level: {facet}.")
    elif analysis_type == AnalysisType.PCA:
        if facet == ProcessingUnit.CATEGORY:
            category_embeddings_analysis = pca_embeddings(category_embeddings_df, num_components, ProcessingUnit.CATEGORY, facet)
        elif facet == ProcessingUnit.DEBATE:
            category_embeddings_analysis = category_embeddings_df.groupby('topic').apply(lambda group: pca_embeddings(group, num_components, ProcessingUnit.CATEGORY, facet, category))
            category_embeddings_analysis = category_embeddings_analysis.reset_index(drop=True)
        else:
            print(f"Inappropriate facet level: {facet}.")
    
    # Write to file for facet
    if facet == ProcessingUnit.DEBATE:
        output_folder = f'../data_dump/{analysis_type.value}_dump/{category}/'
        output_file_path = f'{output_folder}category_debate_facet_{category}_{analysis_type.value}.pkl'
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        category_embeddings_analysis.to_pickle(output_file_path)
    return category_embeddings_analysis

#### [Global] Analysis df

In [298]:
""" Analyze argument embeddings from a global df """

def global_analyze_embeddings(
    analysis_type: AnalysisType,
    num_components: int,
    global_embeddings_df: pd.DataFrame,
    facet: ProcessingUnit
    ):
    if analysis_type == AnalysisType.TSNE:
        if facet == ProcessingUnit.GLOBAL:
            global_embeddings_analysis = tsne_embeddings(global_embeddings_df, ProcessingUnit.GLOBAL, facet)
        elif facet == ProcessingUnit.CATEGORY:
            global_embeddings_analysis = global_embeddings_df.groupby('category').apply(lambda group: tsne_embeddings(group, ProcessingUnit.GLOBAL, facet))
        elif facet == ProcessingUnit.DEBATE:
            global_embeddings_analysis = global_embeddings_df.groupby('topic').apply(lambda group: tsne_embeddings(group, ProcessingUnit.GLOBAL, facet, group['category'].iloc[0]))
    elif analysis_type == AnalysisType.PCA:
        if facet == ProcessingUnit.GLOBAL:
            global_embeddings_analysis = pca_embeddings(global_embeddings_df, num_components, ProcessingUnit.GLOBAL, facet)
        elif facet == ProcessingUnit.CATEGORY:
            global_embeddings_analysis = global_embeddings_df.groupby('category').apply(lambda group: pca_embeddings(group, num_components, ProcessingUnit.GLOBAL, facet))
        elif facet == ProcessingUnit.DEBATE:
            global_embeddings_analysis = global_embeddings_df.groupby('topic').apply(lambda group: pca_embeddings(group, num_components, ProcessingUnit.GLOBAL, facet, group['category'].iloc[0]))
    
    # Write to file for facet
    if facet != ProcessingUnit.GLOBAL:
        output_folder = f'../data_dump/{analysis_type.value}_dump/'
        output_file_path = f'{output_folder}global_{facet.value}_facet_{analysis_type.value}.pkl'
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        global_embeddings_analysis.to_pickle(output_file_path)
    return global_embeddings_analysis

## Plot Embeddings

#### [Imports] Analysis Plot

In [28]:
from plotnine import ggplot, geom_point, geom_text, geom_line, aes, theme, theme_void, labs, element_text, facet_wrap, ggsave

#### [Debate] Analysis Plot

In [29]:
def _insert_line_breaks(text, max_width=50):
    words = text.split(' ')
    lines = []
    current_line = ''

    for word in words:
        if len(current_line) + len(word) <= max_width:
            current_line += word + ' '
        else:
            lines.append(current_line.strip())
            current_line = word + ' '

    lines.append(current_line.strip())
    return '\n'.join(lines)

In [30]:
""" Plot embeddings for a single debate """

def debate_plot(
        analysis_type: AnalysisType,
        debate_category: str,
        embeddings_analysis_data: pd.DataFrame,
        processing_unit: ProcessingUnit=ProcessingUnit.DEBATE
    ):
    
    # Plot
    stance_markers = {'PRO': '+', 'CON': '*'}
    debate_topic = embeddings_analysis_data['topic'].iloc[0]
    plot_topic = _insert_line_breaks(debate_topic.replace('-', ' '))
    plot_analysis_type = analysis_type.value.upper()
    gg = (
        ggplot(embeddings_analysis_data, aes(x='x', y='y', color='stance', shape='stance', group='pair_id')) +
        geom_point(size=2) +
        geom_line(color='black', size=0.5) +
        labs(
            title=f'{plot_analysis_type} Plot for Debate:\n{plot_topic}',
            x=f'{plot_analysis_type}_x',
            y=f'{plot_analysis_type}_y'
        ) +
        theme(
            axis_title=element_text(margin={'t': 20}),
            figure_size=(8, 8),
        )
    )
    
    # Save to file
    if processing_unit == ProcessingUnit.DEBATE:
        output_folder = f'../data_dump/{analysis_type.value}_plots_dump/{debate_category}/debate-plots/'
    elif processing_unit == ProcessingUnit.CATEGORY:
        output_folder = f'../data_dump/{analysis_type.value}_plots_dump/{debate_category}/debate-plots/category-facet-debate-plots/'
    elif processing_unit == ProcessingUnit.GLOBAL:
        output_folder = f'../data_dump/{analysis_type.value}_plots_dump/{debate_category}/debate-plots/global-facet-debate-plots/'
    output_file_path = f'{output_folder}{debate_topic}_{analysis_type.value}_plot.png'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    ggsave(gg, output_file_path)
    print(gg)

#### [Category] Analysis Plot

In [31]:
""" Plot embeddings for debates in a category """

def category_plot(
        analysis_type: AnalysisType,
        category_plot_data: pd.DataFrame,
        processing_unit: ProcessingUnit=ProcessingUnit.CATEGORY,
        facet: ProcessingUnit=ProcessingUnit.CATEGORY,
        view: ProcessingUnit=ProcessingUnit.CATEGORY
    ):
    
    # Plot
    plot_category = category_plot_data['category'].iloc[0]
    plot_analysis_type = analysis_type.value.upper()
    category_plot_data['interaction'] = category_plot_data['pair_id'] + '_' + category_plot_data['topic']
    if view == ProcessingUnit.CATEGORY:
        gg = (
            ggplot(category_plot_data, aes(x='x', y='y', color='topic', shape='stance', group='interaction')) +
            geom_point(size=2) +
            geom_line(color='black', size=0.5) +
            labs(
                title=f'{plot_analysis_type} Plot for Category:\n{plot_category}',
                x=f'{plot_analysis_type}_x',
                y=f'{plot_analysis_type}_y'
            ) +
            theme(
                legend_position="none",
                plot_title=element_text(size=24),
                strip_text=element_text(angle=0, hjust=0.5, vjust=1, wrap=True),
                figure_size=(16, 16)
            )
        )
    elif view == ProcessingUnit.DEBATE:
        gg = (
            ggplot(category_plot_data, aes(x='x', y='y', group='interaction')) +
            facet_wrap('~topic', ncol=5, scales='free') +
            geom_point(aes(color='stance'), size=1) +
            geom_line(color='black', size=0.5) +
            labs(
                title=f'{plot_analysis_type} Plot for Category:\n{plot_category}',
                x=f'{plot_analysis_type}_x',
                y=f'{plot_analysis_type}_y'
            ) +
            theme(
                axis_title=element_text(size=16),
                plot_title=element_text(size=32),
                strip_text=element_text(angle=0, hjust=0.5, vjust=1, wrap=True),
                figure_size=(24, 24)
            )
        )
    else:
        print(f'Inappropriate view level: {facet}')
        
    # Save to file
    output_folder = f'../data_dump/{analysis_type.value}_plots_dump/{plot_category}/{view.value}-view/'
    if processing_unit == ProcessingUnit.CATEGORY:
        if facet == ProcessingUnit.CATEGORY:
            output_file_path = f'{output_folder}{plot_category}_{analysis_type.value}_plot.png'
        elif facet == ProcessingUnit.DEBATE:
            output_file_path = f'{output_folder}category_debate_facet_{plot_category}_{analysis_type.value}_plot.png'
    elif processing_unit == ProcessingUnit.GLOBAL:
        output_file_path = f'{output_folder}global_category_facet_{plot_category}_{analysis_type.value}_plot.png'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    ggsave(gg, output_file_path)
    print(gg)

#### [Global] Analysis Plot

In [32]:
# Plot embeddings for all debates
def global_plot(
        analysis_type: AnalysisType,
        global_plot_data: pd.DataFrame,
        facet: ProcessingUnit=ProcessingUnit.GLOBAL,
        view: ProcessingUnit=ProcessingUnit.GLOBAL
    ):

    # Plot
    plot_analysis_type = analysis_type.value.upper()
    global_plot_data['interaction'] = global_plot_data['pair_id'] + '_' + global_plot_data['topic']
    if view == ProcessingUnit.GLOBAL:
        gg = (
            ggplot(global_plot_data, aes(x='x', y='y', color='category', shape='stance', group='interaction')) +
            geom_point(size=2) +
            geom_line(color='black', size=0.5) +
            labs(
                title=f'{plot_analysis_type} Plot for All Debates',
                x=f'{plot_analysis_type}_x',
                y=f'{plot_analysis_type}_y'
            ) +
            theme(
                legend_position="none",
                plot_title=element_text(size=32),
                strip_text=element_text(angle=0, hjust=0.5, vjust=1, wrap=True),
                figure_size=(24, 24)
            )
        )
    elif view == ProcessingUnit.CATEGORY:
        gg = (
            ggplot(global_plot_data.reset_index(drop=True), aes(x='x', y='y', group='interaction')) +
            facet_wrap('~category', ncol=2, scales='free') +
            geom_point(aes(color='topic'), size=1) +
            geom_line(color='black', size=0.5) +
            labs(
                title=f'{plot_analysis_type} Plot for All Categories',
                x=f'{plot_analysis_type}_x',
                y=f'{plot_analysis_type}_y'
            ) +
            theme(
                axis_title=element_text(size=16),
                plot_title=element_text(size=32),
                strip_text=element_text(angle=0, hjust=0.5, vjust=1, wrap=True),
                legend_position='none',
                figure_size=(24, 24)
            )
        )
    elif view == ProcessingUnit.DEBATE:
        print('But why? This is very very not recommended.')
        gg = (
            ggplot(global_plot_data.reset_index(drop=True), aes(x='x', y='y', group='interaction')) +
            facet_wrap('~category', scales='free') +
            facet_wrap('~topic', ncol=5, scales='free') +
            geom_point(aes(color='stance'), size=1) +
            geom_line(color='black', size=0.5) +
            labs(
                title=f'{plot_analysis_type} Plot for All Debates Across All Categories',
                x=f'{plot_analysis_type}_x',
                y=f'{plot_analysis_type}_y'
            ) +
            theme(
                axis_title=element_text(size=16),
                plot_title=element_text(size=32),
                strip_text=element_text(angle=0, hjust=0.5, vjust=1, wrap=True),
                legend_position='none',
                figure_size=(24, 24)
            )
        )

    # Save to file
    output_folder = f'../data_dump/{analysis_type.value}_plots_dump/{view.value}-view/'
    if facet == ProcessingUnit.GLOBAL:
        output_file_path = f'{output_folder}global_{analysis_type.value}_plot.png'
    elif facet == ProcessingUnit.CATEGORY:
        output_file_path = f'{output_folder}global_category_facet_{analysis_type.value}_plot.png'
    elif facet == ProcessingUnit.DEBATE:
        output_file_path = f'{output_folder}global_debate_facet_{analysis_type.value}_plot.png'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    ggsave(gg, output_file_path)
    print(gg)

## Run

#### [Run] Extract Arguments

In [27]:
# Run debate level extract arguments
economy_debate_arguments = debate_extract_arguments(Category.ECONOMY, "business-economy-general-house-would-prohibit-retailers-selling-certain-items")
economy_debate_arguments

{'business-economy-general-house-would-prohibit-retailers-selling-certain-items': {'pro': [{'point': 'Banning loss leaders would help suppliersThe practice of loss leaders is bad for suppliers. Farmers and manufacturers are often forced by the dominant retail giants to participate in discount schemes, sharing the losses at the dictate of the retailer. If they refuse they will be dropped by the retailer and cut off from the marketplace. The American Antitrust Institute has concluded that these "Resale price maintenance (RPM)" agreements—which are agreed upon because retailers have all of the leverage—are usually illegal.1 Prohibiting loss leaders will prevent this abuse of market dominance by the big retail companies and ensure a fair deal for our farmers.1 John B. Kirkwood, Albert Foer, and Richard Burnell, “The American Antitrust Institute On the European Commission’s Proposed Block Exemption Regulation and Guidelines on Vertical Restraints,” American Antitrust Institute, September 27

In [28]:
# Run category level extract arguments
economy_category_arguments = category_extract_arguments(Category.ECONOMY)
economy_category_arguments

{'economy': {'business-economic-policy-economy-general-house-believes-national-minimum-wage': {'pro': [{'point': 'The minimum wage aids in the propagation of social justice and the fair treatment of workersBusinesses operating in a free market are concerned principally with their bottom lines. In order to increase profits, firms will seek to exploit workers, to lower wages as far as possible. This exploitation will continue indefinitely, unless the state intervenes. The state does so by implementing a minimum wage. The lowest paid workers tend to be less educated, less skilled, and less organized than higher-paid employees. This makes them the easiest to manipulate and the easiest to replace.In order to stop this outright exploitation of the most vulnerable members of society, the power of wage setting must fall to some extent within the purview of the state. Certainly, it is far better for state, which has citizens’ best interest at heart, to weigh in on the issue of setting wages tha

In [29]:
# Run global level extract arguments
global_arguments = global_extract_arguments()
global_arguments

File not found: -business-finance-health-addiction-house-would-introduce-minimum-pricing-alcohol.txt


{'economy': {'business-economic-policy-economy-general-house-believes-national-minimum-wage': {'pro': [{'point': 'The minimum wage aids in the propagation of social justice and the fair treatment of workersBusinesses operating in a free market are concerned principally with their bottom lines. In order to increase profits, firms will seek to exploit workers, to lower wages as far as possible. This exploitation will continue indefinitely, unless the state intervenes. The state does so by implementing a minimum wage. The lowest paid workers tend to be less educated, less skilled, and less organized than higher-paid employees. This makes them the easiest to manipulate and the easiest to replace.In order to stop this outright exploitation of the most vulnerable members of society, the power of wage setting must fall to some extent within the purview of the state. Certainly, it is far better for state, which has citizens’ best interest at heart, to weigh in on the issue of setting wages tha

#### [Run] Convert to DataFrame

In [30]:
# Run debate level convert to dataframe
economy_debate_arguments_df = debate_convert_to_df(economy_debate_arguments, Category.ECONOMY.value)
economy_debate_arguments_df

Unnamed: 0,argument,pair_id,type,stance,topic
0,Banning loss leaders would help suppliersThe p...,0,point,PRO,business-economy-general-house-would-prohibit-...
1,The use of loss leaders in marketing campaigns...,0,counter,CON,business-economy-general-house-would-prohibit-...
2,The use of loss leaders can have damaging soci...,1,point,PRO,business-economy-general-house-would-prohibit-...
3,"If retailers need to unload an item, it is tot...",1,counter,CON,business-economy-general-house-would-prohibit-...
4,Banning loss leaders protects consumers from p...,2,point,PRO,business-economy-general-house-would-prohibit-...
5,The use of loss leaders allows greater competi...,2,counter,CON,business-economy-general-house-would-prohibit-...
6,The prohibition of loss leaders would promote ...,3,point,PRO,business-economy-general-house-would-prohibit-...
7,The government has no right to tell business w...,4,point,CON,business-economy-general-house-would-prohibit-...
8,The government should be able to stop large re...,4,counter,PRO,business-economy-general-house-would-prohibit-...
9,Loss leaders are an inexpensive option availab...,5,point,CON,business-economy-general-house-would-prohibit-...


In [31]:
# Run category level convert to dataframe
economy_category_arguments_df = category_convert_to_df(economy_category_arguments)
economy_category_arguments_df

Unnamed: 0,argument,pair_id,type,stance,topic,category
0,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy
1,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy
2,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy
3,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy
4,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy
...,...,...,...,...,...,...
683,Raising taxes for individuals with income over...,1,counter,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
684,A minimalist state enables a fairer and more c...,2,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
685,The private sector has indeed been the source ...,2,counter,PRO,tax-politics-voting-obama-vs-romney-should-us-...,economy
686,Tax cuts and spending cuts are necessary for g...,3,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy


In [32]:
# Run global level convert to dataframe
global_arguments_df = global_convert_to_df(global_arguments)
global_arguments_df

Unnamed: 0,argument,pair_id,type,stance,topic,category
0,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy
1,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy
2,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy
3,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy
4,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy
...,...,...,...,...,...,...
8143,The aim of sanctions does not have to be to di...,5,counter,PRO,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms
8144,Sanctions won't workThe problem with sanctions...,6,point,CON,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms
8145,Cooperation is not a helpful alternative as it...,6,counter,PRO,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms
8146,Sanctions won't harm the hackersSanctions are ...,7,point,CON,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms


#### [Load] Arguments df

In [33]:
arguments_data_path = "../data_dump/arguments_dump/"
loaded_economy_debate_arguments_df = pd.read_pickle(f"{arguments_data_path}economy/business_economy_general_house_would_prohibit_retailers_selling_certain_items_arguments.pkl")
loaded_economy_category_arguments_df = pd.read_pickle(f"{arguments_data_path}economy/economy_arguments.pkl")
loaded_global_arguments_df = pd.read_pickle(f"{arguments_data_path}global_arguments.pkl")

#### [Run] Get Embeddings

In [34]:
from IPython.display import display 

In [35]:
# Run debate level get embeddings
economy_debate_embeddings_df = get_embeddings_df(loaded_economy_debate_arguments_df, ProcessingUnit.DEBATE, Category.ECONOMY)
economy_debate_embeddings_df

Unnamed: 0,argument,pair_id,type,stance,topic,0,1,2,3,4,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,Banning loss leaders would help suppliersThe p...,0,point,PRO,business-economy-general-house-would-prohibit-...,-0.02998,-0.019565,0.009096,-0.007777,-0.02807,...,-0.010964,0.000403,0.024745,-0.002686,-0.026614,-0.012022,-0.002696,0.020307,-0.019194,0.000626
1,The use of loss leaders in marketing campaigns...,0,counter,CON,business-economy-general-house-would-prohibit-...,-0.004294,-0.007939,0.002236,0.010128,-0.01802,...,0.014044,-0.008622,0.010021,-0.011915,-0.030873,-0.03189,-0.005161,0.002398,-0.011902,-0.011072
2,The use of loss leaders can have damaging soci...,1,point,PRO,business-economy-general-house-would-prohibit-...,0.001049,-0.001917,0.011437,-0.011618,-0.028901,...,0.011357,0.010788,0.02033,-0.01658,-0.03091,-0.033856,-0.006924,-0.000388,-0.014116,-0.016205
3,"If retailers need to unload an item, it is tot...",1,counter,CON,business-economy-general-house-would-prohibit-...,-0.016631,-0.020733,0.006392,-0.006291,-0.036789,...,0.006144,-0.003387,0.015912,-0.007597,-0.01603,-0.002678,-0.009779,0.019061,-0.015325,-0.004047
4,Banning loss leaders protects consumers from p...,2,point,PRO,business-economy-general-house-would-prohibit-...,-0.020735,0.003473,0.0143,-0.018055,-0.03083,...,-0.000394,0.006813,0.021102,-0.001009,-0.02,-0.007504,-0.012851,0.009245,0.000695,-0.014463
5,The use of loss leaders allows greater competi...,2,counter,CON,business-economy-general-house-would-prohibit-...,-0.022444,-0.000251,-0.000261,0.005801,-0.025161,...,0.006893,-0.009384,0.014652,-0.00018,-0.044727,-0.029463,0.001875,0.003616,-0.007699,0.003143
6,The prohibition of loss leaders would promote ...,3,point,PRO,business-economy-general-house-would-prohibit-...,-0.018609,-0.01687,0.014069,-0.007187,-0.016113,...,0.005117,0.001803,0.038863,-0.001989,-0.032678,-0.013857,-0.001747,0.018158,-0.017414,-0.006013
7,The government has no right to tell business w...,4,point,CON,business-economy-general-house-would-prohibit-...,-0.004729,-0.011561,-0.008236,-0.010135,-0.003148,...,-0.001402,-0.00093,0.022407,-0.018723,-0.035324,-0.018122,-0.002481,-0.003594,-0.020143,-0.003208
8,The government should be able to stop large re...,4,counter,PRO,business-economy-general-house-would-prohibit-...,-0.013747,-0.0271,-0.00885,-0.010624,-0.028017,...,-0.002666,-0.008176,0.037077,-0.01158,-0.02079,-0.00887,-0.002558,0.005423,-0.009492,0.001389
9,Loss leaders are an inexpensive option availab...,5,point,CON,business-economy-general-house-would-prohibit-...,-0.01055,-0.003542,0.005745,-0.016415,-0.023989,...,0.009106,0.005977,0.027272,-0.003285,-0.045662,-0.019453,0.001561,-0.00116,-0.000119,0.000268


In [39]:
# Run category level get embeddings
loaded_economy_category_arguments_df = pd.read_pickle(f"{arguments_data_path}economy/economy_arguments.pkl")
economy_category_embeddings_df = get_embeddings_df(loaded_economy_category_arguments_df, ProcessingUnit.CATEGORY)
economy_category_embeddings_df

Unnamed: 0,argument,pair_id,type,stance,topic,category,0,1,2,3,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy,-0.027132,-0.030008,0.001653,-0.041229,...,0.011771,-0.015185,0.015224,-0.004797,-0.029593,-0.013644,-0.024916,-0.005970,-0.011421,-0.008701
1,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy,-0.009182,-0.029258,0.017485,-0.039518,...,0.012044,-0.017239,0.015701,-0.011631,-0.032979,-0.013143,-0.021646,-0.022021,-0.007043,0.002197
2,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy,-0.006708,-0.030957,0.011530,-0.046931,...,0.007217,-0.008666,0.009566,-0.008287,-0.033149,-0.025514,-0.013266,-0.013253,-0.013964,0.002690
3,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy,-0.019185,-0.037183,0.031429,-0.042576,...,0.003367,-0.004209,0.010728,-0.005238,-0.027997,-0.016553,-0.006167,-0.011831,0.006896,-0.008922
4,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy,-0.027359,-0.028944,0.011335,-0.052978,...,-0.003314,-0.018941,0.002118,-0.004102,-0.034712,-0.032556,-0.015576,-0.018941,-0.001668,0.008386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683,Raising taxes for individuals with income over...,1,counter,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy,-0.017871,-0.046446,0.008253,-0.042097,...,-0.009656,-0.000668,0.010737,-0.030209,-0.044516,-0.034944,0.016533,0.002454,-0.026208,-0.023377
684,A minimalist state enables a fairer and more c...,2,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy,-0.024336,-0.027975,-0.005029,-0.027208,...,0.004212,-0.009634,0.008231,-0.028267,-0.057168,-0.015814,-0.007523,0.003257,-0.015192,-0.017508
685,The private sector has indeed been the source ...,2,counter,PRO,tax-politics-voting-obama-vs-romney-should-us-...,economy,0.013270,-0.023960,-0.002013,-0.031091,...,-0.003578,-0.007248,0.025244,-0.024310,-0.043953,-0.018320,-0.022314,-0.001778,-0.006311,-0.005715
686,Tax cuts and spending cuts are necessary for g...,3,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy,-0.030544,-0.034559,0.001367,-0.019037,...,-0.000364,-0.005212,0.013514,-0.022514,-0.044373,-0.043061,-0.000453,0.003372,-0.010076,-0.022869


In [41]:
# Run global level get embeddings
global_embeddings_df = get_embeddings_df(loaded_global_arguments_df, ProcessingUnit.GLOBAL)
global_embeddings_df

Unnamed: 0,argument,pair_id,type,stance,topic,category,0,1,2,3,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy,-0.027132,-0.030008,0.001653,-0.041229,...,0.011771,-0.015185,0.015224,-0.004797,-0.029593,-0.013644,-0.024916,-0.005970,-0.011421,-0.008701
1,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy,-0.009182,-0.029258,0.017485,-0.039518,...,0.012044,-0.017239,0.015701,-0.011631,-0.032979,-0.013143,-0.021646,-0.022021,-0.007043,0.002197
2,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy,-0.006708,-0.030957,0.011530,-0.046931,...,0.007217,-0.008666,0.009566,-0.008287,-0.033149,-0.025514,-0.013266,-0.013253,-0.013964,0.002690
3,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy,-0.019185,-0.037183,0.031429,-0.042576,...,0.003367,-0.004209,0.010728,-0.005238,-0.027997,-0.016553,-0.006167,-0.011831,0.006896,-0.008922
4,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy,-0.027367,-0.029056,0.011422,-0.052992,...,-0.003322,-0.018972,0.002152,-0.004051,-0.034696,-0.032565,-0.015659,-0.018946,-0.001696,0.008440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8143,The aim of sanctions does not have to be to di...,5,counter,PRO,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms,-0.000782,-0.020596,0.020907,-0.020272,...,0.025350,0.001296,0.019352,-0.003117,-0.047409,-0.004822,0.018860,0.007241,-0.011852,0.005528
8144,Sanctions won't workThe problem with sanctions...,6,point,CON,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms,0.005694,-0.000011,0.024402,-0.014443,...,0.011732,0.005724,0.035923,-0.012823,-0.051239,-0.002786,0.005086,0.000617,-0.010297,0.002275
8145,Cooperation is not a helpful alternative as it...,6,counter,PRO,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms,-0.011930,-0.003207,0.026672,-0.005762,...,0.033110,0.000583,0.023273,-0.002682,-0.051744,-0.009837,-0.001032,0.014102,0.004639,-0.000537
8146,Sanctions won't harm the hackersSanctions are ...,7,point,CON,warpeace-digital-freedoms-intellectual-propert...,digital-freedoms,-0.004378,-0.021077,0.035080,-0.032282,...,0.011126,-0.014214,0.021922,0.000710,-0.050284,0.011060,0.007859,-0.003920,-0.000936,0.003126


#### [Load] Embeddings df

In [62]:
embeddings_data_path = "../data_dump/embeddings_dump/"
loaded_economy_debate_embeddings_df = pd.read_pickle(f"{embeddings_data_path}economy/business_economy_general_house_would_prohibit_retailers_selling_certain_items_embeddings.pkl")
loaded_economy_category_embeddings_df = pd.read_pickle(f"{embeddings_data_path}economy/economy_embeddings.pkl")
loaded_global_embeddings_df = pd.read_pickle(f"{embeddings_data_path}global_embeddings.pkl")

In [127]:
loaded_global_embeddings_df2 = loaded_global_embeddings_df.copy()

In [253]:
pair_rows

Unnamed: 0,argument,pair_id,type,stance,topic,category,0,1,2,3,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy,-0.027132,-0.030008,0.001653,-0.041229,...,0.011771,-0.015185,0.015224,-0.004797,-0.029593,-0.013644,-0.024916,-0.00597,-0.011421,-0.008701
1,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy,-0.009182,-0.029258,0.017485,-0.039518,...,0.012044,-0.017239,0.015701,-0.011631,-0.032979,-0.013143,-0.021646,-0.022021,-0.007043,0.002197


In [139]:
matrix = pd.DataFrame()
for group_key, rows in loaded_global_embeddings_df2:
    matrix = pd.concat([matrix, rows])

In [143]:
matrix

Unnamed: 0,argument,pair_id,type,stance,topic,category,0,1,2,3,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
4528,It is immoral to kill animalsAs evolved human ...,0,point,PRO,-animals-environment-general-health-health-gen...,philosophy,0.003212,-0.032420,-0.004606,-0.025032,...,0.010232,-0.005767,0.023761,-0.015581,-0.009647,0.003345,0.031211,0.005361,-0.003964,-0.004685
4529,There is a great moral difference between huma...,0,counter,CON,-animals-environment-general-health-health-gen...,philosophy,0.015008,-0.034472,-0.004528,-0.023707,...,-0.016585,-0.000063,0.007585,-0.013594,-0.025172,0.017612,0.011898,-0.009732,-0.001299,-0.005523
4530,Being vegetarian helps the environmentBecoming...,1,point,PRO,-animals-environment-general-health-health-gen...,philosophy,0.004629,-0.028116,0.005930,-0.010556,...,0.016191,0.011244,0.017630,-0.006958,-0.025379,-0.011025,0.003076,0.007434,-0.020997,-0.018723
4531,You don’t have to be vegetarian to be green. M...,1,counter,CON,-animals-environment-general-health-health-gen...,philosophy,0.013005,-0.016711,0.004056,-0.021007,...,0.008958,0.016390,0.006906,-0.010741,-0.032576,-0.007836,0.004598,0.019712,-0.014313,-0.025137
4532,Vegetarianism is healthierThere are significan...,2,point,PRO,-animals-environment-general-health-health-gen...,philosophy,0.009905,-0.020305,-0.008331,-0.027183,...,0.006463,-0.005523,0.021523,-0.012062,-0.040888,-0.029315,-0.021231,-0.006688,-0.013084,-0.012678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6096,The biggest news of the last few years in poli...,4,counter,PRO,y-political-philosophy-politics-leadership-hou...,politics,-0.000997,-0.002665,0.017683,-0.024808,...,-0.026557,-0.000764,0.018701,-0.010460,-0.040560,0.002059,-0.004421,-0.001479,-0.018714,-0.012469
6097,The Labour party is deprived of talent on the ...,5,point,CON,y-political-philosophy-politics-leadership-hou...,politics,-0.014630,-0.018148,0.014870,-0.028515,...,-0.015563,0.001228,0.010520,-0.023318,-0.051886,-0.014644,-0.010626,0.010313,-0.026236,-0.003003
6098,Far from depriving the Labour Party of talent ...,5,counter,PRO,y-political-philosophy-politics-leadership-hou...,politics,-0.017017,-0.009473,0.006900,-0.041570,...,-0.018632,-0.017778,-0.005584,-0.019984,-0.021363,0.011883,-0.000536,-0.014364,-0.024671,-0.016439
6099,Going left is step back not a step forwardLabo...,6,point,CON,y-political-philosophy-politics-leadership-hou...,politics,-0.008406,-0.007300,0.026987,-0.014520,...,-0.018733,-0.008491,0.023459,-0.002297,-0.060557,-0.010999,-0.002529,0.002926,-0.017245,0.000175


In [267]:
processed = pca_preprocessing(loaded_global_embeddings_df)



In [104]:
loaded_global_embeddings_df2.groups.items()

AttributeError: 'dict_items' object has no attribute 'next_iter'

#### [Run]  Analyze Embeddings

In [304]:
# Run debate level pca embeddings
economy_debate_pca_embeddings = debate_analyze_embeddings(AnalysisType.PCA, 10, loaded_economy_debate_embeddings_df, "economy")
economy_debate_pca_embeddings

Components:
[[ 1.59343125e-03  3.63993293e-03  2.56050544e-02 ... -8.87361176e-03
   6.96334447e-03 -1.27266757e-02]
 [ 6.89606078e-02  4.79514112e-02 -2.87017109e-02 ... -3.54197049e-02
   5.66492750e-03 -3.67043425e-02]
 [ 2.46912413e-03  5.89969481e-03  3.35253547e-03 ... -1.26423003e-02
  -1.86356026e-02  1.17630995e-02]
 ...
 [ 1.24676222e-01 -2.73827671e-01  3.85698140e-01 ...  2.28425190e-04
   5.54346337e-03 -1.37128441e-02]
 [ 1.28032374e-02 -3.08354170e-01  4.89568645e-01 ...  2.53764732e-04
   3.87792607e-03  9.80846154e-03]
 [-6.97238387e-01  2.12395205e-01 -1.09699320e-01 ...  1.35032220e-02
   4.14161754e-03  7.55203960e-03]]

Explained Variance:
[1.89962064e-02 1.40361783e-02 8.56955267e-03 8.34341821e-03
 7.27766532e-03 4.89992725e-03 3.96377486e-03 1.63959859e-64
 6.10695538e-65 2.10382932e-65]

Explained Variance Ratio:
[2.87443613e-01 2.12390290e-01 1.29671321e-01 1.26249537e-01
 1.10122956e-01 7.41438979e-02 5.99783842e-02 2.48098031e-63
 9.24082039e-64 3.18343719e-



Unnamed: 0,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,argument,pair_id,type,stance,topic
0,0.085125,-0.17887,-0.020315,0.091954,0.12676,-0.052,-0.008403,1.0977270000000001e-32,1.6681320000000002e-32,-4.459787e-33,Banning loss leaders would help suppliersThe p...,0,point,PRO,business-economy-general-house-would-prohibit-...
1,-0.085125,0.17887,0.020315,-0.091954,-0.12676,0.052,0.008403,1.0977270000000001e-32,1.6681320000000002e-32,-4.459787e-33,The use of loss leaders in marketing campaigns...,0,counter,CON,business-economy-general-house-would-prohibit-...
2,0.226949,0.161666,-0.057994,-0.065064,0.084347,-0.012937,-0.007718,2.809351e-32,-4.567994e-33,3.0580680000000003e-33,The use of loss leaders can have damaging soci...,1,point,PRO,business-economy-general-house-would-prohibit-...
3,-0.226949,-0.161666,0.057994,0.065064,-0.084347,0.012937,0.007718,2.809351e-32,-4.567994e-33,3.0580680000000003e-33,"If retailers need to unload an item, it is tot...",1,counter,CON,business-economy-general-house-would-prohibit-...
4,0.077133,-0.040998,-0.077827,0.060772,-0.003128,0.150009,0.035826,-1.1863330000000001e-33,7.261032000000001e-33,1.007764e-32,Banning loss leaders protects consumers from p...,2,point,PRO,business-economy-general-house-would-prohibit-...
5,-0.077133,0.040998,0.077827,-0.060772,0.003128,-0.150009,-0.035826,-1.1863330000000001e-33,7.261032000000001e-33,1.007764e-32,The use of loss leaders allows greater competi...,2,counter,CON,business-economy-general-house-would-prohibit-...
6,-0.025639,0.06586,0.184839,0.025666,0.100019,0.059184,0.002135,-1.083597e-32,4.928117e-34,1.075572e-33,The government has no right to tell business w...,4,point,CON,business-economy-general-house-would-prohibit-...
7,0.025639,-0.06586,-0.184839,-0.025666,-0.100019,-0.059184,-0.002135,-1.083597e-32,4.928117e-34,1.075572e-33,The government should be able to stop large re...,4,counter,PRO,business-economy-general-house-would-prohibit-...
8,-0.000738,0.004367,0.011976,-0.015456,0.010529,-0.038168,0.156014,-6.066710999999999e-48,4.032443e-48,2.460035e-48,Loss leaders are an inexpensive option availab...,5,point,CON,business-economy-general-house-would-prohibit-...
9,0.000738,-0.004367,-0.011976,0.015456,-0.010529,0.038168,-0.156014,-6.747464999999999e-48,3.3970279999999994e-48,3.0481489999999995e-48,Loss leaders do not help lower-income customer...,5,counter,PRO,business-economy-general-house-would-prohibit-...


In [306]:
# Run category level category facet pca embeddings
economy_category_pca_category_facet_embeddings = category_analyze_embeddings(AnalysisType.PCA, 10, loaded_economy_category_embeddings_df, ProcessingUnit.CATEGORY)
economy_category_pca_category_facet_embeddings

Components:
[[-0.0124327   0.03437832 -0.00195984 ... -0.016603   -0.01688542
  -0.01649074]
 [ 0.00433014  0.00625896  0.03369623 ... -0.00606261  0.00231242
   0.01602856]
 [ 0.00354325  0.02734512 -0.07079176 ...  0.01357739  0.02124806
  -0.00246227]
 ...
 [ 0.04469381 -0.02106351  0.00656056 ... -0.01851526 -0.01490981
  -0.02248237]
 [ 0.01758676  0.01916715 -0.00404357 ... -0.00498786  0.00130456
  -0.00176111]
 [ 0.02613375 -0.01669603 -0.00076227 ...  0.03594648 -0.03507808
   0.03823643]]

Explained Variance:
[0.00180183 0.00121526 0.00090107 0.00086188 0.00084223 0.00076642
 0.00076233 0.00073503 0.00072326 0.00066155]

Explained Variance Ratio:
[0.03406617 0.02297628 0.01703594 0.01629505 0.01592353 0.01449033
 0.01441297 0.01389684 0.0136743  0.0125075 ]

Singular Values:
[1.1109698  0.91238982 0.78564003 0.76836657 0.75955682 0.72456896
 0.72263218 0.70957554 0.70387117 0.67317174]

Mean:
[0. 0. 0. ... 0. 0. 0.]

Number of Components:
10

Number of Features:
1536

Number 

Unnamed: 0,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,argument,pair_id,type,stance,topic,category
0,0.048374,0.040219,-0.007474,0.000588,-0.021414,-0.012250,-0.022210,-0.006240,0.008461,-0.017969,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy
1,-0.048374,-0.040219,0.007474,-0.000588,0.021414,0.012250,0.022210,0.006240,-0.008461,0.017969,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy
2,0.050817,0.034848,-0.005063,0.004274,-0.020787,-0.014113,-0.006592,-0.013787,-0.025595,0.003490,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy
3,-0.050817,-0.034848,0.005063,-0.004274,0.020787,0.014113,0.006592,0.013787,0.025595,-0.003490,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy
4,0.053436,0.053737,0.018704,-0.031919,-0.028652,-0.018558,0.019467,-0.024879,0.023900,-0.018098,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,-0.056510,-0.001634,-0.032758,-0.036305,-0.047247,-0.063941,0.044251,-0.009140,-0.003532,-0.020397,Raising taxes for individuals with income over...,1,counter,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
682,-0.008147,-0.016451,0.054666,0.071642,-0.032336,-0.053379,-0.056776,-0.011292,0.022092,0.031378,A minimalist state enables a fairer and more c...,2,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
683,0.008147,0.016451,-0.054666,-0.071642,0.032336,0.053379,0.056776,0.011292,-0.022092,-0.031378,The private sector has indeed been the source ...,2,counter,PRO,tax-politics-voting-obama-vs-romney-should-us-...,economy
684,0.035683,-0.005989,-0.015306,0.033325,-0.022315,0.029754,-0.001891,-0.009516,0.058634,0.012259,Tax cuts and spending cuts are necessary for g...,3,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy


In [307]:
# Run category level debate facet pca embeddings
economy_category_pca_debate_facet_embeddings = category_analyze_embeddings(AnalysisType.PCA, 10, loaded_economy_category_embeddings_df, ProcessingUnit.DEBATE)
economy_category_pca_debate_facet_embeddings

Components:
[[-1.63151950e-02 -1.13750828e-02 -4.23176517e-02 ...  2.04809518e-02
  -2.05259231e-02 -2.08044304e-03]
 [-4.57371911e-03  4.33729589e-03 -3.54248745e-02 ... -6.43920862e-04
   2.80608730e-02 -6.23872942e-02]
 [ 1.71698718e-03 -9.29590216e-03  1.77983162e-02 ... -7.93153719e-04
   3.19915138e-02 -1.24061293e-02]
 ...
 [-5.91522564e-02 -3.02390482e-02  3.54012969e-02 ...  3.36949955e-04
   3.66411382e-02 -3.81979092e-03]
 [-1.98956693e-01  2.56283198e-01 -5.57985041e-02 ... -2.93019784e-03
   1.45964702e-02 -1.68258037e-02]
 [ 1.56371453e-01 -3.89978838e-01 -5.48374878e-01 ...  1.85077896e-02
  -1.95805726e-02 -3.28249307e-02]]

Explained Variance:
[8.14341838e-03 6.57761147e-03 5.62482837e-03 5.26446829e-03
 3.87227302e-03 3.68453279e-03 3.18696732e-03 2.85673846e-03
 7.51473140e-65 3.17325525e-65]

Explained Variance Ratio:
[2.07682844e-01 1.67749831e-01 1.43450858e-01 1.34260540e-01
 9.87551709e-02 9.39672031e-02 8.12777149e-02 7.28558379e-02
 1.91649344e-63 8.09280138e-

Unnamed: 0,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,argument,pair_id,type,stance,topic,category
0,0.134796,-0.007294,-0.050090,-0.016641,1.304613e-02,1.302057e-01,-7.848280e-03,1.668649e-02,-4.401345e-33,5.297468e-33,The minimum wage aids in the propagation of so...,0,point,PRO,business-economic-policy-economy-general-house...,economy
1,-0.134796,0.007294,0.050090,0.016641,-1.304613e-02,-1.302057e-01,7.848280e-03,-1.668649e-02,-4.401345e-33,5.297468e-33,There is no social justice in denying people t...,0,counter,CON,business-economic-policy-economy-general-house...,economy
2,0.136920,-0.004810,-0.038635,0.028494,2.431729e-02,-5.532454e-02,6.350703e-02,-8.494482e-02,-1.051449e-32,5.275552e-33,The minimum wage provides a baseline minimum a...,1,point,PRO,business-economic-policy-economy-general-house...,economy
3,-0.136920,0.004810,0.038635,-0.028494,-2.431729e-02,5.532454e-02,-6.350703e-02,8.494482e-02,-1.051449e-32,5.275552e-33,While it is of course socially desirable that ...,1,counter,CON,business-economic-policy-economy-general-house...,economy
4,0.108503,0.026652,0.174231,0.033802,-2.059269e-02,-4.078354e-03,-2.518438e-02,7.436611e-03,6.784899e-33,1.309818e-32,Higher wages boost economic growthEmployees wo...,2,point,PRO,business-economic-policy-economy-general-house...,economy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,-0.057765,-0.198751,-0.128245,0.006951,-3.619810e-20,1.150983e-18,1.009386e-17,1.381775e-17,,,Raising taxes for individuals with income over...,1,counter,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
682,0.240087,-0.025450,-0.019285,0.117769,2.458421e-16,-2.457973e-17,2.843569e-20,-4.619917e-21,,,A minimalist state enables a fairer and more c...,2,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy
683,-0.240087,0.025450,0.019285,-0.117769,2.458421e-16,-2.457973e-17,2.843569e-20,-4.619917e-21,,,The private sector has indeed been the source ...,2,counter,PRO,tax-politics-voting-obama-vs-romney-should-us-...,economy
684,0.172689,-0.016959,-0.029412,-0.160574,2.602438e-17,2.321976e-16,-5.114082e-20,-6.636754e-20,,,Tax cuts and spending cuts are necessary for g...,3,point,CON,tax-politics-voting-obama-vs-romney-should-us-...,economy


In [None]:
# Run global level global facet pca embeddings
global_pca_global_facet_embeddings = global_analyze_embeddings(AnalysisType.PCA, 10, loaded_global_embeddings_df, ProcessingUnit.GLOBAL)
global_pca_global_facet_embeddings



In [None]:
# Run global level category facet pca embeddings
global_pca_category_facet_embeddings = global_analyze_embeddings(AnalysisType.PCA, 10, loaded_global_embeddings_df, ProcessingUnit.CATEGORY)
global_pca_category_facet_embeddings

In [None]:
# Run global level debate facet pca embeddings
global_pca_debate_facet_embeddings = global_analyze_embeddings(AnalysisType.PCA, 10, loaded_global_embeddings_df, ProcessingUnit.DEBATE)
global_pca_debate_facet_embeddings

#### [Load] Analysis df

In [None]:
""" tsne """
tsne_data_path = "../data_dump/tsne_dump/"

# Debate level
loaded_economy_debate_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_tsne.pkl")
loaded_category_debate_facet_economy_debate_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/category-facet-debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_tsne.pkl")
loaded_global_debate_facet_economy_debate_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/global-facet-debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_tsne.pkl")

# Category level
loaded_economy_category_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/economy_tsne.pkl")
loaded_category_debate_facet_economy_category_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/category_debate_facet_economy_tsne.pkl")
loaded_global_category_facet_economy_category_tsne_df = pd.read_pickle(f"{tsne_data_path}economy/global_category_facet_economy_tsne.pkl")

# Global level
loaded_global_tsne_df = pd.read_pickle(f"{tsne_data_path}global_tsne.pkl")
loaded_global_category_facet_tsne_df = pd.read_pickle(f"{tsne_data_path}global_category_facet_tsne.pkl")
loaded_global_debate_facet_tsne_df = pd.read_pickle(f"{tsne_data_path}global_debate_facet_tsne.pkl")

In [158]:
""" pca """
pca_data_path = "../data_dump/pca_dump/"

# Debate level
loaded_economy_debate_pca_df = pd.read_pickle(f"{pca_data_path}economy/debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_pca.pkl")
loaded_category_debate_facet_economy_debate_pca_df = pd.read_pickle(f"{pca_data_path}economy/category-facet-debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_pca.pkl")
loaded_global_debate_facet_economy_debate_pca_df = pd.read_pickle(f"{pca_data_path}economy/global-facet-debates/business_economy_general_house_would_prohibit_retailers_selling_certain_items_pca.pkl")

# Category level
loaded_economy_category_pca_df = pd.read_pickle(f"{pca_data_path}economy/economy_pca.pkl")
loaded_category_debate_facet_economy_category_pca_df = pd.read_pickle(f"{pca_data_path}economy/category_debate_facet_economy_pca.pkl")
loaded_global_category_facet_economy_category_pca_df = pd.read_pickle(f"{pca_data_path}economy/global_category_facet_economy_pca.pkl")

# Global level
loaded_global_pca_df = pd.read_pickle(f"{pca_data_path}global_pca.pkl")
loaded_global_category_facet_pca_df = pd.read_pickle(f"{pca_data_path}global_category_facet_pca.pkl")
loaded_global_debate_facet_pca_df = pd.read_pickle(f"{pca_data_path}global_debate_facet_pca.pkl")

In [159]:
loaded_global_debate_facet_pca_df

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,argument,pair_id,type,stance,topic,category
topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-debate-media-and-good-government-house-would-regulate-press,0,-20.856565,-6.394248,The British tabloid press has proved singularl...,0,point,PRO,-debate-media-and-good-government-house-would-...,free-speech-debate
-debate-media-and-good-government-house-would-regulate-press,1,3.045570,-16.722408,It is part of the nature of journalism that it...,0,counter,CON,-debate-media-and-good-government-house-would-...,free-speech-debate
-debate-media-and-good-government-house-would-regulate-press,2,-20.536367,-19.577040,The British tabloid press isn’t so much free a...,1,point,PRO,-debate-media-and-good-government-house-would-...,free-speech-debate
-debate-media-and-good-government-house-would-regulate-press,3,5.815830,0.038995,There are already laws in place to respond to ...,1,counter,CON,-debate-media-and-good-government-house-would-...,free-speech-debate
-debate-media-and-good-government-house-would-regulate-press,4,-9.073161,28.036983,There have to be limits to the permissible lev...,2,point,PRO,-debate-media-and-good-government-house-would-...,free-speech-debate
...,...,...,...,...,...,...,...,...,...
university-philosophy-political-philosophy-minorities-house-would-use-positive,9,9.615086,10.577380,By having more students from disadvantaged bac...,4,counter,PRO,university-philosophy-political-philosophy-min...,education
university-philosophy-political-philosophy-minorities-house-would-use-positive,10,22.019246,-6.987274,Affirmative action can create social tensionsU...,5,point,CON,university-philosophy-political-philosophy-min...,education
university-philosophy-political-philosophy-minorities-house-would-use-positive,11,30.034635,-4.510198,"Social tension, especial in poor areas and min...",5,counter,PRO,university-philosophy-political-philosophy-min...,education
university-philosophy-political-philosophy-minorities-house-would-use-positive,12,13.230635,1.441838,Affirmative action will not workThe underlying...,6,point,CON,university-philosophy-political-philosophy-min...,education


In [None]:
"""
Josh: The TSNE plots are different depending on how you facet when processing the embeddings, but the PCA ones stay the same!
Didn't do some pairings, like global debate facet -> category view
"""

#### [Run] Debate Level TSNE Plots

In [None]:
# Run debate level tsne plot
debate_plot(AnalysisType.TSNE, "economy", loaded_economy_debate_tsne_df, ProcessingUnit.DEBATE)

In [None]:
# Run debate level category facet tsne plot
debate_plot(AnalysisType.TSNE, "economy", loaded_category_debate_facet_economy_debate_tsne_df, ProcessingUnit.CATEGORY)

In [None]:
# Run debate level global facet tsne plot
debate_plot(AnalysisType.TSNE, "economy", loaded_global_debate_facet_economy_debate_tsne_df, ProcessingUnit.GLOBAL)

#### [Run] Debate Level PCA Plots

In [None]:
# Run debate level pca plot
debate_plot(AnalysisType.PCA, "economy", loaded_economy_debate_pca_df, ProcessingUnit.DEBATE)

In [None]:
# Run debate level category facet pca plot
debate_plot(AnalysisType.PCA, "economy", loaded_category_debate_facet_economy_debate_pca_df, ProcessingUnit.CATEGORY)

In [None]:
# Run debate level global facet pca plot
debate_plot(AnalysisType.PCA, "economy", loaded_global_debate_facet_economy_debate_pca_df, ProcessingUnit.GLOBAL)

#### [Run] Category Level TSNE Plots -> Category View

In [None]:
# Run category level category view tsne plot
category_plot(AnalysisType.TSNE, loaded_economy_category_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

In [None]:
# Run category level category debate facet category view tsne plot
category_plot(AnalysisType.TSNE, loaded_category_debate_facet_economy_category_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE, ProcessingUnit.CATEGORY)

In [None]:
# Run category level global category facet category view tsne plot
category_plot(AnalysisType.TSNE, loaded_global_category_facet_economy_category_tsne_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

#### [Run] Category Level TSNE Plots -> Debate View

In [None]:
# Run category level debates view tsne plot
category_plot(AnalysisType.TSNE, loaded_economy_category_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE)

In [None]:
# Run category level category debate facet debates view tsne plot
category_plot(AnalysisType.TSNE, loaded_category_debate_facet_economy_category_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE, ProcessingUnit.DEBATE)

In [None]:
# Run category level global category facet debates view tsne plot
category_plot(AnalysisType.TSNE, loaded_global_category_facet_economy_category_tsne_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE)

#### [Run] Category Level PCA Plots -> Category View

In [None]:
# Run category level category view pca plot
category_plot(AnalysisType.PCA, loaded_economy_category_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

In [None]:
# Run category level category debate facet category view pca plot
category_plot(AnalysisType.PCA, loaded_category_debate_facet_economy_category_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE, ProcessingUnit.CATEGORY)

In [None]:
# Run category level global category facet category view pca plot
category_plot(AnalysisType.PCA, loaded_global_category_facet_economy_category_pca_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

#### [Run] Category Level PCA Plots -> Debate View

In [None]:
# Run category level debates view pca plot
category_plot(AnalysisType.PCA, loaded_economy_category_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE)

In [None]:
# Run category level category debate facet debates view pca plot
category_plot(AnalysisType.PCA, loaded_category_debate_facet_economy_category_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE, ProcessingUnit.DEBATE)

In [None]:
# Run category level global category facet debates view pca plot
category_plot(AnalysisType.PCA, loaded_global_category_facet_economy_category_pca_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY, ProcessingUnit.DEBATE)

#### [Run] Global Level TSNE Plots -> Global View

In [None]:
# Run global level global view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_tsne_df, ProcessingUnit.GLOBAL, ProcessingUnit.GLOBAL)

In [None]:
# Run global level category facet global view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_category_facet_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.GLOBAL)

In [None]:
# Run global level debate facet global view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_debate_facet_tsne_df, ProcessingUnit.DEBATE, ProcessingUnit.GLOBAL)

#### [Run] Global Level TSNE Plots -> Category View

In [None]:
# Run global level category view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_tsne_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY)

In [None]:
# Run global level global category facet category view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_category_facet_tsne_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

In [None]:
# Run global level global debate facet category view tsne plot
global_plot(AnalysisType.TSNE, loaded_global_debate_facet_tsne_df, ProcessingUnit.DEBATE, ProcessingUnit.CATEGORY)

#### [Run] Global Level PCA Plots -> Global View

In [None]:
# Run global level global view pca plot
global_plot(AnalysisType.PCA, loaded_global_pca_df, ProcessingUnit.GLOBAL, ProcessingUnit.GLOBAL)

In [None]:
# Run global level global category facet global view pca plot
global_plot(AnalysisType.PCA, loaded_global_category_facet_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.GLOBAL)

In [None]:
# Run global level global debate facet global view pca plot
global_plot(AnalysisType.PCA, loaded_global_debate_facet_pca_df, ProcessingUnit.DEBATE, ProcessingUnit.GLOBAL)

#### [Run] Global Level PCA Plots -> Category View

In [None]:
# Run global level category view pca plot
global_plot(AnalysisType.PCA, loaded_global_pca_df, ProcessingUnit.GLOBAL, ProcessingUnit.CATEGORY)

In [None]:
# Run global level global category facet category view pca plot
global_plot(AnalysisType.PCA, loaded_global_category_facet_pca_df, ProcessingUnit.CATEGORY, ProcessingUnit.CATEGORY)

In [None]:
# Run global level global debate facet category view pca plot
global_plot(AnalysisType.PCA, loaded_global_debate_facet_pca_df, ProcessingUnit.DEBATE, ProcessingUnit.CATEGORY)

## Load Plots

In [None]:
# Insert file path 