# Baseline Assessment

After applying a few "state of the art" tools and gathering their predictions on the gold-standard datasets (essays & myPersonality), we'll assess the accuracy of the results.


### Organize The Results

Combine the predictions from the various tools and assess them.

First, we manually combined the results into spreadsheets, alongside the true labels.
The assessment spreadsheets are available here:

- [Essays dataset](https://github.com/eliranshemtov/Musical-Preferences-And-Textual-Expression/blob/main/analysis/tools-baseline/essays-combined-predictions.xlsx)
- [MyPersonality dataset](https://github.com/eliranshemtov/Musical-Preferences-And-Textual-Expression/blob/main/analysis/tools-baseline/myPersonality-combined-predictions.xlsx)
- [MyPersonality concatenated dataset](https://github.com/eliranshemtov/Musical-Preferences-And-Textual-Expression/blob/main/analysis/tools-baseline/myPersonality-concatenated-combined-predictions.xlsx)


### Accuracy Score

Accuracy is one metric for evaluating classification models. Informally, accuracy is the fraction of predictions our model got right. Formally, accuracy has the following definition:

<math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
  <mtext>Accuracy</mtext>
  <mo>=</mo>
  <mfrac>
    <mtext>Number of correct predictions</mtext>
    <mtext>Total number of predictions</mtext>
  </mfrac>
</math>

For binary classification, accuracy can also be calculated in terms of positives and negatives as follows:
<math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
<mtext>Accuracy</mtext>
<mo>=</mo>
<mfrac>
<mrow>
<mi>T</mi>
<mi>P</mi>
<mo>+</mo>
<mi>T</mi>
<mi>N</mi>
</mrow>
<mrow>
<mi>T</mi>
<mi>P</mi>
<mo>+</mo>
<mi>T</mi>
<mi>N</mi>
<mo>+</mo>
<mi>F</mi>
<mi>P</mi>
<mo>+</mo>
<mi>F</mi>
<mi>N</mi>
</mrow>
</mfrac>
</math>

Where TP = True Positives, TN = True Negatives, FP = False Positives, and FN = False Negatives.


In [1]:
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from dataclasses import dataclass

# Example: only the first and the last prediction match the true labels,
# so two out of four predictions are correct.
y_true = [0, 1, 2, 3]
y_pred = [0, 2, 1, 3]
accuracy_score(y_true=y_true, y_pred=y_pred)

0.5

#### **Accuracy Score** per trait (out of the Big Five), per tool (out of the 3 we used)


In [2]:
###############################
### Normalization Functions ###
###############################


def convert_yn_to_01(scores: pd.DataFrame) -> pd.DataFrame:
    """
    Used by True Labels: turn "y" labels into 1 and every other value into 0.

    The comparison is exact, so only the lowercase string "y" maps to 1.
    The result comes straight from np.where, i.e. it is a NumPy array with
    the same shape as `scores`.
    """
    is_yes = scores == "y"
    return np.where(is_yes, 1, 0)


def normalize_scores_by_half_threshold(scores: pd.DataFrame) -> pd.DataFrame:
    """
    Used by Tool #3, #1 & #4: binarize scores around 0.5.

    Scores greater than or equal to 0.5 become 1, everything else becomes 0.
    The result comes straight from np.where, i.e. it is a NumPy array with
    the same shape as `scores`.
    """
    at_or_above_half = scores >= 0.5
    return np.where(at_or_above_half, 1, 0)


def re_normalize_tool_1(column_df: pd.DataFrame) -> pd.DataFrame:
    """
    Min-max rescale the values of `column_df` into the range [0.05, 0.95].

    Unlike the tool's own normalization (shown below), the minimum and maximum
    are taken over the values of `column_df` itself (one trait across all
    samples) rather than over the five trait scores of a single sample.

    The original normalization logic done by the actual tool is as follows and does not yield meaningful results because the prediction for OPN and NEU are constantly the max and min values.
        def original_tool_normalization():
            min_value = min(pred_sOPN, pred_sCON, pred_sEXT, pred_sAGR, pred_sNEU)
            max_value = max(pred_sOPN, pred_sCON, pred_sEXT, pred_sAGR, pred_sNEU)

            scaled_min = 0.05
            scaled_max = 0.95

            pred_sOPN_normalized = (pred_sOPN - min_value) / (max_value - min_value) * (scaled_max - scaled_min) + scaled_min  # Always scores to 0.95
            pred_sCON_normalized = (pred_sCON - min_value) / (max_value - min_value) * (scaled_max - scaled_min) + scaled_min
            pred_sEXT_normalized = (pred_sEXT - min_value) / (max_value - min_value) * (scaled_max - scaled_min) + scaled_min
            pred_sAGR_normalized = (pred_sAGR - min_value) / (max_value - min_value) * (scaled_max - scaled_min) + scaled_min
            pred_sNEU_normalized = (pred_sNEU - min_value) / (max_value - min_value) * (scaled_max - scaled_min) + scaled_min  # Always scores to 0.05

    A constant input (max == min) has no spread to rescale; instead of the
    NaN values a 0/0 division would produce, every value is mapped to the
    lower bound 0.05.
    """
    scaled_min = 0.05
    scaled_max = 0.95

    min_value = column_df.min()
    max_value = column_df.max()
    span = max_value - min_value

    # Guard the division: a zero span is replaced by 1 so that
    # (value - min) / span evaluates to 0 and the result is scaled_min.
    if np.ndim(span) == 0:
        # Series input: min/max are scalars.
        safe_span = span if span != 0 else 1
    else:
        # DataFrame input: min/max are per-column Series.
        safe_span = span.where(span != 0, 1)

    return scaled_min + (column_df - min_value) / safe_span * (
        scaled_max - scaled_min
    )

In [3]:
#######################################
### Init PredictionsLoader per tool ###
#######################################


@dataclass
class PredictionsLoader:
    """
    Extracts and normalizes one source's columns from the combined spreadsheet
    and scores them against the true labels.

    The same class is used for the true labels themselves and for each tool.
    """

    # Names of the columns to extract; calc_accuracy pairs them by position
    # with the columns of the `truth` loader.
    columns: list[str]
    # Functions applied to every column, in the given order.
    normalization_functions: list[callable]
    # Normalized columns; None until load() has been called.
    predictions: pd.DataFrame = None
    # One accuracy score per column; None until calc_accuracy() has been called.
    accuracy: list[float] = None

    def load(self, df: pd.DataFrame) -> None:
        """
        Normalize this loader's columns of `df` and store them in `predictions`.

        `df` itself is left untouched: normalization runs on copies of the
        columns, so the same frame can be reused afterwards.
        """
        normalized_columns = []
        for col in self.columns:
            values = df[col]
            for norm_func in self.normalization_functions:
                # Normalizers may return a bare NumPy array; re-wrap the result
                # so the next normalizer receives a Series again and the final
                # column keeps df's index and the column name.
                values = pd.Series(norm_func(values), index=df.index, name=col)
            normalized_columns.append(values)

        # Build the frame once instead of concatenating column by column.
        self.predictions = (
            pd.concat(normalized_columns, axis=1)
            if normalized_columns
            else pd.DataFrame()
        )

    def calc_accuracy(self, truth: "PredictionsLoader") -> None:
        """
        Compute the accuracy of every column against `truth` and store the
        scores, in column order, in `accuracy`.

        `truth` is the loader holding the true labels; its i-th column is
        compared with this loader's i-th column.

        Raises:
            ValueError: if either loader has not been loaded yet, or `truth`
                has fewer columns than this loader.
        """
        if self.predictions is None or truth.predictions is None:
            raise ValueError("load() must be called on both loaders first")
        if len(truth.columns) < len(self.columns):
            raise ValueError(
                "truth has fewer columns than the predictions being scored"
            )

        self.accuracy = [
            accuracy_score(truth.predictions[truth_col], self.predictions[own_col])
            for truth_col, own_col in zip(truth.columns, self.columns)
        ]


# True labels: "y"/"n" strings, converted to 1/0.
truth = PredictionsLoader(
    columns=["cEXT", "cNEU", "cAGR", "cCON", "cOPN"],
    normalization_functions=[convert_yn_to_01],
)

# Tool #1: scores are first re-scaled per column, then thresholded at 0.5.
tool1 = PredictionsLoader(
    columns=["pred_sEXT", "pred_sNEU", "pred_sAGR", "pred_sCON", "pred_sOPN"],
    normalization_functions=[
        re_normalize_tool_1,
        normalize_scores_by_half_threshold,
    ],
)

# Tool #3: scores are thresholded at 0.5.
tool3 = PredictionsLoader(
    columns=[
        "BIG5_Extraversion",
        "BIG5_Neuroticism",
        "BIG5_Agreeableness",
        "BIG5_Conscientiousness",
        "BIG5_Openness",
    ],
    normalization_functions=[normalize_scores_by_half_threshold],
)

# Tool #4: scores are thresholded at 0.5.
tool4 = PredictionsLoader(
    columns=[
        "cEXT_prediction",
        "cNEU_prediction",
        "cAGR_prediction",
        "cCON_prediction",
        "cOPN_prediction",
    ],
    normalization_functions=[normalize_scores_by_half_threshold],
)

# Every loader lists its traits in the same order (EXT, NEU, AGR, CON, OPN),
# and the truth loader comes first in the list.
loaders = [truth, tool1, tool3, tool4]

In [4]:
def load_and_measure_accuracy(input_file_path: str, output_file_path: str) -> None:
    """
    Load a combined-predictions spreadsheet, score every loader against the
    true labels, and save the per-tool accuracies as a CSV file.

    Works on the module-level `loaders`, `truth`, `tool1`, `tool3` and `tool4`
    objects, which are (re)loaded from the given spreadsheet.

    Args:
        input_file_path: Excel file to read; the header is taken from its
            second row (header=1).
        output_file_path: Destination of the CSV with one row per tool and one
            accuracy column per truth column.
    """
    df = pd.read_excel(input_file_path, header=1)
    for loader in loaders:
        loader.load(df)
        loader.calc_accuracy(truth)
        print("Accuracy:", loader.accuracy)

    csv_headers = ["Tool"] + truth.columns
    # Build the labelled rows as new lists so that each loader's `accuracy`
    # stays a pure list of floats after this call.
    labelled_tools = (("Tool #1", tool1), ("Tool #3", tool3), ("Tool #4", tool4))
    rows = [[label, *tool.accuracy] for label, tool in labelled_tools]
    pd.DataFrame(rows, columns=csv_headers).to_csv(output_file_path, index=False)
    print("Accuracy results saved to", output_file_path)

#### Load the tools' predictions & measure accuracy - Essays


In [5]:
# Score all tools on the essays dataset and save the result as CSV.
load_and_measure_accuracy(
    input_file_path="./analysis/tools-baseline/essays-combined-predictions.xlsx",
    output_file_path="./analysis/tools-baseline/essays-accuracy.csv",
)

Accuracy: [1.0, 1.0, 1.0, 1.0, 1.0]
Accuracy: [0.5214748784440842, 0.5040518638573744, 0.5072933549432739, 0.5113452188006483, 0.5672609400324149]
Accuracy: [0.4959481361426256, 0.549837925445705, 0.5101296596434359, 0.5388978930307942, 0.5575364667747164]
Accuracy: [0.5145867098865479, 0.5113452188006483, 0.5121555915721232, 0.5324149108589952, 0.5178282009724473]
Accuracy results saved to ./analysis/tools-baseline/essays-accuracy.csv


In [6]:
# NOTE(review): `df` is not read again in this cell; the vote below uses the
# predictions already held by the loaders. Kept in case a later cell needs it.
df = pd.read_excel(
    "./analysis/tools-baseline/essays-combined-predictions.xlsx", header=1
)

# Majority vote per trait: the combined prediction is 1 when at least two of
# the three tools predict 1. Columns are paired by position across loaders and
# summed as arrays, so rows are matched by position regardless of the index.
result = {}
for truth_col, tool1_col, tool3_col, tool4_col in zip(
    truth.columns, tool1.columns, tool3.columns, tool4.columns
):
    votes = (
        tool1.predictions[tool1_col].to_numpy()
        + tool3.predictions[tool3_col].to_numpy()
        + tool4.predictions[tool4_col].to_numpy()
    )
    result[truth_col] = np.where(votes > 1, 1, 0).tolist()

print("Majority Vote!")
for truth_col, voted in result.items():
    print(truth_col, accuracy_score(truth.predictions[truth_col], voted))

Majority Vote!
cEXT 0.5190437601296597
cNEU 0.5243111831442464
cAGR 0.5178282009724473
cCON 0.5478119935170178
cOPN 0.5611831442463533


#### Load the tools' predictions & measure accuracy - myPersonality


In [7]:
# Score all tools on the concatenated myPersonality dataset and save the
# result as CSV.
load_and_measure_accuracy(
    input_file_path="./analysis/tools-baseline/myPersonality-concatenated-combined-predictions.xlsx",
    output_file_path="./analysis/tools-baseline/myPersonality-accuracy.csv",
)

Accuracy: [1.0, 1.0, 1.0, 1.0, 1.0]
Accuracy: [0.652, 0.812, 0.608, 0.688, 0.844]
Accuracy: [0.596, 0.56, 0.568, 0.572, 0.544]
Accuracy: [0.424, 0.396, 0.524, 0.448, 0.684]
Accuracy results saved to ./analysis/tools-baseline/myPersonality-accuracy.csv


In [8]:
# NOTE(review): `df` is not read again in this cell; the vote below uses the
# predictions already held by the loaders. Kept in case a later cell needs it.
df = pd.read_excel(
    "./analysis/tools-baseline/myPersonality-concatenated-combined-predictions.xlsx",
    header=1,
)

# Majority vote per trait: the combined prediction is 1 when at least two of
# the three tools predict 1. Columns are paired by position across loaders and
# summed as arrays, so rows are matched by position regardless of the index.
result = {}
for truth_col, tool1_col, tool3_col, tool4_col in zip(
    truth.columns, tool1.columns, tool3.columns, tool4.columns
):
    votes = (
        tool1.predictions[tool1_col].to_numpy()
        + tool3.predictions[tool3_col].to_numpy()
        + tool4.predictions[tool4_col].to_numpy()
    )
    result[truth_col] = np.where(votes > 1, 1, 0).tolist()

print("Majority Vote!")
for truth_col, voted in result.items():
    print(truth_col, accuracy_score(truth.predictions[truth_col], voted))

Majority Vote!
cEXT 0.624
cNEU 0.644
cAGR 0.596
cCON 0.612
cOPN 0.78
