In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Raise pandas' display limits (rows, columns, column width) for notebook output.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_colwidth", 1000)


### Reading the dataset


In [None]:
# Tab-separated annotation files that together make up the dataset.
files = [
    "data/yahoo-news-annotated-comments-dataset/ydata-ynacc-v1_0_expert_annotations.tsv",
    "data/yahoo-news-annotated-comments-dataset/ydata-ynacc-v1_0_turk_annotations.tsv",
]

# Read every file into its own frame, in the order listed above.
datasets = [pd.read_csv(path, sep="\t") for path in files]

# Stack the frames row-wise and renumber the index from 0.
yahoo_df = pd.concat(datasets, axis=0, ignore_index=True)


In [None]:
# Quick sanity check: (rows, columns) of the combined frame, then its first rows.
print(yahoo_df.shape)
yahoo_df.head()


### Building the comment threads at each comment level in the dataset


In [None]:
import string

import contractions
from num2words import num2words


def numbers_to_words(text: str) -> str:
    """Spell out the numeric tokens of ``text``.

    The text is split on whitespace. A token made only of decimal digits is
    replaced by its cardinal wording, and a token made of decimal digits
    followed by "st", "nd", "rd" or "th" is replaced by its ordinal wording.
    All other tokens are kept as they are.

    Args:
        text: Input string.

    Returns:
        The tokens joined back together with single spaces.
    """
    ordinal_suffixes = ("st", "nd", "rd", "th")
    tokens = text.split()
    for position, token in enumerate(tokens):
        # isdecimal() is used instead of isdigit(): isdigit() also accepts
        # characters such as "²" that cannot be parsed as a number, which
        # made the conversion raise.
        if token.isdecimal():
            tokens[position] = num2words(int(token))
        elif (
            len(token) > 2
            and token[:-2].isdecimal()
            and token[-2:] in ordinal_suffixes
        ):
            tokens[position] = num2words(int(token[:-2]), to="ordinal")

    return " ".join(tokens)


def build_comment_thread(row: pd.Series, comments: "pd.DataFrame | None" = None) -> str:
    """Build the thread text for one comment.

    The comment's own text gets a trailing "." when it does not already end
    with a punctuation character. A comment whose ``commentindex`` is 0 is
    returned on its own. Any other comment is prefixed with the ``thread`` of
    the first row whose ``commentid`` equals the comment's ``parentid``; when
    no such row exists, the comment's ``headline`` is used as the prefix.

    Args:
        row: A comment with the fields ``text``, ``commentindex``,
            ``parentid`` and ``headline``. It is not modified.
        comments: Frame with ``commentid`` and ``thread`` columns in which the
            parent is looked up. Defaults to the module-level ``yahoo_df``.

    Returns:
        The thread text, or "" when the comment text is empty.
    """
    text = row["text"]
    if not text:
        return ""
    # Work on a local string so the caller's row is left untouched.
    if text[-1] not in string.punctuation:
        text += "."

    if row["commentindex"] == 0:
        return text

    if comments is None:
        comments = yahoo_df
    parents = comments[comments["commentid"] == row["parentid"]]
    prefix = str(row["headline"] if parents.shape[0] == 0 else parents.iloc[0]["thread"])

    if not prefix:
        # An empty parent thread would otherwise produce a leading ". ".
        return text
    # A parent thread already ends with punctuation; adding another "."
    # would produce ".. " between the parent and the reply.
    separator = " " if prefix[-1] in string.punctuation else ". "
    return f"{prefix}{separator}{text}"


# Normalise the raw comment text. The steps run in this order:
#   1. remove every run of characters that is neither a word character nor whitespace,
#   2. collapse each whitespace run into a single space,
#   3. replace the literal ".." with "." (plain-string match, not a regex),
#   4. strip, lowercase, expand contractions, spell out numbers, cast to str.
# NOTE(review): step 1 already removes "." and "'", so step 3 looks like a no-op
# and contractions.fix only ever sees apostrophe-free forms — confirm this is intended.
yahoo_df["text"] = (
    yahoo_df["text"]
    .str.replace(r"[^\w\s]+", "", regex=True)
    .str.replace(r"\s+", " ", regex=True)
    .str.replace("..", ".", regex=False)
    .str.strip()
    .str.lower()
    .apply(lambda x: contractions.fix(x, slang=False))
    .apply(numbers_to_words)
    .astype(str)
)
# Rows are processed in ascending commentindex order because
# build_comment_thread reads the parent's "thread" cell out of yahoo_df, so the
# parent has to be filled in first (presumably parents always have a lower
# commentindex than their replies — verify against the data).
yahoo_df = yahoo_df.sort_values(by=["commentindex"])
yahoo_df["thread"] = ""
# Fill the column one row at a time; each write is visible to later iterations
# through the yahoo_df lookup inside build_comment_thread.
for index, row in yahoo_df.iterrows():
    yahoo_df.at[index, "thread"] = build_comment_thread(row)


### Splitting the dataset


In [None]:
def _read_split_ids(path: str) -> list:
    """Return the integer ids stored one per line in ``path``.

    Blank lines are skipped so that a stray empty line does not make ``int``
    raise.
    """
    with open(path) as f:
        return [int(line) for line in f.read().splitlines() if line.strip()]


train_ids = _read_split_ids(
    "data/yahoo-news-annotated-comments-dataset/ydata-ynacc-v1_0_train-ids.txt"
)
dev_ids = _read_split_ids(
    "data/yahoo-news-annotated-comments-dataset/ydata-ynacc-v1_0_dev-ids.txt"
)
test_ids = _read_split_ids(
    "data/yahoo-news-annotated-comments-dataset/ydata-ynacc-v1_0_test-ids.txt"
)

# Select the rows of each split by their "sdid" and take independent copies so
# that columns added to one split never touch yahoo_df or the other splits.
train_df = yahoo_df[yahoo_df["sdid"].isin(train_ids)].copy(deep=True)
dev_df = yahoo_df[yahoo_df["sdid"].isin(dev_ids)].copy(deep=True)
test_df = yahoo_df[yahoo_df["sdid"].isin(test_ids)].copy(deep=True)


### Visualizing the dataset


In [None]:
cols = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
]
# One stacked horizontal bar chart per annotation column: number of training
# rows per comment index, split by the column's label.
for column_name, display_name in cols:
    data = (
        # Only these three columns are used below, so only they are filled.
        train_df[["commentindex", column_name, "sdid"]]
        .fillna("Unlabeled")
        .groupby(["commentindex", column_name])["sdid"]
        .count()
        .reset_index()
    )
    # Shift the comment index so the axis starts at 1 instead of 0.
    data["commentindex"] += 1
    data = pd.pivot_table(
        data, values="sdid", index=["commentindex"], columns=[column_name]
    )
    data = data.sort_values(by="commentindex", ascending=False)

    fig, ax = plt.subplots(figsize=(12, 5))
    data.plot(kind="barh", stacked=True, ax=ax)
    ax.set(ylabel="Comment Indices", xlabel="Count")
    # display_name is already title-cased; str.capitalize() would lowercase
    # the second word of "Intended Audience".
    ax.set_title(f"Comment Index Frequencies by {display_name}")
plt.show()


In [None]:
cols = ["tone", "topic"]
# For every distinct label found in the comma-separated "tone" and "topic"
# columns, draw a stacked horizontal bar chart of row counts per comment index,
# split by whether the row carries that label.
for column_name in cols:
    labels = yahoo_df[column_name].str.split(",").explode().unique()
    # Filter instead of list.remove(): remove() raises when the value is
    # absent, and the np.NaN alias no longer exists in NumPy 2.0.
    unique_values = [label for label in labels if pd.notna(label) and label != "NA"]

    for value in unique_values:
        # regex=False: the label is a literal substring, not a pattern.
        has_value = yahoo_df[column_name].str.contains(value, regex=False)

        data = (
            yahoo_df[["commentindex", "sdid"]]
            .assign(**{value: has_value})
            # Rows with a missing tone/topic have a missing flag; label them.
            .fillna("Unlabeled")
            .groupby(["commentindex", value])["sdid"]
            .count()
            .reset_index()
        )
        # Shift the comment index so the axis starts at 1 instead of 0.
        data["commentindex"] += 1
        data = pd.pivot_table(
            data, values="sdid", index=["commentindex"], columns=[value]
        )
        data = data.sort_values(by="commentindex", ascending=False)

        fig, ax = plt.subplots(figsize=(12, 5))
        data.plot(kind="barh", stacked=True, ax=ax)
        ax.set(ylabel="Comment Indices", xlabel="Count")
        ax.set_title(f"Comment Index Frequencies by {value.capitalize()}")
plt.show()


### Baseline Models


In [None]:
import gensim
import gensim.downloader as api
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

In [None]:
# Add one label column per tone keyword to every split. Each column holds the
# string "True" or "False": whether "tone" contains the keyword
# (case-insensitive), with a missing tone counted as False.
tone_keywords = ("controversial", "mean", "sarcastic")
for df in (train_df, dev_df, test_df):
    for keyword in tone_keywords:
        matches = df["tone"].str.contains(keyword, case=False)
        df[keyword] = matches.fillna(False).astype(str)


Bag of Words

In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

# The bag-of-words features depend only on the text, so the vectoriser is
# fitted and applied once instead of once per target.
vectorizer = CountVectorizer()
train_features = vectorizer.fit_transform(train_df["text"])
dev_features = vectorizer.transform(dev_df["text"])
test_features = vectorizer.transform(test_df["text"])

score_rows = []
for target_col, display_name in targets:
    train_y = train_df[target_col]
    dev_y = dev_df[target_col]
    test_y = test_df[target_col]

    # Positional boolean masks of the rows that have a label for this target.
    train_mask = train_y.notna().to_numpy()
    dev_mask = dev_y.notna().to_numpy()
    test_mask = test_y.notna().to_numpy()

    clf = LogisticRegression(max_iter=10000)
    clf.fit(train_features[train_mask], train_y[train_mask])

    pred_dev_y = clf.predict(dev_features[dev_mask])
    pred_test_y = clf.predict(test_features[test_mask])

    score_rows.append(
        [
            display_name,
            f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
            f1_score(test_y[test_mask], pred_test_y, average="weighted"),
        ]
    )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(score_rows, columns=["Dataset", "Dev F1", "Test F1"])

baseline_scores.round(2)


TF-IDF

In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

# The TF-IDF features depend only on the text, so the vectoriser is fitted and
# applied once instead of once per target.
vectorizer = TfidfVectorizer()
train_features = vectorizer.fit_transform(train_df["text"])
dev_features = vectorizer.transform(dev_df["text"])
test_features = vectorizer.transform(test_df["text"])

score_rows = []
for target_col, display_name in targets:
    train_y = train_df[target_col]
    dev_y = dev_df[target_col]
    test_y = test_df[target_col]

    # Positional boolean masks of the rows that have a label for this target.
    train_mask = train_y.notna().to_numpy()
    dev_mask = dev_y.notna().to_numpy()
    test_mask = test_y.notna().to_numpy()

    clf = LogisticRegression(max_iter=10000)
    clf.fit(train_features[train_mask], train_y[train_mask])

    pred_dev_y = clf.predict(dev_features[dev_mask])
    pred_test_y = clf.predict(test_features[test_mask])

    score_rows.append(
        [
            display_name,
            f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
            f1_score(test_y[test_mask], pred_test_y, average="weighted"),
        ]
    )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(score_rows, columns=["Dataset", "Dev F1", "Test F1"])

baseline_scores.round(2)


Word Embeddings

In [None]:
# Load the "word2vec-google-news-300" vectors through gensim's downloader API.
w2v_model = api.load("word2vec-google-news-300")


def get_average_w2v_vector(row, model=None):
    """Return the mean word vector of ``row["text"]``.

    The text is split on whitespace and only the words present in ``model``
    contribute. The mean is taken over those words; dividing by the number of
    all words, as was done before, shrinks the vector whenever some words are
    out of vocabulary.

    Args:
        row: Mapping or Series with a ``text`` string.
        model: Object supporting ``word in model`` and ``model[word]``.
            Defaults to the module-level ``w2v_model``.

    Returns:
        A vector of shape (300,). It is all zeros when no word of the text is
        in the model.
    """
    if model is None:
        model = w2v_model

    vectors = [model[w] for w in row["text"].split() if w in model]
    if not vectors:
        return np.zeros((300,))

    avg_text_w2v = np.mean(vectors, axis=0)
    # Keep the original guard: anything that is not a 300-d vector becomes zeros.
    if avg_text_w2v.shape != (300,):
        avg_text_w2v = np.zeros((300,))

    return avg_text_w2v


targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

# The averaged word vectors depend only on the text, so they are computed once
# per split instead of once per target.
train_features = train_df.apply(
    lambda x: get_average_w2v_vector(x), axis=1, result_type="expand"
)
dev_features = dev_df.apply(
    lambda x: get_average_w2v_vector(x), axis=1, result_type="expand"
)
test_features = test_df.apply(
    lambda x: get_average_w2v_vector(x), axis=1, result_type="expand"
)

score_rows = []
for target_col, display_name in targets:
    train_y = train_df[target_col]
    dev_y = dev_df[target_col]
    test_y = test_df[target_col]

    # Positional boolean masks of the rows that have a label for this target.
    train_mask = train_y.notna().to_numpy()
    dev_mask = dev_y.notna().to_numpy()
    test_mask = test_y.notna().to_numpy()

    clf = LogisticRegression(max_iter=10000)
    clf.fit(train_features[train_mask], train_y[train_mask])

    pred_dev_y = clf.predict(dev_features[dev_mask])
    pred_test_y = clf.predict(test_features[test_mask])

    score_rows.append(
        [
            display_name,
            f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
            f1_score(test_y[test_mask], pred_test_y, average="weighted"),
        ]
    )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(score_rows, columns=["Dataset", "Dev F1", "Test F1"])

baseline_scores.round(2)


### Baseline Model with Extractive Summary Features


In [None]:
from sumy.nlp.stemmers import Stemmer
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer


In [None]:
class Summarizer:
    """Common wrapper around a sumy summarizer.

    The constructor prepares an English tokenizer and stemmer and fixes the
    summary length at three sentences. ``summarize`` calls ``self.summarizer``,
    which this base class does not set; subclasses assign it.
    """

    def __init__(self) -> None:
        self.language = "english"
        self.num_sentences = 3
        self.tokenizer = Tokenizer(self.language)
        self.stemmer = Stemmer(self.language)

    def summarize(self, thread: str) -> str:
        """Return the sentences selected from ``thread`` as a single string.

        Args:
            thread: Plain text to summarise.

        Returns:
            The text of the selected sentences, separated by single spaces.
        """
        parser = PlaintextParser.from_string(thread, self.tokenizer)
        sentences = self.summarizer(parser.document, self.num_sentences)
        # Join with a space: an empty separator glues the last word of one
        # sentence to the first word of the next.
        return " ".join(sentence._text for sentence in sentences)


class Luhn(Summarizer):
    """Summarizer backed by sumy's ``LuhnSummarizer``."""

    def __init__(self) -> None:
        super().__init__()
        # Pass on the stemmer prepared by the base class; without it the
        # stemmer is built but never used.
        self.summarizer = LuhnSummarizer(self.stemmer)

    def __str__(self):
        """Name used to label this summarizer in result tables."""
        return "Luhn"


class LSA(Summarizer):
    """Summarizer backed by sumy's ``LsaSummarizer``."""

    def __init__(self) -> None:
        super().__init__()
        # Pass on the stemmer prepared by the base class; without it the
        # stemmer is built but never used.
        self.summarizer = LsaSummarizer(self.stemmer)

    def __str__(self):
        """Name used to label this summarizer in result tables."""
        return "LSA"


class LexRank(Summarizer):
    """Summarizer backed by sumy's ``LexRankSummarizer``."""

    def __init__(self) -> None:
        super().__init__()
        # Pass on the stemmer prepared by the base class; without it the
        # stemmer is built but never used.
        self.summarizer = LexRankSummarizer(self.stemmer)

    def __str__(self):
        """Name used to label this summarizer in result tables."""
        return "LexRank"


class TextRank(Summarizer):
    """Summarizer backed by sumy's ``TextRankSummarizer``."""

    def __init__(self) -> None:
        super().__init__()
        # Pass on the stemmer prepared by the base class; without it the
        # stemmer is built but never used.
        self.summarizer = TextRankSummarizer(self.stemmer)

    def __str__(self):
        """Name used to label this summarizer in result tables."""
        return "TextRank"


# One instance of each extractive summarizer; the experiments iterate over this list.
summarizer_models = [Luhn(), LSA(), LexRank(), TextRank()]


Bag of Words

In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

score_rows = []
for summarizer in summarizer_models:
    # Keep the summaries in local Series instead of adding and later dropping
    # a temporary "summary" column on the split frames.
    train_summary = train_df["thread"].apply(summarizer.summarize)
    dev_summary = dev_df["thread"].apply(summarizer.summarize)
    test_summary = test_df["thread"].apply(summarizer.summarize)

    # The bag-of-words features depend on the summarizer only, not on the
    # target, so they are built once per summarizer.
    vectorizer = CountVectorizer()
    train_features = vectorizer.fit_transform(train_summary)
    dev_features = vectorizer.transform(dev_summary)
    test_features = vectorizer.transform(test_summary)

    for target_col, display_name in targets:
        train_y = train_df[target_col]
        dev_y = dev_df[target_col]
        test_y = test_df[target_col]

        # Positional boolean masks of the rows labelled for this target.
        train_mask = train_y.notna().to_numpy()
        dev_mask = dev_y.notna().to_numpy()
        test_mask = test_y.notna().to_numpy()

        clf = LogisticRegression(max_iter=10000)
        clf.fit(train_features[train_mask], train_y[train_mask])

        pred_dev_y = clf.predict(dev_features[dev_mask])
        pred_test_y = clf.predict(test_features[test_mask])

        score_rows.append(
            [
                str(summarizer),
                display_name,
                f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
                f1_score(test_y[test_mask], pred_test_y, average="weighted"),
            ]
        )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(
    score_rows, columns=["Summarizer", "Target", "Dev F1", "Test F1"]
)

baseline_scores.round(2)


TF-IDF

In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

score_rows = []
for summarizer in summarizer_models:
    # Keep the summaries in local Series instead of adding and later dropping
    # a temporary "summary" column on the split frames.
    train_summary = train_df["thread"].apply(summarizer.summarize)
    dev_summary = dev_df["thread"].apply(summarizer.summarize)
    test_summary = test_df["thread"].apply(summarizer.summarize)

    # The TF-IDF features depend on the summarizer only, not on the target,
    # so they are built once per summarizer.
    vectorizer = TfidfVectorizer()
    train_features = vectorizer.fit_transform(train_summary)
    dev_features = vectorizer.transform(dev_summary)
    test_features = vectorizer.transform(test_summary)

    for target_col, display_name in targets:
        train_y = train_df[target_col]
        dev_y = dev_df[target_col]
        test_y = test_df[target_col]

        # Positional boolean masks of the rows labelled for this target.
        train_mask = train_y.notna().to_numpy()
        dev_mask = dev_y.notna().to_numpy()
        test_mask = test_y.notna().to_numpy()

        clf = LogisticRegression(max_iter=10000)
        clf.fit(train_features[train_mask], train_y[train_mask])

        pred_dev_y = clf.predict(dev_features[dev_mask])
        pred_test_y = clf.predict(test_features[test_mask])

        score_rows.append(
            [
                str(summarizer),
                display_name,
                f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
                f1_score(test_y[test_mask], pred_test_y, average="weighted"),
            ]
        )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(
    score_rows, columns=["Summarizer", "Target", "Dev F1", "Test F1"]
)

baseline_scores.round(2)


In [None]:
# Reuse the vectors when an earlier cell already loaded them; load them only
# when this cell runs without that state.
if "w2v_model" not in globals():
    w2v_model = api.load("word2vec-google-news-300")


def get_average_w2v_vector_thread(row, model=None):
    """Return the mean word vectors of a row's text and summary, concatenated.

    Each string is split on whitespace and only the words present in ``model``
    contribute. Each mean is taken over those words; dividing by the number of
    all words, as was done before, shrinks the vector whenever some words are
    out of vocabulary.

    Args:
        row: Mapping or Series with ``text`` and ``summary`` strings.
        model: Object supporting ``word in model`` and ``model[word]``.
            Defaults to the module-level ``w2v_model``.

    Returns:
        A vector of shape (600,): the text mean followed by the summary mean.
        A half is all zeros when none of its words are in the model.
    """
    if model is None:
        model = w2v_model

    def mean_vector(text):
        # Mean of the in-vocabulary word vectors, or zeros when there are none.
        vectors = [model[w] for w in text.split() if w in model]
        if not vectors:
            return np.zeros((300,))
        mean = np.mean(vectors, axis=0)
        # Keep the original guard: anything that is not 300-d becomes zeros.
        return mean if mean.shape == (300,) else np.zeros((300,))

    avg_text_w2v = mean_vector(row["text"])
    avg_thread_w2v = mean_vector(row["summary"])

    embedding = np.concatenate((avg_text_w2v, avg_thread_w2v), axis=None)
    return embedding


targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

score_rows = []
for summarizer in summarizer_models:
    # Work on copies that carry the "summary" column, so the split frames
    # themselves are never modified.
    train_with_summary = train_df.assign(
        summary=train_df["thread"].apply(summarizer.summarize)
    )
    dev_with_summary = dev_df.assign(
        summary=dev_df["thread"].apply(summarizer.summarize)
    )
    test_with_summary = test_df.assign(
        summary=test_df["thread"].apply(summarizer.summarize)
    )

    # The embeddings depend on the summarizer only, not on the target, so
    # they are computed once per summarizer instead of once per target.
    train_features = train_with_summary.apply(
        lambda x: get_average_w2v_vector_thread(x), axis=1, result_type="expand"
    )
    dev_features = dev_with_summary.apply(
        lambda x: get_average_w2v_vector_thread(x), axis=1, result_type="expand"
    )
    test_features = test_with_summary.apply(
        lambda x: get_average_w2v_vector_thread(x), axis=1, result_type="expand"
    )

    for target_col, display_name in targets:
        train_y = train_df[target_col]
        dev_y = dev_df[target_col]
        test_y = test_df[target_col]

        # Positional boolean masks of the rows labelled for this target.
        train_mask = train_y.notna().to_numpy()
        dev_mask = dev_y.notna().to_numpy()
        test_mask = test_y.notna().to_numpy()

        clf = LogisticRegression(max_iter=10000)
        clf.fit(train_features[train_mask], train_y[train_mask])

        pred_dev_y = clf.predict(dev_features[dev_mask])
        pred_test_y = clf.predict(test_features[test_mask])

        score_rows.append(
            [
                str(summarizer),
                display_name,
                f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
                f1_score(test_y[test_mask], pred_test_y, average="weighted"),
            ]
        )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(
    score_rows, columns=["Summarizer", "Target", "Dev F1", "Test F1"]
)

baseline_scores.round(2)


The results above show essentially no change, aside from a small improvement for constructiveness. Constructiveness does have the most well-defined labels, but maybe we need more complex features or models?


### Models with sentence embedding features


In [None]:
from sentence_transformers import SentenceTransformer

sbert_model = SentenceTransformer("stsb-distilroberta-base-v2")

# Per-split dictionaries mapping a feature-set name to its embedding matrix.
train_sentence_embeddings = {}
# "dev_entence_embeddings" is a misspelling that other cells refer to. Both
# names are bound to the same dict so the old name keeps working and the
# correctly spelled one is available.
dev_sentence_embeddings = dev_entence_embeddings = {}
test_sentence_embeddings = {}

# "Base" holds the embeddings of the comment text itself.
train_sentence_embeddings["Base"] = sbert_model.encode(train_df["text"].to_list())
dev_sentence_embeddings["Base"] = sbert_model.encode(dev_df["text"].to_list())
test_sentence_embeddings["Base"] = sbert_model.encode(test_df["text"].to_list())


In [None]:
# Pair every split frame with the dictionary that stores its embeddings.
split_stores = (
    (train_df, train_sentence_embeddings),
    (dev_df, dev_entence_embeddings),
    (test_df, test_sentence_embeddings),
)

# For each summarizer, summarise every thread of every split and store the
# sentence embeddings of the summaries under the summarizer's name.
for summarizer in summarizer_models:
    summarizer_name = str(summarizer)
    for split_df, store in split_stores:
        summaries = split_df["thread"].apply(summarizer.summarize).to_list()
        store[summarizer_name] = sbert_model.encode(summaries)


In [None]:
baseline_scores = pd.DataFrame(columns=["Dataset", "Dev F1", "Test F1"])

sbert_targets = (
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
)

# One logistic regression per target on the "Base" sentence embeddings,
# scored with weighted F1 on the dev and test splits.
for target_col, display_name in sbert_targets:
    train_y, dev_y, test_y = (
        split[target_col] for split in (train_df, dev_df, test_df)
    )
    # Only rows that carry a label for this target take part.
    train_keep, dev_keep, test_keep = train_y.notna(), dev_y.notna(), test_y.notna()

    train_X = train_sentence_embeddings["Base"].copy()[train_keep]
    dev_X = dev_entence_embeddings["Base"].copy()[dev_keep]
    test_X = test_sentence_embeddings["Base"].copy()[test_keep]
    train_y, dev_y, test_y = train_y[train_keep], dev_y[dev_keep], test_y[test_keep]

    clf = LogisticRegression(max_iter=10000)
    clf.fit(train_X, train_y)

    dev_f1 = f1_score(dev_y, clf.predict(dev_X), average="weighted")
    test_f1 = f1_score(test_y, clf.predict(test_X), average="weighted")

    # Append the scores as the next row of the table.
    baseline_scores.loc[len(baseline_scores.index)] = [display_name, dev_f1, test_f1]


baseline_scores.round(2)


In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

score_rows = []
for summarizer in summarizer_models:
    summarizer_name = str(summarizer)

    # Text embedding followed by summary embedding for every row. This
    # depends on the summarizer only, so it is built once per summarizer
    # instead of once per target.
    train_features = np.concatenate(
        (
            train_sentence_embeddings["Base"],
            train_sentence_embeddings[summarizer_name],
        ),
        axis=1,
    )
    dev_features = np.concatenate(
        (dev_entence_embeddings["Base"], dev_entence_embeddings[summarizer_name]),
        axis=1,
    )
    test_features = np.concatenate(
        (
            test_sentence_embeddings["Base"],
            test_sentence_embeddings[summarizer_name],
        ),
        axis=1,
    )

    for target_col, display_name in targets:
        train_y = train_df[target_col]
        dev_y = dev_df[target_col]
        test_y = test_df[target_col]

        # Positional boolean masks of the rows labelled for this target.
        train_mask = train_y.notna().to_numpy()
        dev_mask = dev_y.notna().to_numpy()
        test_mask = test_y.notna().to_numpy()

        clf = LogisticRegression(max_iter=10000)
        clf.fit(train_features[train_mask], train_y[train_mask])

        pred_dev_y = clf.predict(dev_features[dev_mask])
        pred_test_y = clf.predict(test_features[test_mask])

        score_rows.append(
            [
                summarizer_name,
                display_name,
                f1_score(dev_y[dev_mask], pred_dev_y, average="weighted"),
                f1_score(test_y[test_mask], pred_test_y, average="weighted"),
            ]
        )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(
    score_rows, columns=["Summarizer", "Target", "Dev F1", "Test F1"]
)

baseline_scores.round(2)


### Trying out a better classification model with sentence embedding features


In [None]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder


In [None]:
baseline_scores = pd.DataFrame(columns=["Dataset", "Dev F1", "Test F1"])

# Encoder that turns the labels into integer class ids; it is re-fitted on the
# training labels of every target.
enc = LabelEncoder()

xgb_targets = (
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
)

# One XGBoost classifier per target on the "Base" sentence embeddings, scored
# with weighted F1 on the dev and test splits.
for target_col, display_name in xgb_targets:
    train_labels = train_df[target_col].copy()
    dev_labels = dev_df[target_col].copy()
    test_labels = test_df[target_col].copy()

    # Only rows that carry a label for this target take part.
    train_keep = train_labels.notna()
    dev_keep = dev_labels.notna()
    test_keep = test_labels.notna()

    train_X = train_sentence_embeddings["Base"].copy()[train_keep]
    train_y = enc.fit_transform(train_labels[train_keep])

    dev_X = dev_entence_embeddings["Base"].copy()[dev_keep]
    dev_y = enc.transform(dev_labels[dev_keep])

    test_X = test_sentence_embeddings["Base"].copy()[test_keep]
    test_y = enc.transform(test_labels[test_keep])

    clf = XGBClassifier()
    clf.fit(train_X, train_y)

    dev_f1 = f1_score(dev_y, clf.predict(dev_X), average="weighted")
    test_f1 = f1_score(test_y, clf.predict(test_X), average="weighted")

    # Append the scores as the next row of the table.
    baseline_scores.loc[len(baseline_scores.index)] = [display_name, dev_f1, test_f1]


baseline_scores.round(2)


In [None]:
targets = [
    ("persuasiveness", "Persuasiveness"),
    ("constructiveclass", "Constructiveness"),
    ("intendedaudience", "Intended Audience"),
    ("controversial", "Controversial"),
    ("mean", "Mean"),
    ("sarcastic", "Sarcastic"),
]

# Create the encoder here so this cell does not rely on one left behind by
# another cell; it is re-fitted on the training labels of every target.
enc = LabelEncoder()

score_rows = []
for summarizer in summarizer_models:
    summarizer_name = str(summarizer)

    # Text embedding followed by summary embedding for every row. This
    # depends on the summarizer only, so it is built once per summarizer
    # instead of once per target.
    train_features = np.concatenate(
        (
            train_sentence_embeddings["Base"],
            train_sentence_embeddings[summarizer_name],
        ),
        axis=1,
    )
    dev_features = np.concatenate(
        (dev_entence_embeddings["Base"], dev_entence_embeddings[summarizer_name]),
        axis=1,
    )
    test_features = np.concatenate(
        (
            test_sentence_embeddings["Base"],
            test_sentence_embeddings[summarizer_name],
        ),
        axis=1,
    )

    for target_col, display_name in targets:
        train_labels = train_df[target_col]
        dev_labels = dev_df[target_col]
        test_labels = test_df[target_col]

        # Positional boolean masks of the rows labelled for this target.
        train_mask = train_labels.notna().to_numpy()
        dev_mask = dev_labels.notna().to_numpy()
        test_mask = test_labels.notna().to_numpy()

        # Encode the labels as integer class ids.
        train_y = enc.fit_transform(train_labels[train_mask])
        dev_y = enc.transform(dev_labels[dev_mask])
        test_y = enc.transform(test_labels[test_mask])

        clf = XGBClassifier()
        clf.fit(train_features[train_mask], train_y)

        pred_dev_y = clf.predict(dev_features[dev_mask])
        pred_test_y = clf.predict(test_features[test_mask])

        score_rows.append(
            [
                summarizer_name,
                display_name,
                f1_score(dev_y, pred_dev_y, average="weighted"),
                f1_score(test_y, pred_test_y, average="weighted"),
            ]
        )

# Build the score table in one go rather than growing it row by row.
baseline_scores = pd.DataFrame(
    score_rows, columns=["Summarizer", "Target", "Dev F1", "Test F1"]
)

baseline_scores.round(2)
