# Natural Language Processing Final Assignment

## Real - Fake News Classification: A Comparison of Natural Language Models for Classification

In [1]:
# Import Libraries
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import string
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)
from sklearn.pipeline import make_pipeline
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier



In [2]:
df_raw = pd.read_json("data/Sarcasm_Headlines_Dataset.json", lines=True)

In [3]:
df_raw.head()

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,1
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",1
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0


In [4]:
df_raw.shape

(26709, 3)

In [5]:
df_raw.isnull().sum()

Unnamed: 0,0
article_link,0
headline,0
is_sarcastic,0


In [6]:
df_raw.headline.value_counts()

Unnamed: 0_level_0,count
headline,Unnamed: 1_level_1
the 20 funniest tweets from women this week,10
sunday roundup,10
"'no way to prevent this,' says only nation where this regularly happens",8
the funniest tweets from parents this week,6
the funniest tweets from women this week,4
...,...
man's dream to get drunk in an a-frame finally realized,1
eager understudy beginning to think john lithgow impervious to disease,1
grateful for my mom's legacy this mother's day,1
republican pick-up lines are about as creepy as you might imagine,1


In [7]:
# Keep only the first occurrence of every headline.
# Rebinding (instead of inplace=True) keeps the cell safe to re-run.
df_raw = df_raw.drop_duplicates(subset=['headline'])

In [8]:
df_raw.article_link.value_counts()

Unnamed: 0_level_0,count
article_link,Unnamed: 1_level_1
https://www.huffingtonpost.com/entry/gourmet-gifts-for-the-foo_b_6272532.html,1
https://www.huffingtonpost.com/entry/versace-black-code_us_5861fbefe4b0de3a08f600d5,1
https://www.huffingtonpost.com/entry/roseanne-revival-review_us_5ab3a497e4b054d118e04365,1
https://local.theonion.com/mom-starting-to-fear-son-s-web-series-closest-thing-she-1819576697,1
https://politics.theonion.com/boehner-just-wants-wife-to-listen-not-come-up-with-alt-1819574302,1
...,...
https://www.huffingtonpost.com/entry/barcelona-refugee-protest_us_58aa040ce4b037d17d290230,1
https://www.huffingtonpost.com/entry/jeffrey-lord-worst-comments_us_598cd410e4b09071f6989d91,1
https://www.huffingtonpost.comhttp://www.theguardian.com/world/2016/may/31/north-korea-praises-trump-and-urges-us-voters-to-reject-dull-hillary,1
https://www.huffingtonpost.com/entry/facebook-healthcare_n_5926140.html,1


In [9]:
# Keep only the first row for every article link.
# Rebinding (instead of inplace=True) keeps the cell safe to re-run.
df_raw = df_raw.drop_duplicates(subset=['article_link'])

In [10]:
df_raw.article_link.value_counts()

Unnamed: 0_level_0,count
article_link,Unnamed: 1_level_1
https://www.huffingtonpost.com/entry/gourmet-gifts-for-the-foo_b_6272532.html,1
https://www.huffingtonpost.com/entry/versace-black-code_us_5861fbefe4b0de3a08f600d5,1
https://www.huffingtonpost.com/entry/roseanne-revival-review_us_5ab3a497e4b054d118e04365,1
https://local.theonion.com/mom-starting-to-fear-son-s-web-series-closest-thing-she-1819576697,1
https://politics.theonion.com/boehner-just-wants-wife-to-listen-not-come-up-with-alt-1819574302,1
...,...
https://www.huffingtonpost.com/entry/barcelona-refugee-protest_us_58aa040ce4b037d17d290230,1
https://www.huffingtonpost.com/entry/jeffrey-lord-worst-comments_us_598cd410e4b09071f6989d91,1
https://www.huffingtonpost.comhttp://www.theguardian.com/world/2016/may/31/north-korea-praises-trump-and-urges-us-voters-to-reject-dull-hillary,1
https://www.huffingtonpost.com/entry/facebook-healthcare_n_5926140.html,1


### Preprocessing Function

In [11]:
# Fetch the NLTK resources that preprocess_text relies on:
# tokenizer data for nltk.word_tokenize, the stop-word lists, and WordNet for the lemmatizer.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [12]:
def preprocess_text(text):
    """
    Clean a pandas Series of raw strings for bag-of-words style models.

    Steps, in order: lowercase; strip URLs, e-mail addresses and Twitter
    handles; strip punctuation and digits; collapse whitespace; tokenize with
    NLTK; drop English stop words; lemmatize with WordNet; re-join the tokens
    with single spaces.

    Parameters:
    text : pd.Series of str
        Raw documents (one string per row).
    Returns:
    pd.Series of str with the same index as `text`, holding the cleaned text.
    """
    text = text.str.lower()

    # Remove links, e-mail addresses and Twitter handles.
    # ('http\S+' already covers https URLs, so no separate alternative is needed.)
    text = text.str.replace(r'http\S+|www\S+', '', regex=True)
    text = text.str.replace(r'\S+@\S+', '', regex=True)
    text = text.str.replace(r'@\w+', '', regex=True)

    # Escape the punctuation before placing it in a character class: unescaped,
    # the '\]' pair inside string.punctuation is read as an escaped ']' and a
    # literal backslash would never be removed.
    punctuation_pattern = f'[{re.escape(string.punctuation)}]'
    text = text.str.replace(punctuation_pattern, '', regex=True)

    text = text.str.replace(r'\d+', '', regex=True)

    # Collapse the gaps left behind by the removals above.
    text = text.str.replace(r'\s+', ' ', regex=True).str.strip()

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    def _clean_tokens(tokens):
        # Stop words are filtered before lemmatizing, as in the original pipeline.
        return ' '.join(lemmatizer.lemmatize(w) for w in tokens if w not in stop_words)

    return text.apply(nltk.word_tokenize).apply(_clean_tokens)


In [13]:
df = df_raw.copy()

In [14]:
# nltk is already imported in the first cell, so it is not re-imported here.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Download the punkt_tab resource
# NOTE(review): presumably required by nltk.word_tokenize on newer NLTK releases — confirm against the installed version.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [15]:
df["headline_clean"] = preprocess_text(df["headline"])

### Train - Test Split

In [16]:
X = df["headline_clean"]
y = df["is_sarcastic"]

In [17]:
# Stratified split: hold out 30% of the rows, then halve that hold-out into
# validation and test sets. Both splits use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=7
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, stratify=y_temp, random_state=7
)

# Discard the original row labels so every split is indexed 0..n-1.
X_train, X_val, X_test, y_train, y_val, y_test = (
    part.reset_index(drop=True)
    for part in (X_train, X_val, X_test, y_train, y_val, y_test)
)

In [18]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(18621,)
(3990,)
(3991,)


In [19]:
# ===== NEW: Shuffled Labels Experiment =====
print("\n=== Shuffled Labels Experiment ===")

# Permute the training labels so they no longer correspond to their headlines.
y_train_shuffled = (
    y_train
    .sample(frac=1.0, random_state=42)
    .reset_index(drop=True)
)

# Sanity check: at least one label must differ from the original ordering.
assert (y_train_shuffled != y_train).any(), "Shuffling failed: labels match original"

# TF-IDF unigrams feeding an L2-regularised logistic regression.
shuffled_pipeline = make_pipeline(
    TfidfVectorizer(stop_words='english', ngram_range=(1, 1)),
    LogisticRegression(penalty='l2', C=0.1, max_iter=1000),
)

print("Fitting model on shuffled labels...")
shuffled_pipeline.fit(X_train, y_train_shuffled)

# The validation labels are left untouched; only the training labels were permuted.
print("Predicting on validation set with correct labels...")
y_val_pred_shuffled = shuffled_pipeline.predict(X_val)

print("Evaluating model trained on shuffled labels:")
def evaluate_models(y_true, y_pred_dict, label=None):
    """
    Print a classification report per model and tabulate summary metrics.

    NOTE: the notebook defines this function again under "Define functions for
    efficient model evaluation and fitting"; this copy is needed because this
    cell runs first on a fresh kernel.

    Parameters:
    y_true : array-like
        True labels.
    y_pred_dict : dict
        Maps a model name to that model's predicted labels.
    label : str, optional
        Name of the evaluated split, shown in the printed header.
    Returns:
    pd.DataFrame with columns Model, Accuracy, Precision, Recall, F1-score.
    """
    scorers = {
        'Precision': precision_score,
        'Recall': recall_score,
        'F1-score': f1_score,
    }
    results = {column: [] for column in ('Model', 'Accuracy', *scorers)}

    for model_name, y_pred in y_pred_dict.items():
        print(f"\n===== {model_name}: {label or ''} =====")
        print("Classification Report:")
        print(classification_report(y_true, y_pred))

        results['Model'].append(model_name)
        results['Accuracy'].append(accuracy_score(y_true, y_pred))

        # Two classes: score the larger label as the positive class.
        # Otherwise fall back to support-weighted averaging.
        classes = np.unique(y_true)
        if len(classes) == 2:
            averaging = {'average': 'binary', 'pos_label': classes[1]}
        else:
            averaging = {'average': 'weighted'}

        for column, scorer in scorers.items():
            results[column].append(scorer(y_true, y_pred, **averaging))

    return pd.DataFrame(results)

evaluate_models(y_val, {"Shuffled Labels Model": y_val_pred_shuffled}, label="Validation Set")
# ===== END NEW: Shuffled Labels Experiment =====


=== Shuffled Labels Experiment ===
Fitting model on shuffled labels...
Predicting on validation set with correct labels...
Evaluating model trained on shuffled labels:

===== Shuffled Labels Model: Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.56      1.00      0.72      2243
           1       0.55      0.00      0.01      1747

    accuracy                           0.56      3990
   macro avg       0.55      0.50      0.36      3990
weighted avg       0.56      0.56      0.41      3990



Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score
0,Shuffled Labels Model,0.562406,0.545455,0.003434,0.006826


## Model Fitting

### Define functions for efficient model evaluation and fitting

In [20]:
def evaluate_models(y_true, y_pred_dict, label=None):
    """
    Print a classification report per model and tabulate summary metrics.

    Parameters:
    y_true : array-like
        True labels.
    y_pred_dict : dict
        Maps a model name to that model's predicted labels.
    label : str, optional
        Name of the evaluated split (e.g., "Validation Set"), shown in the
        printed header.
    Returns:
    pd.DataFrame with one row per model and the columns Model, Accuracy,
    Precision, Recall and F1-score.
    """
    scorers = {
        'Precision': precision_score,
        'Recall': recall_score,
        'F1-score': f1_score,
    }
    results = {column: [] for column in ('Model', 'Accuracy', *scorers)}

    for model_name, y_pred in y_pred_dict.items():
        print(f"\n===== {model_name}: {label or ''} =====")
        print("Classification Report:")
        print(classification_report(y_true, y_pred))

        results['Model'].append(model_name)
        results['Accuracy'].append(accuracy_score(y_true, y_pred))

        # Two classes: score the larger label as the positive class.
        # Otherwise fall back to support-weighted averaging.
        classes = np.unique(y_true)
        if len(classes) == 2:
            averaging = {'average': 'binary', 'pos_label': classes[1]}
        else:
            averaging = {'average': 'weighted'}

        for column, scorer in scorers.items():
            results[column].append(scorer(y_true, y_pred, **averaging))

    return pd.DataFrame(results)


In [21]:
def fit_pipeline_predict_evaluate(
    model_class,
    model_name,
    X_train,
    y_train,
    X_val,
    y_val,
    vectorizer_type='tfidf',
    ngram_range=(1,1)
):
    """
    Fit a vectorizer + estimator pipeline and score it on train and validation data.

    Parameters:
    model_class : estimator
        Despite the name, this is an estimator *instance*; it is placed in the
        pipeline as given.
        NOTE(review): presumably the passed instance itself ends up fitted — confirm if callers reuse it.
    model_name : str
        Name used in progress messages and in the 'Model' column.
    X_train, y_train : training texts and labels.
    X_val, y_val : validation texts and labels.
    vectorizer_type : {'tfidf', 'bow'}
        'tfidf' uses TfidfVectorizer, 'bow' uses CountVectorizer.
    ngram_range : tuple
        Forwarded to the vectorizer.
    Returns:
    pd.DataFrame with the train metrics row(s) followed by the validation
    row(s), plus a 'Dataset' column set to "Train" or "Validation".
    Raises:
    ValueError if vectorizer_type is neither 'tfidf' nor 'bow'.
    """
    if vectorizer_type not in ('tfidf', 'bow'):
        raise ValueError("vectorizer_type must be either 'tfidf' or 'bow'")

    vectorizer_cls = TfidfVectorizer if vectorizer_type == 'tfidf' else CountVectorizer
    pipeline = make_pipeline(
        vectorizer_cls(stop_words='english', ngram_range=ngram_range),
        model_class,
    )

    print(f"Fitting {model_name}...")
    pipeline.fit(X_train, y_train)

    print("Predicting on training set...")
    train_predictions = pipeline.predict(X_train)

    print("Predicting on validation set...")
    val_predictions = pipeline.predict(X_val)

    print(f"Evaluating {model_name} on training set...")
    train_metrics = evaluate_models(
        y_train, {model_name: train_predictions}, label="Train Set"
    )

    print(f"Evaluating {model_name} on validation set...")
    val_metrics = evaluate_models(
        y_val, {model_name: val_predictions}, label="Validation Set"
    )

    # Tag each table with its split before stacking train above validation.
    return pd.concat(
        [
            train_metrics.assign(Dataset="Train"),
            val_metrics.assign(Dataset="Validation"),
        ],
        ignore_index=True,
    )

### Bag of Words + Unigrams Vectorizer

In [22]:
# L2-regularised logistic regression on unigram counts.
logistic_bow_uni_metrics = fit_pipeline_predict_evaluate(
    model_class=LogisticRegression(penalty='l2', C=0.1, max_iter=1000),
    model_name="Logistic Regression (BoW)",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    vectorizer_type='bow',
    ngram_range=(1, 1),
)
print(logistic_bow_uni_metrics)

Fitting Logistic Regression (BoW)...
Predicting on training set...
Predicting on validation set...
Evaluating Logistic Regression (BoW) on training set...

===== Logistic Regression (BoW): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.93      0.87     10465
           1       0.88      0.72      0.80      8156

    accuracy                           0.84     18621
   macro avg       0.85      0.83      0.83     18621
weighted avg       0.84      0.84      0.83     18621

Evaluating Logistic Regression (BoW) on validation set...

===== Logistic Regression (BoW): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.88      0.81      2243
           1       0.80      0.61      0.69      1747

    accuracy                           0.76      3990
   macro avg       0.77      0.75      0.75      3990
weighted avg       0.77      0.76    

In [23]:
# Multinomial Naive Bayes on unigram counts.
bow_uni_nb_metrics = fit_pipeline_predict_evaluate(
    MultinomialNB(alpha=3),
    "Multinomial Naive Bayes (BoW Unigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='bow',
    ngram_range=(1, 1),
)

print(bow_uni_nb_metrics)

Fitting Multinomial Naive Bayes (BoW Unigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Multinomial Naive Bayes (BoW Unigrams) on training set...

===== Multinomial Naive Bayes (BoW Unigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.93      0.90     10465
           1       0.91      0.82      0.86      8156

    accuracy                           0.89     18621
   macro avg       0.89      0.88      0.88     18621
weighted avg       0.89      0.89      0.88     18621

Evaluating Multinomial Naive Bayes (BoW Unigrams) on validation set...

===== Multinomial Naive Bayes (BoW Unigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.87      0.82      2243
           1       0.80      0.70      0.75      1747

    accuracy                           0.79      3990
   macro avg       0.79   

In [24]:
# Random forest on unigram counts.
bow_uni_rf_metrics = fit_pipeline_predict_evaluate(
    RandomForestClassifier(
        n_estimators=100,
        max_depth=120,
        min_samples_leaf=5,
        max_features='sqrt',
        random_state=7,
    ),
    "Random Forest (BoW Unigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='bow',
    ngram_range=(1, 1),
)

print(bow_uni_rf_metrics)

Fitting Random Forest (BoW Unigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Random Forest (BoW Unigrams) on training set...

===== Random Forest (BoW Unigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.94      0.81     10465
           1       0.86      0.50      0.63      8156

    accuracy                           0.75     18621
   macro avg       0.78      0.72      0.72     18621
weighted avg       0.77      0.75      0.73     18621

Evaluating Random Forest (BoW Unigrams) on validation set...

===== Random Forest (BoW Unigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.93      0.79      2243
           1       0.84      0.45      0.59      1747

    accuracy                           0.72      3990
   macro avg       0.76      0.69      0.69      3990
weighted avg       0.7

In [25]:
# Single-hidden-layer MLP on unigram counts.
# The model name carries the vectorizer configuration, like the sibling cells,
# so this run stays distinguishable from the other MLP runs once results are combined.
bow_uni_mlp_metrics = fit_pipeline_predict_evaluate(
    model_class=MLPClassifier(
        hidden_layer_sizes=(32,),
        activation='relu',
        solver='adam',
        alpha=0.001,
        batch_size=64,
        learning_rate='adaptive',
        max_iter=100,
        early_stopping=True,
        random_state=42,
    ),
    model_name="MLP (BoW Unigrams)",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    vectorizer_type='bow',
    ngram_range=(1, 1)
)
print(bow_uni_mlp_metrics)

Fitting MLP...
Predicting on training set...
Predicting on validation set...
Evaluating MLP on training set...

===== MLP: Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.94      0.93     10465
           1       0.92      0.90      0.91      8156

    accuracy                           0.92     18621
   macro avg       0.92      0.92      0.92     18621
weighted avg       0.92      0.92      0.92     18621

Evaluating MLP on validation set...

===== MLP: Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.85      0.82      2243
           1       0.79      0.72      0.75      1747

    accuracy                           0.79      3990
   macro avg       0.79      0.79      0.79      3990
weighted avg       0.79      0.79      0.79      3990

  Model  Accuracy  Precision    Recall  F1-score     Dataset
0   MLP  0.922024   0.922805  0

### Bag of Words + Bigrams Vectorizer:

In [26]:
# L2-regularised logistic regression on unigram + bigram counts.
logistic_bow_bigrams_metrics = fit_pipeline_predict_evaluate(
    model_class=LogisticRegression(penalty='l2', C=0.05, max_iter=1000),
    model_name="Logistic Regression (BoW + Bigrams)",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    vectorizer_type='bow',
    ngram_range=(1, 2),
)
print(logistic_bow_bigrams_metrics)

Fitting Logistic Regression (BoW + Bigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Logistic Regression (BoW + Bigrams) on training set...

===== Logistic Regression (BoW + Bigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.95      0.88     10465
           1       0.92      0.72      0.81      8156

    accuracy                           0.85     18621
   macro avg       0.87      0.83      0.84     18621
weighted avg       0.86      0.85      0.85     18621

Evaluating Logistic Regression (BoW + Bigrams) on validation set...

===== Logistic Regression (BoW + Bigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.91      0.80      2243
           1       0.83      0.54      0.66      1747

    accuracy                           0.75      3990
   macro avg       0.77      0.73      0.

In [27]:
# Multinomial Naive Bayes on unigram + bigram counts.
bow_bi_nb_metrics = fit_pipeline_predict_evaluate(
    MultinomialNB(alpha=10),
    "Multinomial Naive Bayes (BoW Bigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='bow',
    ngram_range=(1, 2),
)

print(bow_bi_nb_metrics)

Fitting Multinomial Naive Bayes (BoW Bigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Multinomial Naive Bayes (BoW Bigrams) on training set...

===== Multinomial Naive Bayes (BoW Bigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.97      0.92     10465
           1       0.96      0.81      0.88      8156

    accuracy                           0.90     18621
   macro avg       0.91      0.89      0.90     18621
weighted avg       0.91      0.90      0.90     18621

Evaluating Multinomial Naive Bayes (BoW Bigrams) on validation set...

===== Multinomial Naive Bayes (BoW Bigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.92      0.82      2243
           1       0.86      0.58      0.69      1747

    accuracy                           0.77      3990
   macro avg       0.80      0.

In [28]:
# Random forest on unigram + bigram counts.
bow_bi_rf_metrics = fit_pipeline_predict_evaluate(
    RandomForestClassifier(
        n_estimators=100,
        max_depth=120,
        min_samples_leaf=5,
        max_features='sqrt',
        random_state=7,
    ),
    "Random Forest (BoW Bigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='bow',
    ngram_range=(1, 2),
)

print(bow_bi_rf_metrics)

Fitting Random Forest (BoW Bigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Random Forest (BoW Bigrams) on training set...

===== Random Forest (BoW Bigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.96      0.81     10465
           1       0.90      0.45      0.60      8156

    accuracy                           0.74     18621
   macro avg       0.80      0.71      0.70     18621
weighted avg       0.78      0.74      0.72     18621

Evaluating Random Forest (BoW Bigrams) on validation set...

===== Random Forest (BoW Bigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.95      0.79      2243
           1       0.87      0.40      0.55      1747

    accuracy                           0.71      3990
   macro avg       0.77      0.68      0.67      3990
weighted avg       0.76    

In [29]:
# Single-hidden-layer MLP on unigram + bigram counts.
# The model name carries the vectorizer configuration, like the sibling cells,
# so this run stays distinguishable from the other MLP runs once results are combined.
bow_bi_mlp_metrics = fit_pipeline_predict_evaluate(
    model_class=MLPClassifier(
        hidden_layer_sizes=(32,),
        activation='relu',
        solver='adam',
        alpha=0.001,
        batch_size=64,
        learning_rate='adaptive',
        max_iter=100,
        early_stopping=True,
        random_state=42,
    ),
    model_name="MLP (BoW Bigrams)",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    vectorizer_type='bow',
    ngram_range=(1, 2)
)
print(bow_bi_mlp_metrics)

Fitting MLP...
Predicting on training set...
Predicting on validation set...
Evaluating MLP on training set...

===== MLP: Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98     10465
           1       0.98      0.97      0.98      8156

    accuracy                           0.98     18621
   macro avg       0.98      0.98      0.98     18621
weighted avg       0.98      0.98      0.98     18621

Evaluating MLP on validation set...

===== MLP: Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.87      0.83      2243
           1       0.81      0.73      0.77      1747

    accuracy                           0.81      3990
   macro avg       0.81      0.80      0.80      3990
weighted avg       0.81      0.81      0.80      3990

  Model  Accuracy  Precision    Recall  F1-score     Dataset
0   MLP  0.979002   0.981879  0

### TF-IDF Vectorizer:

In [30]:
# L2-regularised logistic regression on TF-IDF unigrams.
# The name states the vectorizer, matching "Logistic Regression (BoW)" and
# "Logistic Regression (BoW + Bigrams)" used by the earlier runs.
logistic_tfidf_metrics = fit_pipeline_predict_evaluate(
    LogisticRegression(penalty='l2', C=0.1, max_iter=1000),
    "Logistic Regression (TF-IDF)",
    X_train,
    y_train,
    X_val,
    y_val,
    vectorizer_type='tfidf',
    ngram_range=(1, 1)
)
print(logistic_tfidf_metrics)

Fitting Logistic Regression...
Predicting on training set...
Predicting on validation set...
Evaluating Logistic Regression on training set...

===== Logistic Regression: Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.97      0.80     10465
           1       0.90      0.42      0.57      8156

    accuracy                           0.73     18621
   macro avg       0.79      0.69      0.69     18621
weighted avg       0.78      0.73      0.70     18621

Evaluating Logistic Regression on validation set...

===== Logistic Regression: Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.96      0.79      2243
           1       0.88      0.38      0.53      1747

    accuracy                           0.71      3990
   macro avg       0.77      0.67      0.66      3990
weighted avg       0.76      0.71      0.68      3990

            

In [31]:
# Multinomial Naive Bayes on TF-IDF unigrams.
tfidf_uni_nb_metrics = fit_pipeline_predict_evaluate(
    MultinomialNB(alpha=7),
    "Multinomial Naive Bayes (TF-IDF Unigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='tfidf',
    ngram_range=(1, 1),
)

print(tfidf_uni_nb_metrics)

Fitting Multinomial Naive Bayes (TF-IDF Unigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Multinomial Naive Bayes (TF-IDF Unigrams) on training set...

===== Multinomial Naive Bayes (TF-IDF Unigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.99      0.83     10465
           1       0.97      0.50      0.66      8156

    accuracy                           0.77     18621
   macro avg       0.84      0.74      0.74     18621
weighted avg       0.83      0.77      0.75     18621

Evaluating Multinomial Naive Bayes (TF-IDF Unigrams) on validation set...

===== Multinomial Naive Bayes (TF-IDF Unigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.97      0.79      2243
           1       0.92      0.37      0.53      1747

    accuracy                           0.71      3990
   macro av

In [32]:
# Random forest on TF-IDF unigrams.
tfidf_uni_rf_metrics = fit_pipeline_predict_evaluate(
    RandomForestClassifier(
        n_estimators=100,
        max_depth=120,
        min_samples_leaf=5,
        max_features='sqrt',
        random_state=7,
    ),
    "Random Forest (TF-IDF Unigrams)",
    X_train, y_train,
    X_val, y_val,
    vectorizer_type='tfidf',
    ngram_range=(1, 1),
)

print(tfidf_uni_rf_metrics)

Fitting Random Forest (TF-IDF Unigrams)...
Predicting on training set...
Predicting on validation set...
Evaluating Random Forest (TF-IDF Unigrams) on training set...

===== Random Forest (TF-IDF Unigrams): Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.93      0.81     10465
           1       0.86      0.53      0.65      8156

    accuracy                           0.76     18621
   macro avg       0.79      0.73      0.73     18621
weighted avg       0.78      0.76      0.74     18621

Evaluating Random Forest (TF-IDF Unigrams) on validation set...

===== Random Forest (TF-IDF Unigrams): Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.93      0.79      2243
           1       0.83      0.46      0.59      1747

    accuracy                           0.72      3990
   macro avg       0.76      0.69      0.69      3990
weighte

In [33]:
# Single-hidden-layer MLP on TF-IDF unigrams.
# The model name carries the vectorizer configuration, like the sibling cells,
# so this run stays distinguishable from the other MLP runs once results are combined.
tfidf_mlp_metrics = fit_pipeline_predict_evaluate(
    model_class=MLPClassifier(
        hidden_layer_sizes=(32,),
        activation='relu',
        solver='adam',
        alpha=0.001,
        batch_size=64,
        learning_rate='adaptive',
        max_iter=100,
        early_stopping=True,
        random_state=42,
    ),
    model_name="MLP (TF-IDF Unigrams)",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    vectorizer_type='tfidf',
    ngram_range=(1, 1)
)
print(tfidf_mlp_metrics)

Fitting MLP...
Predicting on training set...
Predicting on validation set...
Evaluating MLP on training set...

===== MLP: Train Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95     10465
           1       0.95      0.93      0.94      8156

    accuracy                           0.95     18621
   macro avg       0.95      0.94      0.95     18621
weighted avg       0.95      0.95      0.95     18621

Evaluating MLP on validation set...

===== MLP: Validation Set =====
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.83      0.82      2243
           1       0.77      0.74      0.75      1747

    accuracy                           0.79      3990
   macro avg       0.79      0.78      0.78      3990
weighted avg       0.79      0.79      0.79      3990

  Model  Accuracy  Precision    Recall  F1-score     Dataset
0   MLP  0.946566   0.947507  0