# Package Installation and Loading

In [None]:
# Mount Google Drive at /content/drive; the data and model paths used later in the
# notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Imports

In [None]:
!pip install transformers nltk
!pip install tensorflow

# Standard library imports
import os

# Third-party imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr, kendalltau
from tqdm import tqdm

# NLTK imports
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer

# TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras import layers, backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import (Input, Layer, Embedding, Bidirectional, LSTM, Dense, Dropout,
                                     Concatenate, GlobalAveragePooling1D, BatchNormalization,
                                     LayerNormalization)
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.mixed_precision import global_policy
from tensorflow.keras.regularizers import l2

# Sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import (mean_absolute_error, mean_squared_error, accuracy_score,
                             precision_score, recall_score, f1_score)

# Transformers imports
from transformers import pipeline, BertTokenizer, TFAutoModelForSequenceClassification, AutoTokenizer, TFBertForSequenceClassification

# Download the NLTK resources requested by this notebook. 'vader_lexicon' is the
# lexicon behind SentimentIntensityAnalyzer, which is used for the NLTK baseline below.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('vader_lexicon')



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# Set up GPU device
# Enable on-demand GPU memory allocation.
# TensorFlow requires the memory-growth setting to be identical on every visible GPU,
# so it is applied to all of them rather than only to the first device.
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    print('GPU available')
    for gpu in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as err:
            # Raised when the GPUs are already initialised, e.g. when this cell is re-run
            # in a live kernel. The existing configuration stays in effect.
            print(f'Could not change memory growth for {gpu.name}: {err}')

GPU available


# Utils

## Correlation Code

In [None]:
def plot_correlation_matrix(df, *columns):
    """Show an annotated heatmap of the pairwise correlations of the given columns.

    Args:
        df: DataFrame that holds the data.
        *columns: Names of the columns to correlate with each other.

    Raises:
        ValueError: If at least one requested column is not present in ``df``.
    """
    # Validate up front so that no figure is created for a bad request.
    for name in columns:
        if name not in df:
            raise ValueError("One or more specified columns do not exist in the DataFrame.")

    selected = list(columns)
    corr = df[selected].corr()

    # Plot correlation matrix as a heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()

## Evaluation Code

In [None]:
def convert_to_discrete(continuous_values):
  """Map continuous scores onto the discrete ratings 1-5.

  Buckets are closed on the right: values <= 0.2 become 1, <= 0.4 become 2,
  <= 0.6 become 3, <= 0.8 become 4 and everything else becomes 5.

  Args:
    continuous_values: Iterable of numeric scores.

  Returns:
    A list with one integer rating per input element.
  """
  upper_bounds = (0.2, 0.4, 0.6, 0.8)

  def _bucket(value):
    # The first bound that the value does not exceed decides the rating.
    for rating, bound in enumerate(upper_bounds, start=1):
      if value <= bound:
        return rating
    return 5

  return [_bucket(value) for value in continuous_values]

In [None]:
def calculate_metrics(column_one, column_two):
  """Print agreement metrics between two sequences of discrete 1-5 scores.

  Reports exact-match accuracy, a distance-weighted accuracy, Spearman's rank
  correlation and Kendall's tau. Nothing is returned; the results are printed.

  Args:
    column_one: First sequence of scores (list, NumPy array or pandas Series).
    column_two: Second sequence of scores, same length as ``column_one``.

  Raises:
    ValueError: If the two inputs do not contain the same number of scores.
  """
  # Compare by position on flat arrays. Subtracting pandas Series aligns on the index
  # (mismatched indexes yield NaN), and mixing (N, 1) with (N,) arrays broadcasts to an
  # (N, N) matrix; both would silently corrupt the weighted accuracy.
  scores_one = np.asarray(column_one).ravel()
  scores_two = np.asarray(column_two).ravel()
  if scores_one.shape != scores_two.shape:
    raise ValueError(
        f"Inputs must have the same number of scores, got {scores_one.size} and {scores_two.size}.")

  # Multiclass accuracy
  accuracy = accuracy_score(scores_one, scores_two)
  print(f"Multiclass Accuracy: {accuracy:.3f}")

  # Weighted Accuracy: Absolute difference scaled inversely by the max error (4 in this case, e.g., 1 vs. 5)
  weighted_accuracies = 1 - (np.abs(scores_one - scores_two) / 4)
  mean_accuracy = np.mean(weighted_accuracies)
  print(f"Weighted Multiclass Accuracy: {mean_accuracy:.3f}")

  # Spearman's rank correlation
  spearman_corr, _ = spearmanr(scores_one, scores_two)
  print(f"Spearman's Rank Correlation: {spearman_corr:.3f}")

  # Kendall's tau
  kendall_corr, _ = kendalltau(scores_one, scores_two)
  print(f"Kendall's tau: {kendall_corr:.3f}")

In [None]:
def _score_split(model, features, y_sentiment, y_sarcasm):
    """Predict on one data split and compute its sentiment and sarcasm metrics."""
    predictions = model.predict(features)

    # Flatten everything so the metrics always compare 1-D arrays, regardless of
    # whether the model heads or the label arrays come as (N, 1) column vectors.
    sentiment_pred = np.asarray(predictions[0]).ravel()
    sarcasm_pred = (np.asarray(predictions[1]).ravel() > 0.5).astype(int)
    sentiment_true = np.asarray(y_sentiment).ravel()
    sarcasm_true = np.asarray(y_sarcasm).ravel()

    return {
        'sentiment': {
            'mse': mean_squared_error(sentiment_true, sentiment_pred),
            'mae': mean_absolute_error(sentiment_true, sentiment_pred),
        },
        'sarcasm': {
            'accuracy': accuracy_score(sarcasm_true, sarcasm_pred),
            'precision': precision_score(sarcasm_true, sarcasm_pred),
            'recall': recall_score(sarcasm_true, sarcasm_pred),
            'f1': f1_score(sarcasm_true, sarcasm_pred),
        },
    }


def evaluate_model(model, X_val, y_val_sentiment, y_val_sarcasm, X_test, y_test_sentiment, y_test_sarcasm):
    """Evaluate a two-headed (sentiment, sarcasm) model on validation and test data.

    The model's ``predict`` must return the sentiment predictions first and the
    sarcasm probabilities second. Sarcasm probabilities are thresholded at 0.5.

    Args:
        model: Model whose ``predict`` returns ``[sentiment, sarcasm]`` outputs.
        X_val, X_test: Model inputs for the validation and test split.
        y_val_sentiment, y_test_sentiment: Continuous sentiment targets.
        y_val_sarcasm, y_test_sarcasm: Binary sarcasm targets.

    Returns:
        Nested dict ``{'val'|'test': {'sentiment': {'mse', 'mae'},
        'sarcasm': {'accuracy', 'precision', 'recall', 'f1'}}}``.
    """
    results = {
        'val': _score_split(model, X_val, y_val_sentiment, y_val_sarcasm),
        'test': _score_split(model, X_test, y_test_sentiment, y_test_sarcasm),
    }

    val_sent, val_sarc = results['val']['sentiment'], results['val']['sarcasm']
    test_sent, test_sarc = results['test']['sentiment'], results['test']['sarcasm']

    print("Validation - Sentiment Analysis: MSE =", val_sent['mse'], ", MAE =", val_sent['mae'])
    print("Validation - Sarcasm Detection: Accuracy =", val_sarc['accuracy'], ", Precision =", val_sarc['precision'], ", Recall =", val_sarc['recall'], ", F1 Score =", val_sarc['f1'])
    print("Test - Sentiment Analysis: MSE =", test_sent['mse'], ", MAE =", test_sent['mae'])
    print("Test - Sarcasm Detection: Accuracy =", test_sarc['accuracy'], ", Precision =", test_sarc['precision'], ", Recall =", test_sarc['recall'], ", F1 Score =", test_sarc['f1'])

    return results

## Multi-Head Attention

In [None]:
class MultiHeadAttention(Layer):
    """Multi-head scaled dot-product self-attention over a single input tensor.

    Queries, keys and values are all projected from the same input. The heads are
    concatenated and passed through a final Dense layer of width ``model_dim``.

    Args:
        num_heads: Number of attention heads.
        model_dim: Total projection width; must be divisible by ``num_heads``.
        **kwargs: Forwarded to ``Layer``.
    """

    def __init__(self, num_heads, model_dim, **kwargs):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.num_heads = num_heads
        self.model_dim = model_dim
        assert model_dim % num_heads == 0, "model_dim must be divisible by num_heads"
        # Width handled by each individual head.
        self.depth = model_dim // num_heads

    def build(self, input_shape):
        """Create the query/key/value projections and the output Dense layer."""
        self.Wq = self.add_weight(shape=(input_shape[-1], self.model_dim),
                                  initializer='glorot_uniform', trainable=True, name='query_weight')
        self.Wk = self.add_weight(shape=(input_shape[-1], self.model_dim),
                                  initializer='glorot_uniform', trainable=True, name='key_weight')
        self.Wv = self.add_weight(shape=(input_shape[-1], self.model_dim),
                                  initializer='glorot_uniform', trainable=True, name='value_weight')
        self.dense = Dense(self.model_dim)
        super(MultiHeadAttention, self).build(input_shape)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised and rebuilt.

        Without this, a model containing the layer cannot be restored from a saved
        file because ``num_heads`` and ``model_dim`` would be unknown.
        """
        config = super(MultiHeadAttention, self).get_config()
        config.update({'num_heads': self.num_heads, 'model_dim': self.model_dim})
        return config

    def split_heads(self, x, batch_size):
        """Reshape the last axis into (num_heads, depth) and move heads before the sequence axis."""
        x = K.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return K.permute_dimensions(x, pattern=(0, 2, 1, 3))

    def call(self, inputs):
        """Apply self-attention; assumes ``inputs`` is (batch, seq_len, features) - TODO confirm."""
        batch_size = tf.shape(inputs)[0]

        query = K.dot(inputs, self.Wq)
        key = K.dot(inputs, self.Wk)
        value = K.dot(inputs, self.Wv)

        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        attention, weights = self.scaled_dot_product_attention(query, key, value)
        # Undo the head split: heads go back next to the depth axis and are merged.
        attention = K.permute_dimensions(attention, pattern=(0, 2, 1, 3))
        concat_attention = K.reshape(attention, (batch_size, -1, self.model_dim))

        output = self.dense(concat_attention)
        return output

    def scaled_dot_product_attention(self, query, key, value):
        """Return ``softmax(QK^T / sqrt(d_k)) V`` together with the attention weights."""
        matmul_qk = tf.matmul(query, key, transpose_b=True)
        # Cast the scale to the dtype of the logits themselves rather than to the global
        # policy's compute dtype, so the division also works when this layer runs under
        # a dtype policy that differs from the global one.
        dk = tf.cast(tf.shape(key)[-1], dtype=matmul_qk.dtype)

        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, value)
        return output, attention_weights

# Preparing the Yelp Dataset

### Data Loading

In [None]:
# Location of the reviews CSV on the mounted Drive.
file_path = '/content/drive/MyDrive/Yelp Dataset/Reviews.csv'
# file_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/Reviews.csv'
# Read the full CSV into memory and show the first rows as a sanity check.
yelp_df = pd.read_csv(file_path)
print(yelp_df.head())

   Id   ProductId          UserId                      ProfileName  \
0   1  B001E4KFG0  A3SGXH7AUHU8GW                       delmartian   
1   2  B00813GRG4  A1D87F6ZCVE5NK                           dll pa   
2   3  B000LQOCH0   ABXLMWJIXXAIN  Natalia Corres "Natalia Corres"   
3   4  B000UA0QIQ  A395BORC6FGVXV                             Karl   
4   5  B006K2ZZ7K  A1UQRSCLF8GW1T    Michael D. Bigham "M. Wassir"   

   HelpfulnessNumerator  HelpfulnessDenominator  Score        Time  \
0                     1                       1      5  1303862400   
1                     0                       0      1  1346976000   
2                     1                       1      4  1219017600   
3                     3                       3      2  1307923200   
4                     0                       0      5  1350777600   

                 Summary                                               Text  
0  Good Quality Dog Food  I have bought several of the Vitality canned d...  
1 

### Dataset Balancing (Undersampling)

In [None]:
# Analyzing Score Distribution
score_distribution = yelp_df['Score'].value_counts()
print("Original distribution:\n", score_distribution)
# Every class is cut down to the size of the rarest score.
min_class_size = score_distribution.min()

# Resampling dataset to balance.
# The per-score samples are collected in a list and concatenated once: growing a frame
# with pd.concat inside the loop re-copies all previous rows on every iteration and
# relies on concatenating with an empty DataFrame, which pandas has deprecated.
balanced_parts = []

for score in yelp_df['Score'].unique():
    df_score = yelp_df[yelp_df['Score'] == score]
    df_score_downsampled = resample(df_score,
                                    replace=False,
                                    n_samples=min_class_size,
                                    random_state=123)
    balanced_parts.append(df_score_downsampled)

df_balanced = pd.concat(balanced_parts)

# New Distribution
new_score_distribution = df_balanced['Score'].value_counts()
print("New distribution:\n", new_score_distribution)
yelp_df = df_balanced

Original distribution:
 Score
5    363122
4     80655
1     52268
3     42640
2     29769
Name: count, dtype: int64
New distribution:
 Score
5    29769
1    29769
4    29769
2    29769
3    29769
Name: count, dtype: int64


### Train/Test Split

In [None]:
# Two-stage split into train / validation / test = 60% / 20% / 20% of the balanced data.
# Fixed random_state values keep the partition reproducible.
yelp_df_train_val, yelp_df_test = train_test_split(
    yelp_df,
    test_size=0.2,  # 20% for testing
    random_state=42)

yelp_df_train, yelp_df_val = train_test_split(
    yelp_df_train_val,
    test_size=0.25,  # 25% of 80% (20% of the total data)
    random_state=42)

In [None]:
# Keep only 'Score' and 'Text' in the test frame; the metadata columns are not used.
# Note: only yelp_df_test is trimmed here, the train and validation frames keep all columns.
columns_to_drop = ['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator', 'Time', 'Summary']
yelp_df_test = yelp_df_test.drop(columns=columns_to_drop)

print(yelp_df_test.head())

        Score                                               Text
289438      1  This was an X-mas gift for my dog and he loved...
333254      5  Surprised me.....This one is really good.  Fla...
333853      1  Bought the nacho chips regularly for my kids b...
414716      4  Well, I wasn't bright when I ordered this coff...
359800      1  These chips (all flavors of the variety pack) ...


# Sentiment Analysis Using the NLTK Package (VADER)

In [None]:
# Single shared analyzer instance, reused for every review.
sia = SentimentIntensityAnalyzer()

def get_compound_sentiment(text):
    """Return the 'compound' entry of the analyzer's polarity scores for ``text``."""
    return sia.polarity_scores(text)['compound']

# Score every test review; the result is stored next to the original text.
yelp_df_test['NLTK_Compound_Score'] = yelp_df_test['Text'].apply(get_compound_sentiment)

def normalize_score(compound_score):
    """Linearly rescale a compound score from [-1, 1] onto the rating range [1, 5]."""
    shifted = compound_score + 1  # now in [0, 2]
    return 1 + shifted * 2

# Map the compound scores onto 1-5 and round to the nearest integer rating.
yelp_df_test['NLTK_Normalized_Score'] = yelp_df_test['NLTK_Compound_Score'].apply(normalize_score).round().astype(int)

In [None]:
# Agreement between the NLTK-derived ratings and the true review scores.
calculate_metrics(yelp_df_test['NLTK_Normalized_Score'],yelp_df_test['Score'])

In [None]:
# Correlation heatmap: true score vs. NLTK-derived rating.
plot_correlation_matrix(yelp_df_test, "Score", "NLTK_Normalized_Score")

# Sentiment Analysis using a Pretrained BERT Model

In [None]:
# Pretrained 5-class review-rating model; class index i corresponds to i + 1 stars.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

texts = yelp_df_test['Text'].tolist()
batch_size = 128

# Tokenise batch by batch instead of encoding the whole test set in one call.
# With padding=True each batch is then padded only to its own longest review rather
# than to the longest review of the entire corpus, and the fully padded corpus never
# has to be materialised as one tensor.
scores = []
for start in range(0, len(texts), batch_size):
    encoded_input = tokenizer(texts[start:start + batch_size], padding=True, truncation=True,
                              return_tensors="tf", max_length=512)
    predictions = model(dict(encoded_input))
    # argmax yields the class index 0-4; shift to the 1-5 rating scale.
    batch_scores = tf.argmax(predictions.logits, axis=1) + 1
    scores.extend(batch_scores.numpy().tolist())

yelp_df_test['BERT_Score'] = scores

In [None]:
# Agreement between the true review scores and the BERT-predicted ratings.
calculate_metrics(yelp_df_test['Score'], yelp_df_test['BERT_Score'])

In [None]:
# Correlation heatmap: true score vs. BERT-predicted rating.
plot_correlation_matrix(yelp_df_test, "Score", "BERT_Score")

# Sentiment Analysis using a Custom Sarcasm Detector + Sentiment Model Combination

In [None]:
# Shared text-encoding hyper-parameters for the custom models.
vocab_size = 10000      # size of the Embedding input vocabulary
max_length = 100        # sequence length after padding / truncation
embedding_dim = 200     # embedding width of the sarcasm model
padding_type = 'post'
trunc_type = 'post'
oov_tok = "<OOV>"

# num_words caps the indices produced by texts_to_sequences at vocab_size - 1; rarer words
# are mapped to the OOV token. Without the cap the tokenizer emits an index for every word
# it has seen, which exceeds the input range of the Embedding(10000, ...) layers used below.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)

### Sarcasm Model

In [None]:
# Load the sarcasm headlines dataset (one JSON record per line).
df = pd.read_json('/content/drive/MyDrive/Yelp Dataset/Sarcasm_Headlines_Dataset_v2.json', lines=True)
# df = pd.read_json('/content/drive/MyDrive/WPI/DS595_NLP/Final Project/Sarcasm_Headlines_Dataset_v2.json', lines=True)

print(df.head())

# First fit of the shared tokenizer: the vocabulary is built from the headlines.
tokenizer.fit_on_texts(df['headline'])

# Convert text to sequences and pad
# (post-padding / post-truncation, as configured by padding_type and trunc_type)
sequences = tokenizer.texts_to_sequences(df['headline'])
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

labels = df['is_sarcastic'].values

# Binary classifier: embedding -> BiLSTM -> dense -> sigmoid probability of sarcasm.
model_sarcasm = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    Bidirectional(LSTM(64)),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_sarcasm.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

model_sarcasm.summary()

# Hold out 20% of the headlines for validation.
X_train, X_val, y_train, y_val = train_test_split(padded, labels, test_size=0.2, random_state=42)

history = model_sarcasm.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), batch_size=128)
# model_sarcasm = load_model('/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sarcasm.h5')

# Persist both the full model and the bare weights (HDF5 files on Drive).
model_path = '/content/drive/MyDrive/Yelp Dataset/model_sarcasm.h5'
# # model_sarcasm = load_model('/content/drive/MyDrive/Yelp Dataset/Yelp Pred Train Test Split/model_sarcasm.h5')
model_sarcasm.save(model_path)

weights_path = '/content/drive/MyDrive/Yelp Dataset/model_sarcasm_weights.h5'
# # weights_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sarcasm_weights.h5'
model_sarcasm.save_weights(weights_path)

# Evaluation on the validation set
loss, accuracy = model_sarcasm.evaluate(X_val, y_val, verbose=0)

print(f'Validation Loss: {loss}')
print(f'Validation Accuracy: {accuracy}')

# Generate predictions
# (probabilities are thresholded at 0.5 to obtain class labels)
predictions = model_sarcasm.predict(X_val)
predictions_binary = (predictions > 0.5).astype("int32")

# Calculate Precision, Recall, and F1 Score
precision = precision_score(y_val, predictions_binary)
recall = recall_score(y_val, predictions_binary)
f1 = f1_score(y_val, predictions_binary)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

   is_sarcastic                                           headline  \
0             1  thirtysomething scientists unveil doomsday clo...   
1             0  dem rep. totally nails why congress is falling...   
2             0  eat your veggies: 9 deliciously different recipes   
3             1  inclement weather prevents liar from getting t...   
4             1  mother comes pretty close to using word 'strea...   

                                        article_link  
0  https://www.theonion.com/thirtysomething-scien...  
1  https://www.huffingtonpost.com/entry/donna-edw...  
2  https://www.huffingtonpost.com/entry/eat-your-...  
3  https://local.theonion.com/inclement-weather-p...  
4  https://www.theonion.com/mother-comes-pretty-c...  




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 200)          2000000   
                                                                 
 bidirectional (Bidirection  (None, 128)               135680    
 al)                                                             
                                                                 
 dense (Dense)               (None, 24)                3096      
                                                                 
 dense_1 (Dense)             (None, 1)                 25        
                                                                 
Total params: 2138801 (8.16 MB)
Trainable params: 2138801 (8.16 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8

  saving_api.save_model(


Validation Loss: 0.8572922348976135
Validation Accuracy: 0.846960186958313
Precision: 0.8458379992534528
Recall: 0.8303407841700257
F1 Score: 0.8380177514792899


### Sentiment Model

In [None]:
# Phrase dictionary: '|'-separated file with one "phrase|id" pair per line and no header.
phrase_file_path = '/content/drive/MyDrive/Yelp Dataset/stanfordSentimentTreebank/dictionary.txt'
# phrase_file_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/dictionary.txt'

phrase_df = pd.read_csv(phrase_file_path, sep='|', header=None, names=['Phrase', 'ID'])

# Sentiment labels: '|'-separated file whose first row is the header.
labels_file_path = '/content/drive/MyDrive/Yelp Dataset/stanfordSentimentTreebank/sentiment_labels.txt'
# labels_file_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/sentiment_labels.txt'

label_df = pd.read_csv(labels_file_path, sep='|', header=0)

print(f"Size of phrase_df: {phrase_df.shape}")
print(f"Size of label_df: {label_df.shape}")

# Attach the sentiment value to every phrase via the phrase id.
df_sentiment = pd.merge(phrase_df, label_df, left_on='ID', right_on='phrase ids')

print(df_sentiment.head())

df_sentiment.dropna(subset=['Phrase'], inplace=True)
# NOTE(review): the shared tokenizer was already fitted on the sarcasm headlines; fitting it
# again here may change the word -> index mapping that model_sarcasm was trained with.
# Confirm that the indices stay consistent before reusing model_sarcasm on new sequences.
tokenizer.fit_on_texts(df_sentiment['Phrase'])
sequences = tokenizer.texts_to_sequences(df_sentiment['Phrase'])
# pad_sequences defaults are used here (no explicit padding/truncating arguments), unlike
# the sarcasm pipeline, which passes padding_type / trunc_type.
padded = pad_sequences(sequences, maxlen=100)

X_train_sentiment, X_val_sentiment, y_train_sentiment, y_val_sentiment = train_test_split(padded, df_sentiment['sentiment values'], test_size=0.2, random_state=42)

# Regressor: embedding -> two stacked BiLSTMs -> dense -> single linear output.
model_sentiment = Sequential([
    Embedding(10000, 100, input_length=100),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')
])

model_sentiment.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error'])

model_sentiment.summary()

history = model_sentiment.fit(X_train_sentiment, y_train_sentiment, epochs=10, validation_data=(X_val_sentiment, y_val_sentiment), batch_size=1024)
# model_sentiment = load_model('/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sentiment.h5')

# Persist the bare weights and the full model (HDF5 files on Drive).
weights_path = '/content/drive/MyDrive/Yelp Dataset/model_sentiment_weights.h5'
# weights_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sentiment_weights.h5'
model_sentiment.save_weights(weights_path)

model_path = '/content/drive/MyDrive/Yelp Dataset/model_sentiment.h5'
# model_path = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sentiment.h5'
model_sentiment.save(model_path)

# Evaluation on the validation set
# NOTE: the model is compiled with the mean_absolute_error metric, so the second value
# returned here is the MAE even though it is stored in a variable called `accuracy`.
loss, accuracy = model_sentiment.evaluate(X_val_sentiment, y_val_sentiment, verbose=0)

Size of phrase_df: (239232, 2)
Size of label_df: (239232, 2)
        Phrase      ID  phrase ids  sentiment values
0            !       0           0           0.50000
1          ! '   22935       22935           0.52778
2         ! ''   18235       18235           0.50000
3       ! Alas  179257      179257           0.44444
4  ! Brilliant   22936       22936           0.86111




Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 100, 100)          1000000   
                                                                 
 bidirectional_12 (Bidirect  (None, 100, 128)          84480     
 ional)                                                          
                                                                 
 bidirectional_13 (Bidirect  (None, 64)                41216     
 ional)                                                          
                                                                 
 dense_15 (Dense)            (None, 64)                4160      
                                                                 
 dense_16 (Dense)            (None, 1)                 65        
                                                                 
Total params: 1129921 (4.31 MB)
Trainable params: 1129

  saving_api.save_model(


### Using Sarcasm Model to label Sentiment Data

In [None]:
# Encode the sentiment phrases with the sarcasm pipeline's padding settings
# (padding_type / trunc_type) so that model_sarcasm can score them.
sentiment_sequences = tokenizer.texts_to_sequences(df_sentiment['Phrase'])
sentiment_padded = pad_sequences(sentiment_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Predicted probabilities are thresholded at 0.5 to obtain 0/1 pseudo-labels.
sarcasm_predictions = model_sarcasm.predict(sentiment_padded)
sarcasm_labels_binary = (sarcasm_predictions > 0.5).astype(int)

# These labels are model predictions, not human annotations.
df_sentiment['Sarcasm Label'] = sarcasm_labels_binary

print(df_sentiment.head())

        Phrase      ID  phrase ids  sentiment values  Sarcasm Label
0            !       0           0           0.50000              0
1          ! '   22935       22935           0.52778              0
2         ! ''   18235       18235           0.50000              0
3       ! Alas  179257      179257           0.44444              0
4  ! Brilliant   22936       22936           0.86111              1


In [None]:
# The tokenizer is deliberately NOT re-fitted here. It has already been fitted on these
# phrases when model_sentiment was trained, and every additional fit_on_texts call adds to
# the word counts and rebuilds word_index by frequency, which can renumber words and make
# the sequences inconsistent with the embeddings that have already been trained.
sequences = tokenizer.texts_to_sequences(df_sentiment['Phrase'])
X_padded = pad_sequences(sequences, maxlen=100)

# 60% train / 40% held out; the sentiment and sarcasm targets are split in lockstep.
X_train_sentiment, X_temp_sentiment, y_train_sentiment, y_temp_sentiment, y_train_sarcasm, y_temp_sarcasm = train_test_split(
    X_padded,
    df_sentiment['sentiment values'],
    df_sentiment['Sarcasm Label'],
    test_size=0.4,
    random_state=42)

# The held-out 40% is halved into validation and test (20% each of the full data).
X_val_sentiment, X_test_sentiment, y_val_sentiment, y_test_sentiment, y_val_sarcasm, y_test_sarcasm = train_test_split(
    X_temp_sentiment,
    y_temp_sentiment,
    y_temp_sarcasm,
    test_size=0.5,
    random_state=42)

# Prepare labels for multi-output
# (the keys match the names of the combined model's output layers)
sentiment_train_labels = {'sentiment_output': y_train_sentiment, 'sarcasm_output': y_train_sarcasm}
sentiment_val_labels = {'sentiment_output': y_val_sentiment, 'sarcasm_output': y_val_sarcasm}
sentiment_test_labels = {'sentiment_output': y_test_sentiment, 'sarcasm_output': y_test_sarcasm}

### Applying the Custom Sentiment Model to the Yelp Dataset

In [None]:
# The tokenizer is intentionally NOT re-fitted on the Yelp text. fit_on_texts rebuilds
# word_index from the accumulated word frequencies, so fitting on this much larger corpus
# would renumber the vocabulary and feed model_sentiment / model_sarcasm indices that no
# longer correspond to the embeddings they were trained with. Words that the tokenizer has
# not seen before are encoded as the OOV token instead.

# Tokenize and pad Yelp review sequences
yelp_train_sequences = tokenizer.texts_to_sequences(yelp_df_train['Text'])
X_train_yelp = pad_sequences(yelp_train_sequences, maxlen=100)
yelp_val_sequences = tokenizer.texts_to_sequences(yelp_df_val['Text'])
X_val_yelp = pad_sequences(yelp_val_sequences, maxlen=100)
yelp_test_sequences = tokenizer.texts_to_sequences(yelp_df_test['Text'])
X_test_yelp = pad_sequences(yelp_test_sequences, maxlen=100)

min_score = 1
max_score = 5

# Normalize the scores to be between 0 and 1
# (1 -> 0.0, 2 -> 0.25, 3 -> 0.5, 4 -> 0.75, 5 -> 1.0)
y_train_yelp_normalized = (yelp_df_train['Score'] - min_score) / (max_score - min_score)
y_val_yelp_normalized = (yelp_df_val['Score'] - min_score) / (max_score - min_score)
y_test_yelp_normalized = (yelp_df_test['Score'] - min_score) / (max_score - min_score)

# Convert scores to a format suitable for regression
y_train_yelp = y_train_yelp_normalized.values.astype(float)
y_val_yelp = y_val_yelp_normalized.values.astype(float)
y_test_yelp = y_test_yelp_normalized.values.astype(float)

In [None]:
# model_sentiment = load_model('/content/drive/MyDrive/Yelp Dataset/model_sentiment.h5')
# Continue training the sentiment regressor on the Yelp reviews with the normalized scores as targets.
history_yelp = model_sentiment.fit(X_train_yelp, y_train_yelp, epochs=10, validation_data=(X_val_yelp, y_val_yelp), batch_size=1024)

# evaluate() returns the loss (MSE) followed by the compiled metric (MAE).
loss_yelp, mae_yelp = model_sentiment.evaluate(X_val_yelp, y_val_yelp, verbose=0)
print(f"Loss on Yelp data: {loss_yelp}")
print(f"Mean Absolute Error on Yelp data: {mae_yelp}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss on Yelp data: 0.05565645173192024
Mean Absolute Error on Yelp data: 0.1658799797296524


In [None]:
# Spot check: raw predictions for five validation reviews next to their normalized targets.
print(model_sentiment.predict(X_val_yelp[10:15]))
print(y_val_yelp[10:15])

[[0.15503009]
 [0.04428293]
 [0.38575307]
 [0.44541404]
 [0.38169912]]
[0.5  0.   0.5  0.25 1.  ]


In [None]:
# For each split: predict the normalized score, bucket it into a 1-5 rating with
# convert_to_discrete and store the rating as a new column of the corresponding frame.
predicted_continuous_test = model_sentiment.predict(X_test_yelp)
predicted_rounded_test = convert_to_discrete(predicted_continuous_test)
yelp_df_test["Custom Sentiment Model Score, no Sarcasm"] = predicted_rounded_test

predicted_continuous_val = model_sentiment.predict(X_val_yelp)
predicted_rounded_val = convert_to_discrete(predicted_continuous_val)
yelp_df_val["Custom Sentiment Model Score, no Sarcasm"] = predicted_rounded_val

predicted_continuous_train = model_sentiment.predict(X_train_yelp)
predicted_rounded_train = convert_to_discrete(predicted_continuous_train)
yelp_df_train["Custom Sentiment Model Score, no Sarcasm"] = predicted_rounded_train

# Agreement between the true scores and the discretized predictions on the test split.
calculate_metrics(yelp_df_test['Score'], yelp_df_test['Custom Sentiment Model Score, no Sarcasm'])

Multiclass Accuracy: 0.521
Weighted Mutliclass Accuracy: 0.837
Spearman's Rank Correlation: 0.748
Kendall's tau: 0.657


In [None]:
# model_sarcasm was trained on sequences padded and truncated at the END (padding_type /
# trunc_type = 'post'), whereas X_*_yelp were built with pad_sequences' defaults, which pad
# and truncate at the front. The LSTM has no masking, so the position of the padding matters:
# re-pad the Yelp sequences the same way the sarcasm training data was prepared.
X_train_yelp_sarcasm = pad_sequences(yelp_train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
X_val_yelp_sarcasm = pad_sequences(yelp_val_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
X_test_yelp_sarcasm = pad_sequences(yelp_test_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

train_sarcasm_predictions = model_sarcasm.predict(X_train_yelp_sarcasm)
val_sarcasm_predictions = model_sarcasm.predict(X_val_yelp_sarcasm)
test_sarcasm_predictions = model_sarcasm.predict(X_test_yelp_sarcasm)

# Threshold the predicted probabilities at 0.5 to obtain 0/1 pseudo-labels.
train_sarcasm_labels_binary = (train_sarcasm_predictions > 0.5).astype(int)
val_sarcasm_labels_binary = (val_sarcasm_predictions > 0.5).astype(int)
test_sarcasm_labels_binary = (test_sarcasm_predictions > 0.5).astype(int)

yelp_df_train['Sarcasm Label'] = train_sarcasm_labels_binary
yelp_df_val['Sarcasm Label'] = val_sarcasm_labels_binary
yelp_df_test['Sarcasm Label'] = test_sarcasm_labels_binary

# Multi-output targets; the keys match the names of the combined model's output layers.
yelp_train_labels = {'sentiment_output': y_train_yelp, 'sarcasm_output': train_sarcasm_labels_binary}
yelp_val_labels = {'sentiment_output': y_val_yelp, 'sarcasm_output': val_sarcasm_labels_binary}
yelp_test_labels = {'sentiment_output': y_test_yelp, 'sarcasm_output': test_sarcasm_labels_binary}



In [None]:
# save_weights() returns None, so its result must not be stored in the variables that are
# meant to hold the weight-file paths. Keep the paths in the variables and pass them in.
sentiment_weights = '/content/drive/MyDrive/Yelp Dataset/model_sentiment_weights.h5'
sarcasm_weights = '/content/drive/MyDrive/Yelp Dataset/model_sarcasm_weights.h5'
model_sentiment.save_weights(sentiment_weights)
model_sarcasm.save_weights(sarcasm_weights)

### Custom Combined Models

In [None]:
sentiment_weights = '/content/drive/MyDrive/Yelp Dataset/model_sentiment_weights.h5'
sarcasm_weights = '/content/drive/MyDrive/Yelp Dataset/model_sarcasm_weights.h5'
# sentiment_weights = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sentiment_yelp_weights.h5'
# sarcasm_weights = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sarcasm_weights.h5'

input_shape = 100  # max_length for sentiment and sarcasm models

# Rebuild the COMPLETE sentiment architecture (identical to model_sentiment, including its
# output layer) so the saved weights can be loaded by layer order. Loading with by_name=True
# into freshly created layers silently restores nothing, because the auto-generated layer
# names of a new model never match the names stored in the weight file.
sentiment_full = tf.keras.Sequential([
    Embedding(10000, 100, input_length=input_shape),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')
])
sentiment_full.load_weights(sentiment_weights)

# Feature extractor = trained network without its single-unit output head.
sentiment_base = tf.keras.Sequential(sentiment_full.layers[:-1])

# for layer in sentiment_base.layers:
#     layer.trainable = False

# Rebuild the complete sarcasm architecture. The embedding must use the same width as the
# trained model_sarcasm (embedding_dim), otherwise the stored weights do not fit.
sarcasm_full = tf.keras.Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_shape),
    Bidirectional(LSTM(64)),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid')
])
sarcasm_full.load_weights(sarcasm_weights)

sarcasm_base = tf.keras.Sequential(sarcasm_full.layers[:-1])

# for layer in sarcasm_base.layers:
#     layer.trainable = False

# Both feature extractors read the same token sequence.
input_layer = Input(shape=(input_shape,))

sentiment_features = sentiment_base(input_layer)
sarcasm_features = sarcasm_base(input_layer)

combined_features = Concatenate()([sentiment_features, sarcasm_features])

x = Dense(64, activation='relu')(combined_features)
x = Dropout(0.5)(x)

sentiment_output = Dense(1, activation='sigmoid', name='sentiment_output')(x)
sarcasm_output = Dense(1, activation='sigmoid', name='sarcasm_output')(x)

combined_model = Model(inputs=input_layer, outputs=[sentiment_output, sarcasm_output])
# The sentiment head regresses a single value in [0, 1], so it is trained with mean squared
# error (the same loss as model_sentiment). Categorical cross-entropy expects a probability
# distribution over several classes; on a one-unit output it is (almost) exactly zero and
# therefore provides no training signal.
combined_model.compile(optimizer=Adam(0.0001),
                       loss={'sentiment_output': 'mean_squared_error', 'sarcasm_output': 'binary_crossentropy'},
                       metrics={'sentiment_output': ['mean_absolute_error'], 'sarcasm_output': ['accuracy']})

combined_model.summary()



Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 sequential_10 (Sequential)  (None, 64)                   1129856   ['input_4[0][0]']             
                                                                                                  
 sequential_11 (Sequential)  (None, 24)                   1087576   ['input_4[0][0]']             
                                                                                                  
 concatenate_3 (Concatenate  (None, 88)                   0         ['sequential_10[0][0]',       
 )                                                                   'sequential_11[0][0]'] 

In [None]:
# Stop when the validation loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Halve the learning rate after 2 stagnant epochs. min_lr must lie BELOW the optimizer's
# starting rate (the combined model is compiled with Adam(0.0001)); with min_lr=0.0001 the
# callback could never lower the rate at all.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1)

combined_model.fit(
    X_train_sentiment, sentiment_train_labels,
    validation_data=(X_val_sentiment, sentiment_val_labels),
    epochs=15,
    batch_size=2048,
    callbacks=[early_stopping, reduce_lr]
)

In [None]:
# Two-element result: index 0 holds the sentiment outputs, index 1 the sarcasm outputs.
sentiment_df_combined_predictions = combined_model.predict(X_test_sentiment)



In [None]:
# Turn the two model outputs into flat, named Series.
raw_sentiment = sentiment_df_combined_predictions[0].flatten()
raw_sarcasm = sentiment_df_combined_predictions[1].flatten()

sentiment_predictions = pd.Series(raw_sentiment, name='Sentiment Predictions')
# Sarcasm probabilities above 0.5 count as sarcastic (1), everything else as 0.
sarcasm_predictions = pd.Series(raw_sarcasm, name='Sarcasm Predictions').gt(0.5).astype(int)

sentiment_predictions

In [None]:
# Bucket both the true sentiment values and the predictions into 1-5 ratings.
original_scores = convert_to_discrete(sentiment_test_labels['sentiment_output'])
calculated_scores = convert_to_discrete(sentiment_df_combined_predictions[0])
# Show only a short preview: the full lists contain one entry per test phrase and would
# flood the notebook output.
preview_size = 20
print(original_scores[:preview_size])
print(calculated_scores[:preview_size])
calculate_metrics(np.array(original_scores), np.array(calculated_scores))

In [None]:
# Fine-tune the combined model on the Yelp data. Training uses the losses that were
# configured when combined_model was compiled; the targets are the normalized review
# scores and the sarcasm pseudo-labels stored in yelp_train_labels / yelp_val_labels.
history = combined_model.fit(X_train_yelp, yelp_train_labels,
                             validation_data=(X_val_yelp, yelp_val_labels),
                             epochs=10,
                             batch_size=2048)

In [None]:
# Predict on the Yelp test inputs with the fine-tuned combined model.
predicted_continuous_test = combined_model.predict(x=X_test_yelp)

# Only the first output is scored here; discretise it and store it next to
# the reference scores in the test DataFrame.
predicted_rounded_test = convert_to_discrete(
    predicted_continuous_test[0]
)
yelp_df_test["Combined Model Score"] = predicted_rounded_test



In [None]:
# Compare the reference 'Score' column with the stored combined-model
# predictions (run through convert_to_discrete once more before scoring).
original_scores = yelp_df_test.loc[:, 'Score'].values
calculated_scores = convert_to_discrete(
    yelp_df_test.loc[:, "Combined Model Score"].values
)
calculate_metrics(original_scores, calculated_scores)

In [None]:
# Hand the combined model plus the Yelp validation and test inputs/targets
# (including the binary sarcasm labels) to the notebook's evaluation helper.
evaluate_model(
    combined_model,
    X_val_yelp, y_val_yelp, val_sarcasm_labels_binary,
    X_test_yelp, y_test_yelp, test_sarcasm_labels_binary,
)

### Custom Multi-Head Attention Combined Model

In [None]:
# Checkpoints of the separately trained single-task models (on the mounted Drive).
sentiment_weights = '/content/drive/MyDrive/Yelp Dataset/model_sentiment_weights.h5'
sarcasm_weights = '/content/drive/MyDrive/Yelp Dataset/model_sarcasm_weights.h5'
# Alternative checkpoint locations:
# sentiment_weights = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sentiment_yelp_weights.h5'
# sarcasm_weights = '/content/drive/MyDrive/WPI/DS595_NLP/Final Project/model_sarcasm_weights.h5'

model_dim = 64     # width of the Dense projection feeding each attention block
num_heads = 4      # heads per attention block
input_shape = 100  # max_length for sentiment and sarcasm models
reg_factor = 0.01  # L2 regularisation strength for the Dense kernels

# Rebuild the sentiment model architecture (feature extractor only, 64-d output).
sentiment_base = tf.keras.Sequential([
    Embedding(10000, 100, input_length=input_shape),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu', kernel_regularizer=l2(reg_factor)),
    BatchNormalization()
])

# NOTE(review): by_name=True matches weights by layer *name* and skips layers
# whose names are not found in the checkpoint. The layers above use
# auto-generated names, so confirm that the checkpoint really populates them.
sentiment_base.load_weights(sentiment_weights, by_name=True)
# Freeze the whole pre-trained branch; only the fusion head below is trained.
sentiment_base.trainable = False

# Rebuild the sarcasm model architecture (feature extractor only, 24-d output).
sarcasm_base = tf.keras.Sequential([
    Embedding(10000, 100, input_length=input_shape),
    Bidirectional(LSTM(64)),
    Dropout(0.5),
    Dense(24, activation='relu', kernel_regularizer=l2(reg_factor)),
    BatchNormalization()
])

# Same by-name loading caveat as for the sentiment branch.
sarcasm_base.load_weights(sarcasm_weights, by_name=True)
sarcasm_base.trainable = False

# Shared input layer for both tasks
input_layer = Input(shape=(input_shape,))

sentiment_features = sentiment_base(input_layer)
sarcasm_features = sarcasm_base(input_layer)

# Combine the outputs from both models (64 + 24 = 88 features per sample).
combined_features = Concatenate()([sentiment_features, sarcasm_features])

# Multi-Head Attention blocks: Dense projection -> BatchNorm -> LayerNorm -> attention.
# NOTE(review): the tensor entering the attention layer is 2-D (batch, model_dim)
# while GlobalAveragePooling1D below needs a 3-D input; this presumably relies on
# the notebook's custom MultiHeadAttention layer adding a sequence axis -- confirm.
transformed_features = Dense(model_dim, activation='relu')(combined_features)
transformed_features = BatchNormalization()(transformed_features)
normalized_features = LayerNormalization(epsilon=1e-6)(transformed_features)
attn_output = MultiHeadAttention(num_heads=num_heads, model_dim=model_dim)(normalized_features)

transformed_features_2 = Dense(model_dim, activation='relu')(attn_output)
transformed_features_2 = BatchNormalization()(transformed_features_2)
normalized_features_2 = LayerNormalization(epsilon=1e-6)(transformed_features_2)
attn_output_2 = MultiHeadAttention(num_heads=num_heads, model_dim=model_dim)(normalized_features_2)

pooled_output = GlobalAveragePooling1D()(attn_output_2)

# Shared trunk feeding the two task heads.
combined_dense = Dense(32, activation='relu', kernel_regularizer=l2(reg_factor))(pooled_output)
combined_dense = BatchNormalization()(combined_dense)
combined_dense = Dropout(0.5)(combined_dense)

# Two single-unit sigmoid heads; the output names are the keys used for the
# loss/metric dictionaries below.
sentiment_output = Dense(1, activation='sigmoid', name='sentiment_output')(combined_dense)
sarcasm_output = Dense(1, activation='sigmoid', name='sarcasm_output')(combined_dense)

combined_model_attn = Model(inputs=input_layer, outputs=[sentiment_output, sarcasm_output])

# The sentiment head has ONE sigmoid unit, so 'categorical_crossentropy' cannot be
# used: that loss normalises the prediction over the class axis, which for a single
# unit always gives 1.0 -> constant loss and zero gradient (the head never learns).
# A regression loss matches the single continuous score and the MAE metric.
# NOTE(review): assumes the sentiment targets are scaled to the sigmoid's [0, 1]
# range -- confirm against the label preparation.
combined_model_attn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss={'sentiment_output': 'mean_squared_error', 'sarcasm_output': 'binary_crossentropy'},
    metrics={'sentiment_output': ['mean_absolute_error'], 'sarcasm_output': ['accuracy']},
)

combined_model_attn.summary()



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 sequential_4 (Sequential)   (None, 64)                   1130112   ['input_2[0][0]']             
                                                                                                  
 sequential_5 (Sequential)   (None, 24)                   1087672   ['input_2[0][0]']             
                                                                                                  
 concatenate_1 (Concatenate  (None, 88)                   0         ['sequential_4[0][0]',        
 )                                                                   'sequential_5[0][0]']  

In [None]:
# Cut the learning rate to 20% after 3 epochs without a val_loss improvement,
# never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

# Patience must be smaller than `epochs` for early stopping to be reachable.
# With patience=10 and epochs=10 the callback could never fire, and on Keras
# versions that restore the best weights only when the callback stops training,
# restore_best_weights would have had no effect either. Patience of 5 still
# leaves room for one LR reduction (patience 3) before training is stopped.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the attention-based combined model on the Yelp data.
history = combined_model_attn.fit(
    X_train_yelp, yelp_train_labels,
    validation_data=(X_val_yelp, yelp_val_labels),
    epochs=10,
    batch_size=512,
    callbacks=[reduce_lr, early_stopping],
)

In [None]:
# Predict on the Yelp test inputs with the attention-based combined model.
predicted_continuous_test = combined_model_attn.predict(x=X_test_yelp)

# Output 0 is the sentiment head (first entry of the model's `outputs` list);
# discretise it and store it alongside the reference scores.
predicted_rounded_test = convert_to_discrete(
    predicted_continuous_test[0]
)
yelp_df_test["Combined Attention Model Score"] = predicted_rounded_test

In [None]:
# Compare the reference 'Score' column with the stored attention-model
# predictions (run through convert_to_discrete once more before scoring).
original_scores = yelp_df_test.loc[:, 'Score'].values
calculated_scores = convert_to_discrete(
    yelp_df_test.loc[:, "Combined Attention Model Score"].values
)
calculate_metrics(original_scores, calculated_scores)

In [None]:
# Hand the attention-based model plus the Yelp validation and test
# inputs/targets (including the binary sarcasm labels) to the notebook's
# evaluation helper.
evaluate_model(
    combined_model_attn,
    X_val_yelp, y_val_yelp, val_sarcasm_labels_binary,
    X_test_yelp, y_test_yelp, test_sarcasm_labels_binary,
)