<a href="https://colab.research.google.com/github/dcdlima/NLP-Metaphor/blob/main/Metaphor_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, GRU, Dense, Attention, Dropout, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load dataset (replace 'dataset.csv' with your dataset file)
# The dataset should have two columns: 'text' and 'label'
data = pd.read_csv('dataset.csv')

# Drop rows that lack a text or a label. An empty text cell is read as NaN
# (a float), which the tokenizer cannot process, and a NaN label cannot be
# one-hot encoded.
data = data.dropna(subset=['text', 'label'])

# Preprocess the data
# Cast to str so that cells pandas parsed as numbers are still treated as text.
texts = data['text'].astype(str).values
labels = data['label'].values

# Tokenize the text
# Only the 20000 most frequent words get their own index; everything else is
# mapped to the '<OOV>' token.
tokenizer = Tokenizer(num_words=20000, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index

# Pad sequences
# Every sequence is padded/truncated at the end to exactly max_length tokens.
max_length = 100
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')

# Split the data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Convert labels to categorical (one-hot)
# Assuming 3 classes encoded as integers 0, 1, 2: metaphor, sarcasm, irony
y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)

# Define the model
def create_model(vocab_size, embedding_dim, input_length, num_classes=3):
    """Build the uncompiled BiLSTM -> self-attention -> GRU text classifier.

    Args:
        vocab_size: Number of rows in the embedding table; must be greater
            than the largest token index fed to the model.
        embedding_dim: Size of each token embedding vector.
        input_length: Number of token ids in each (padded) input sequence.
        num_classes: Number of output classes. Defaults to 3.

    Returns:
        An uncompiled Keras ``Model`` that takes integer token ids of shape
        ``(batch, input_length)`` and outputs softmax scores of shape
        ``(batch, num_classes)``.
    """
    inputs = Input(shape=(input_length,))
    # The sequence length is already fixed by `inputs`, so Embedding's own
    # `input_length` argument is redundant (and deprecated in Keras 3).
    x = Embedding(vocab_size, embedding_dim)(inputs)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)
    # Self-attention: the BiLSTM outputs serve as both query and value.
    x = Attention()([x, x])
    # Collapse the attended sequence into a single vector.
    x = GRU(64, return_sequences=False)(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

# Hyperparameters
# word_index holds every word seen during fitting, but the tokenizer only
# emits indices below its num_words cap (rarer words become the OOV index).
# Capping vocab_size avoids allocating embedding rows that are never used;
# fall back to the full vocabulary when no cap is configured.
full_vocab_size = len(word_index) + 1  # +1: word indices start at 1
vocab_size = min(full_vocab_size, tokenizer.num_words or full_vocab_size)
embedding_dim = 100

# Create the model
model = create_model(vocab_size, embedding_dim, max_length)

# Compile the model
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model (20% of the training data is held out for validation)
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

# Generate classification report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)
# Pass the label ids explicitly: without them classification_report raises a
# ValueError whenever a class is absent from both y_true and the predictions,
# because the number of observed classes no longer matches target_names.
class_names = ['Metaphor', 'Sarcasm', 'Irony']
print(classification_report(
    y_true,
    y_pred_classes,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0,
))