In [None]:
# 1 How to implement a simple text classification model using LSTM in Keras 

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
import keras

# Placeholders — substitute real data before running this cell.
texts = [...]   # raw text samples
labels = [...]  # integer class label for each sample

max_words = 10000  # passed to the Tokenizer (num_words) and to the Embedding layer
max_len = 100      # passed to pad_sequences (maxlen) and to the Embedding layer

# Text -> integer sequences -> padded matrix `x`; integer labels -> one-hot `y`.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
x = pad_sequences(sequences, maxlen=max_len)
y = keras.utils.to_categorical(labels)

# Embedding -> LSTM -> softmax head with one output unit per column of `y`.
model = Sequential([
    Embedding(max_words, 100, input_length=max_len),
    LSTM(64),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x, y, epochs=10, batch_size=32)


In [None]:
# 2 How to generate sequences of text using a Recurrent Neural Network (RNN) 
# After training an RNN model (e.g., LSTM or GRU), use:
def generate_text(model, seed_text, length, tokenizer=None, max_len=None, temperature=None):
    """Extend ``seed_text`` by up to ``length`` tokens predicted by ``model``.

    Each step encodes the text generated so far, asks the model for a
    distribution over the next token id, picks one id, and appends the
    corresponding word (separated by a single space).

    Args:
        model: Object with ``predict(batch)`` returning, for each row of the
            batch, scores over token ids (either one vector, or one vector
            per timestep, in which case the last timestep is used).
        seed_text: Starting text.
        length: Maximum number of tokens to append.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences(list_of_str)``
            and an ``index_word`` mapping from id to word. Required.
        max_len: If given, the encoded input is cut to its last ``max_len``
            ids and left-padded with 0 up to ``max_len``.
        temperature: ``None`` or ``0`` selects the highest-scoring id (greedy).
            A positive value samples from the temperature-scaled distribution.

    Returns:
        ``seed_text`` with the generated words appended.

    Raises:
        ValueError: If no tokenizer is supplied, or the text encodes to an
            empty sequence and ``max_len`` is not set.
    """
    # Local import: the notebook's import cell does not bring numpy into scope.
    import numpy as np

    if tokenizer is None:
        raise ValueError(
            "generate_text needs the fitted tokenizer used to train the model "
            "(pass tokenizer=...)"
        )

    for _ in range(length):
        token_ids = list(tokenizer.texts_to_sequences([seed_text])[0])
        if max_len:
            # Keep the most recent context and left-pad with 0.
            token_ids = token_ids[-max_len:]
            token_ids = [0] * (max_len - len(token_ids)) + token_ids
        if not token_ids:
            raise ValueError("seed_text contains no tokens known to the tokenizer")
        tokenized_input = np.array([token_ids])

        preds = np.asarray(model.predict(tokenized_input)[0], dtype="float64")
        if preds.ndim > 1:
            # Per-timestep output: only the final step predicts the next token.
            preds = preds[-1]

        if not temperature:
            next_id = int(np.argmax(preds))
        else:
            # Re-scale in log space, then renormalise; clip avoids log(0).
            scaled = np.log(np.clip(preds, 1e-12, None)) / temperature
            probs = np.exp(scaled - scaled.max())
            probs /= probs.sum()
            next_id = int(np.random.choice(len(probs), p=probs))

        next_token = tokenizer.index_word.get(next_id)
        if next_token is None:
            # Id with no word (e.g. the padding id 0): nothing sensible to append.
            break
        seed_text += " " + next_token
    return seed_text


In [None]:
# 3 How to perform sentiment analysis using a simple CNN model
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
# Binary sentiment classifier: Embedding -> Conv1D -> global max pool -> sigmoid.
#
# The Dense(1, sigmoid) head emits a single value per sample, so the training
# target must be a single 0/1 column. `y` from the LSTM cell was built with
# to_categorical (one column per class), which does not match that output
# shape, so collapse it back to class indices first.
if y.ndim > 1:
    if y.shape[1] > 2:
        raise ValueError(
            "binary sentiment model expects 2 classes, got %d" % y.shape[1]
        )
    y_binary = y.argmax(axis=1)
else:
    y_binary = y

model = Sequential([
    Embedding(max_words, 128, input_length=max_len),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x, y_binary, epochs=5, batch_size=32)


In [None]:
# 4 How to perform Named Entity Recognition (NER) using spaCy 
import spacy

# Load the "en_core_web_sm" pipeline and run it over one example sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp(
    "Apple is looking at buying U.K. startup for $1 billion."
)

# One line per entity: its text followed by its label.
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")


In [None]:
# 5 How to implement a simple Seq2Seq model for machine translation using LSTM in Keras 
from keras.layers import Input, LSTM, Dense
from keras.models import Model

# NOTE(review): num_encoder_tokens and num_decoder_tokens are not defined in any
# cell visible here; they must be set before this cell runs (presumably to the
# source / target vocabulary sizes — confirm).

# Encoder: the LSTM is built with return_state=True and its call is unpacked
# into an output plus two state tensors. Only the two states are passed on to
# the decoder; encoder_outputs is not used by the model built below.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder: a second LSTM (return_sequences=True) is started from the encoder
# states; its own returned states are discarded here. Its output is then fed
# through a softmax Dense layer with num_decoder_tokens units.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
# From this line on, decoder_outputs refers to the softmax projection, not the
# raw LSTM output.
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: takes [encoder_inputs, decoder_inputs] and produces
# decoder_outputs.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')


In [None]:
# 6 How to generate text using a pre-trained transformer model (GPT-2) 
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained GPT-2 tokenizer and language-model head.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Encode the prompt as a batch of one sequence ("pt" = PyTorch tensors).
input_ids = tokenizer.encode("The future of AI is", return_tensors="pt")
output = model.generate(
    input_ids,
    max_length=50,
    num_return_sequences=1,
    # GPT-2 defines no pad token; name the EOS token explicitly instead of
    # relying on generate()'s implicit fallback.
    pad_token_id=tokenizer.eos_token_id,
)
# generate() returns a batch (one row per returned sequence); decode() takes a
# single sequence of ids, so pick the first row.
print(tokenizer.decode(output[0], skip_special_tokens=True))


In [None]:
# 7 How to apply data augmentation for text in NLP

# Common methods:
# Synonym replacement (using WordNet or embedding neighbors)
# Random deletion, random swap, or random insertion
# Back translation (translate to another language, then back)
# Contextual augmentation (masked language model predictions)
# Character-level perturbations
# Libraries: nlpaug or TextAttack.

In [None]:
# 8 How can you add an Attention Mechanism to a Seq2Seq model?

from keras.layers import Attention, Concatenate

# Attention needs per-timestep hidden states from both sides, with the same
# feature size:
#   encoder_seq_outputs: (batch, input_seq_len, units)
#   decoder_seq_outputs: (batch, target_seq_len, units)
# The Seq2Seq cell's `encoder_outputs` comes from an LSTM created without
# return_sequences=True, and its `decoder_outputs` was overwritten by the
# softmax projection, so neither can be used here. Build sequence-returning
# LSTMs on the same Input tensors instead.
attention_encoder = LSTM(256, return_sequences=True, return_state=True)
encoder_seq_outputs, attention_state_h, attention_state_c = attention_encoder(encoder_inputs)

attention_decoder = LSTM(256, return_sequences=True, return_state=True)
decoder_seq_outputs, _, _ = attention_decoder(
    decoder_inputs, initial_state=[attention_state_h, attention_state_c]
)

# Query = decoder states, value = encoder states.
attention_layer = Attention()
context_vector = attention_layer([decoder_seq_outputs, encoder_seq_outputs])

# Concatenate context with decoder states, then project to the target vocabulary.
decoder_combined = Concatenate(axis=-1)([decoder_seq_outputs, context_vector])
attention_outputs = Dense(num_decoder_tokens, activation='softmax')(decoder_combined)

attention_model = Model([encoder_inputs, decoder_inputs], attention_outputs)
attention_model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
