In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SpatialDropout1D
from keras.preprocessing import sequence

# --- Data settings ---------------------------------------------------------
vocab_size = 5000  # keep only the 5000 most frequent words of the corpus
max_words = 500    # every review is padded / truncated to exactly 500 tokens

# Fetch the IMDb reviews as integer-encoded word sequences with 0/1 labels.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring every review to a common length. With the pad_sequences defaults,
# short reviews are zero-padded at the front and long reviews are truncated
# at the front, i.e. the LAST `max_words` tokens of a long review are kept.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (X_train, X_test)
)

# --- Model: embedding -> dropout -> single LSTM -> sigmoid ------------------
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_words),
    SpatialDropout1D(0.4),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary sentiment target, hence binary cross-entropy and accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
model.fit(X_train, y_train, batch_size=64, epochs=5)

# --- Evaluation: evaluate() returns [loss, accuracy] ------------------------
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz




Epoch 1/5

In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SpatialDropout1D, Bidirectional
from keras.preprocessing import sequence

# --- Data settings ---------------------------------------------------------
vocab_size = 5000  # keep only the 5000 most frequent words of the corpus
max_words = 500    # every review is padded / truncated to exactly 500 tokens

# Fetch the IMDb reviews as integer-encoded word sequences with 0/1 labels.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring every review to a common length. With the pad_sequences defaults,
# padding and truncation both happen at the front of the sequence, so the
# LAST `max_words` tokens of a long review are the ones that are kept.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (X_train, X_test)
)

# --- Model: embedding -> dropout -> bidirectional LSTM -> sigmoid -----------
# Bidirectional runs the wrapped LSTM over the sequence in both directions.
recurrent_layer = LSTM(128, dropout=0.2, recurrent_dropout=0.2)
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_words),
    SpatialDropout1D(0.4),
    Bidirectional(recurrent_layer),
    Dense(1, activation='sigmoid'),
])

# Binary sentiment target, hence binary cross-entropy and accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
model.fit(X_train, y_train, batch_size=64, epochs=5)

# --- Evaluation: evaluate() returns [loss, accuracy] ------------------------
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 85.06%


In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SpatialDropout1D
from keras.preprocessing import sequence

# Load the IMDb dataset
vocab_size = 5000  # Keep only the 5000 most frequent words
max_words = 500    # Every review is padded / truncated to 500 tokens
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to the same length.
# With the pad_sequences defaults, padding and truncation are both applied at
# the front, so the LAST `max_words` tokens of a long review are kept.
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

# Define the LSTM model with 2 stacked LSTM layers
model = Sequential()
model.add(Embedding(vocab_size, 128, input_length=max_words))
model.add(SpatialDropout1D(0.4))
# The first LSTM must emit its hidden state at every timestep
# (return_sequences=True) so that the next LSTM receives a 3-D
# (batch, timesteps, features) tensor. Without it the layer returns only the
# final state (2-D) and stacking the second LSTM fails at model build time.
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
# The last recurrent layer returns only its final state, which feeds the
# classifier head.
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model (binary sentiment target -> binary cross-entropy)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=64)

# Evaluate the model; evaluate() returns [loss, accuracy]
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SpatialDropout1D, Bidirectional
from keras.preprocessing import sequence

# --- Data settings ---------------------------------------------------------
vocab_size = 5000  # keep only the 5000 most frequent words of the corpus
max_words = 500    # every review is padded / truncated to exactly 500 tokens

# Fetch the IMDb reviews as integer-encoded word sequences with 0/1 labels.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring every review to a common length. With the pad_sequences defaults,
# padding and truncation both happen at the front of the sequence, so the
# LAST `max_words` tokens of a long review are the ones that are kept.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (X_train, X_test)
)

# --- Model: embedding -> dropout -> 2 x bidirectional LSTM -> sigmoid -------
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_words),
    SpatialDropout1D(0.4),
    # Lower recurrent layer: return_sequences=True hands the full per-timestep
    # output to the recurrent layer stacked on top of it.
    Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)),
    # Upper recurrent layer: only its final output feeds the classifier.
    Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2)),
    Dense(1, activation='sigmoid'),
])

# Binary sentiment target, hence binary cross-entropy and accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
model.fit(X_train, y_train, batch_size=64, epochs=5)

# --- Evaluation: evaluate() returns [loss, accuracy] ------------------------
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SpatialDropout1D, Conv1D, MaxPooling1D, Flatten
from keras.preprocessing import sequence

# --- Data settings ---------------------------------------------------------
vocab_size = 5000  # keep only the 5000 most frequent words of the corpus
max_words = 500    # every review is padded / truncated to exactly 500 tokens

# Fetch the IMDb reviews as integer-encoded word sequences with 0/1 labels.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring every review to a common length. With the pad_sequences defaults,
# padding and truncation both happen at the front of the sequence, so the
# LAST `max_words` tokens of a long review are the ones that are kept.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (X_train, X_test)
)

# --- Model: embedding -> Conv1D -> max-pool -> dropout -> LSTM -> sigmoid ---
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_words),
    # Width-3 convolution over the embedded tokens; 'same' padding keeps the
    # sequence length unchanged before pooling.
    Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
    # Pooling with size 2 halves the number of timesteps the LSTM processes.
    MaxPooling1D(pool_size=2),
    SpatialDropout1D(0.4),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary sentiment target, hence binary cross-entropy and accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
model.fit(X_train, y_train, batch_size=64, epochs=5)

# --- Evaluation: evaluate() returns [loss, accuracy] ------------------------
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 87.98%


In [None]:
import torch
from transformers import Wav2Vec2Model
import torchaudio
import librosa

# Load the pretrained wav2vec 2.0 encoder.
# NOTE: wav2vec 2.0 is a *speech* encoder: its forward pass takes a float
# tensor of raw audio samples, not text. Calling it with a Python string is
# what raised "TypeError: string indices must be integers". The variable name
# `text_encoder` is kept unchanged for compatibility, even though the model
# does not process text.
text_encoder = Wav2Vec2Model.from_pretrained('facebook/wav2vec2-base-960h')

# Vocoder placeholder for audio synthesis.
# Define and initialize your vocoder model here (e.g., WaveNet, HiFi-GAN, ...).
# While it is None the synthesis step below is skipped instead of failing
# with a NameError.
vocoder = None

# Text for a future text-to-speech front end. It is NOT fed to wav2vec 2.0:
# turning text into acoustic features needs a dedicated TTS model.
input_text = "Hello, how are you?"

# Create a dummy waveform (for demonstration purposes): one second of random
# noise at the 16 kHz sampling rate the checkpoint was trained on.
sample_rate = 16000
waveform = torch.randn(sample_rate)

# Encode the audio. The model expects a batch dimension, i.e. a tensor of
# shape (batch, num_samples); no gradients are needed for feature extraction.
with torch.no_grad():
    outputs = text_encoder(waveform.unsqueeze(0))
acoustic_features = outputs.last_hidden_state

# Synthesize an audio waveform from the acoustic features once a vocoder has
# been provided above.
if vocoder is not None:
    vocoder_output = vocoder(acoustic_features)

# Convert the waveform to a numpy array
audio_np = waveform.numpy()

# Save the waveform as a wave file.
# librosa.output.write_wav was removed in librosa 0.8, so the already-imported
# torchaudio is used instead; it expects a (channels, num_samples) tensor.
torchaudio.save('output.wav', waveform.unsqueeze(0), sample_rate)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: string indices must be integers