<a href="https://colab.research.google.com/github/Adityachauhan2344/Adityachauhan2344/blob/main/EEG-Text_Decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dropout, LayerNormalization, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

class MultiLayerTransformerEncoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, hidden_size, num_layers, num_heads, name="multi_layer_transformer_encoder", **kwargs):
        super(MultiLayerTransformerEncoder, self).__init__(name=name, **kwargs)

        self.embedding = Embedding(vocab_size, hidden_size)
        self.layers = [
            TransformerEncoderLayer(hidden_size, num_heads, name=f"encoder_layer_{i+1}") for i in range(num_layers)
        ]

    def call(self, inputs):
        embedded = self.embedding(inputs)

        output = embedded
        for layer in self.layers:
            output = layer(output)

        return output

class TransformerEncoderLayer(tf.keras.layers.Layer):
    def __init__(self, hidden_size, num_heads, feed_forward_size=2048, dropout=0.1, name="transformer_encoder_layer", **kwargs):
        super(TransformerEncoderLayer, self).__init__(name=name, **kwargs)

        self.self_attention = tf.keras.layers.MultiHeadAttention(num_heads, hidden_size // num_heads, dropout=dropout)
        self.feed_forward = tf.keras.Sequential([
            Dense(feed_forward_size, activation="relu"),
            Dense(hidden_size)
        ])
        self.dropout = Dropout(dropout)
        self.layer_norm1 = LayerNormalization(epsilon=1e-6)
        self.layer_norm2 = LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        attended = self.self_attention(inputs, inputs, inputs)
        attended = self.dropout(attended)
        attended = self.layer_norm1(inputs + attended)

        feed_forward_output = self.feed_forward(attended)
        feed_forward_output = self.dropout(feed_forward_output)
        output = self.layer_norm2(attended + feed_forward_output)

        return output


In [1]:
pip install OpenNMT-py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting OpenNMT-py
  Downloading OpenNMT_py-3.2.0-py3-none-any.whl (240 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.7/240.7 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch<2,>=1.13 (from OpenNMT-py)
  Downloading torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl (887.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m986.0 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting configargparse (from OpenNMT-py)
  Downloading ConfigArgParse-1.5.3-py3-none-any.whl (20 kB)
Collecting ctranslate2<4,>=3.2 (from OpenNMT-py)
  Downloading ctranslate2-3.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.7/33.7 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
Collecting waitress (from OpenNMT-py)
  Downloading waitress-2.1.2-py3-none-

In [13]:
import subprocess

def install_transformers():
    try:
        # Use pip to install Transformers
        subprocess.check_call(["pip", "install", "transformers"])
        print("Transformers library has been successfully installed!")
    except subprocess.CalledProcessError:
        print("An error occurred while installing Transformers.")

# Call the function to install Transformers
install_transformers()


from transformers import BartTokenizer

# Step 1: Load the ZuCo dataset
# Assuming you have loaded the dataset into a list of sentences
sentences = ["The Rock is very beautiful boy",
"Aditya is vefry intelligent",
"Ankit is very sincere",
"Anil is very hardworking"]

# Step 2: Tokenize the text
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
tokenized_sentences = tokenizer.tokenize(sentence) for sentence in sentences:

# Step 3: Apply any necessary text normalization
# You can apply lowercase conversion, punctuation removal, etc. here if needed

# Step 4: Encode the tokens
encoded_sentences = [tokenizer.encode(sentence) for sentence in tokenized_sentences]

# Step 5: Pad or truncate sequences
max_length = 128  # Maximum sequence length for BART
padded_sentences = [sentence[:max_length] + [tokenizer.pad_token_id] * (max_length - len(sentence)) for sentence in encoded_sentences]

# Step 6: Create input and output pairs
input_sentences = padded_sentences
output_sentences = [sentence[1:] + [tokenizer.pad_token_id] for sentence in padded_sentences]

# Step 7: Save the preprocessed data
# Save the input and output pairs to a suitable file format (e.g., CSV or JSON) for training

# Example code ends here, you can modify and extend it based on your specific needs


SyntaxError: ignored