<a href="https://colab.research.google.com/github/VishnuReddy25/DL/blob/main/BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer
from sklearn.model_selection import train_test_split
import numpy as np

# Step 1: Prepare Dataset
# Each sentence is stored next to its sentiment label (1=Positive, 0=Negative)
# so the two parallel lists derived below cannot drift out of alignment.
_labeled_examples = (
    ("I love this movie!", 1),
    ("This is terrible.", 0),
    ("I feel amazing today.", 1),
    ("Worst experience ever.", 0),
    ("Best product I bought!", 1),
    ("I hate this.", 0),
)
texts = [sentence for sentence, _ in _labeled_examples]
labels = [sentiment for _, sentiment in _labeled_examples]

# Split data
# Stratify on the labels: with only six samples, an unstratified split can
# place validation examples of a single class only, which makes the
# validation metrics meaningless and leaves the tiny training set imbalanced.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

# Step 2: Initialize Tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize texts
# Both splits are encoded with identical settings: truncation enabled,
# padding enabled, and TensorFlow tensors as the return type.
_tokenizer_options = {"truncation": True, "padding": True, "return_tensors": "tf"}
train_encodings = tokenizer(train_texts, **_tokenizer_options)
val_encodings = tokenizer(val_texts, **_tokenizer_options)

# Step 3: Build Model
bert_base = TFAutoModel.from_pretrained(model_name)

# Input layers
# Two variable-length int32 inputs, named after the tokenizer output keys
# they are fed from.
input_ids, attention_mask = (
    tf.keras.Input(shape=(None,), dtype=tf.int32, name=_input_name)
    for _input_name in ("input_ids", "attention_mask")
)

# BERT outputs
outputs = bert_base(input_ids, attention_mask=attention_mask)

# Classification head
# Take the hidden state at sequence position 0 (the [CLS] token) and pass it
# through Dense(128, relu) -> Dropout(0.2) -> Dense(1, sigmoid).
cls_output = outputs.last_hidden_state[:, 0, :]  # [CLS] token
x = cls_output
for _head_layer in (
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
):
    x = _head_layer(x)
final_output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(
    inputs=[input_ids, attention_mask],
    outputs=final_output,
)

# Step 4: Compile and Train
_optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
model.compile(optimizer=_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Only the tensors matching the model's two named inputs are passed on.
_feature_names = ('input_ids', 'attention_mask')
_train_features = {name: train_encodings[name] for name in _feature_names}
_val_features = {name: val_encodings[name] for name in _feature_names}

history = model.fit(
    x=_train_features,
    y=np.array(train_labels),
    validation_data=(_val_features, np.array(val_labels)),
    batch_size=2,
    epochs=3,
)

# Step 5: Prediction Function
def predict_sentiment(text, *, threshold=0.5):
    """Classify ``text`` as "Positive" or "Negative" using the trained model.

    Args:
        text: The sentence to classify. It is tokenized with the module-level
            ``tokenizer`` before being fed to the module-level ``model``.
        threshold: Decision cut-off applied to the model's sigmoid output.
            Scores strictly greater than this value are reported as
            "Positive"; everything else is "Negative". Defaults to 0.5,
            which reproduces the original fixed behavior.

    Returns:
        The string "Positive" or "Negative".
    """
    tokens = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
    prediction = model.predict({'input_ids': tokens['input_ids'],
                               'attention_mask': tokens['attention_mask']})
    # Only the first row / first column of the prediction is inspected, i.e.
    # the score for the first (normally the only) input sequence.
    positive_score = prediction[0][0]
    return "Positive" if positive_score > threshold else "Negative"

# Test predictions
# Intended labels: "Positive" for the first sentence, "Negative" for the second.
for _demo_sentence in (
    "I absolutely loved it!",
    "This is the worst thing ever.",
):
    print(predict_sentiment(_demo_sentence))