In [None]:
pip install pandas sentence_transformers

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer


In [3]:
# Load the IMDB reviews; the CSV is expected in the working directory and must
# provide a 'review' and a 'sentiment' column.
data = pd.read_csv('IMDB Dataset.csv')

# Binary target: 'positive' -> 1, 'negative' -> 0 (any other label maps to NaN).
data['sentiment'] = data['sentiment'].map({'positive': 1, 'negative': 0})

# Normalise the review text with vectorised string operations:
#   1. cast to str first, so the string methods below always see text;
#   2. lowercase;
#   3. drop every character that is not a lowercase letter, digit or whitespace.
# The pattern is a raw string (avoids the invalid "\s" escape warning) and uses
# "a-z" rather than the former "A-z" range, which also let [ \ ] ^ _ ` through.
data['review'] = (
    data['review']
    .astype(str)
    .str.lower()
    .str.replace(r'[^a-z0-9\s]', '', regex=True)
)

In [None]:
# Pre-trained sentence encoder used to turn each review into one fixed-size vector.
sentence_model = SentenceTransformer('distilbert-base-nli-stsb-quora-ranking')

# Plain Python lists of the cleaned texts and their 0/1 labels.
reviews = data['review'].to_list()
sentiments = data['sentiment'].to_list()

# One embedding per review (this is the expensive step of the notebook).
numerical_reviews = sentence_model.encode(reviews)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    numerical_reviews,
    sentiments,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Wrap the split data as tensors.
# torch.as_tensor (unlike torch.tensor) accepts an existing tensor without warning
# or copying, so re-running this cell is harmless. The dtypes are pinned explicitly:
# float32 features to match the model's default parameter dtype, and int64 class
# indices as required by nn.CrossEntropyLoss.
x_train = torch.as_tensor(x_train, dtype=torch.float32)
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)

# Pair features with labels so the loaders yield (inputs, labels) batches.
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

# Shuffle only the training data; the evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [12]:
class Sentiment(nn.Module):
    """LSTM classifier over fixed-size sentence embeddings.

    Each embedding is presented to the LSTM as a sequence of length one. The
    final hidden state of the top LSTM layer goes through dropout and a linear
    layer to produce one logit per class.

    Args:
        input_dim: Size of each input embedding vector.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of output classes (logits).
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied to the final hidden state and,
            when ``n_layers > 1``, between the stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout=0.5):
        super().__init__()
        # nn.LSTM applies dropout only *between* stacked layers; with a single
        # layer a non-zero value has no effect and triggers a UserWarning.
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, dropout=lstm_dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, inputs):
        """Return class logits of shape (batch, output_dim).

        Args:
            inputs: Tensor of shape (batch, input_dim), one embedding per row.
        """
        # (batch, input_dim) -> (batch, 1, input_dim): a one-step sequence.
        sequence = inputs.unsqueeze(1)
        _, (hidden, _) = self.lstm(sequence)
        # hidden is (n_layers, batch, hidden_dim); keep the top layer's state.
        return self.fc(self.dropout(hidden[-1]))

In [13]:
# Hyper-parameters. input_dim must equal the width of the sentence embeddings
# (assumed to be 768 for the encoder used above - TODO confirm).
input_dim = 768
hidden_dim = 256
output_dim = 2   # two classes: negative (0) / positive (1)
n_layers = 1
dropout = 0.5

# Build the classifier and place it on the GPU when one is available.
model = Sentiment(input_dim, hidden_dim, output_dim, n_layers, dropout)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Cross-entropy over the two logits, optimised with Adam at its default settings.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [None]:
# Train the classifier on the pre-computed embeddings.
num_epochs = 10

for epoch in range(num_epochs):
    # Make sure dropout is active even if an evaluation/inference cell
    # (which calls model.eval()) was run before this one.
    model.train()

    running_loss = 0.0   # sum of per-sample losses over the epoch
    samples_seen = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch average is correct even when the last batch is smaller.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the epoch-average loss rather than the last batch's loss;
    # max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

In [None]:
# Measure classification accuracy on the held-out test split.
model.eval()  # disable dropout for evaluation
correct, total = 0, 0

with torch.no_grad():  # no gradients are needed for scoring
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(outputs.data, 1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

In [35]:
#inference
input_text = "I'm very angry at you"

# Give the text the same normalisation the training reviews received
# (lowercase, punctuation stripped) so the embedding matches the training
# distribution.
cleaned_text = re.sub(r'[^a-z0-9\s]', '', input_text.lower())

# Encode the single sentence and add a batch dimension: (dim,) -> (1, dim).
encoded_input = torch.tensor(sentence_model.encode(cleaned_text)).unsqueeze(0)
encoded_input = encoded_input.to(device)  # same device as the model

model.eval()  # disable dropout for prediction
with torch.no_grad():
    output = model(encoded_input)  # logits, shape (1, 2)
    # Convert the logits to class probabilities.
    probabilities = torch.softmax(output, dim=1)

# Index of the most probable class: 1 = positive, 0 = negative.
predicted_sentiment = torch.argmax(probabilities, dim=1).item()

# Print the predicted sentiment
if predicted_sentiment == 1:
    print("Positive sentiment")
else:
    print("Negative sentiment")



Negative sentiment
