In [19]:
import torch

# Pick the compute device: CUDA when torch reports it as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [20]:
!pip install transformers torch pandas




In [21]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Tokenizer and classifier are both loaded from the same
# "distilbert-base-uncased-finetuned-sst-2-english" checkpoint
tokenizer = DistilBertTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english",
    num_labels=2,
)

# Put the model on the selected device (GPU if available); as the last
# expression of the cell, the call's result is also displayed
model.to(device)


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [22]:
def predict_sentiment_batch(dialogues):
    """Predict a sentiment label for every dialogue in a batch.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        dialogues: A single string, or an iterable of strings, to classify.
            A bare string is treated as a one-element batch.

    Returns:
        A list with one label per input text, in input order: 'Positive'
        when the highest-scoring class index is 1, otherwise 'Negative'.
        An empty input yields an empty list without invoking the tokenizer
        or the model.
    """
    # Normalise the input to a concrete list so any iterable is accepted;
    # a lone string must be wrapped, not split into characters.
    if isinstance(dialogues, str):
        dialogues = [dialogues]
    else:
        dialogues = list(dialogues)

    # Nothing to classify: skip tokenization and the forward pass entirely
    if not dialogues:
        return []

    # Tokenize the input text (padded to a common length, truncated to at
    # most 512 tokens) and move the tensors to the selected device
    inputs = tokenizer(dialogues, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)

    # Inference only, so gradient tracking is disabled
    with torch.no_grad():
        logits = model(**inputs).logits
        # Index of the highest-scoring class for each input text
        predicted_classes = torch.argmax(logits, dim=1).tolist()

    return ['Positive' if x == 1 else 'Negative' for x in predicted_classes]


In [None]:
import pandas as pd

# Load the Friends dataset
friends_df = pd.read_csv('/friends_dataset.csv')

# read_csv parses empty fields as NaN by default; those float values are not
# text, so normalise the column to plain strings before tokenization
# (a missing dialogue is classified as the empty string).
dialogue_texts = friends_df['Cleaned_Dialogue'].fillna('').astype(str).tolist()

# Run inference in small batches to bound memory use per forward pass
batch_size = 8  # You can adjust this based on your system's memory
predictions = []
for start in range(0, len(dialogue_texts), batch_size):
    predictions.extend(predict_sentiment_batch(dialogue_texts[start:start + batch_size]))

# Every row must have received exactly one label before the column is attached
assert len(predictions) == len(friends_df), "prediction count does not match row count"

# Add the predictions to the dataframe
friends_df['Positive/Negative'] = predictions

# Display the updated dataframe
print(friends_df.head())


     Cleaned_Dialogue  Happiness  Contentment  Confidence  Neutral  Sadness  \
0                 hey          1            0           0        0        0   
1                  hi          1            0           0        0        0   
2  pheebs whats wrong          0            0           0        0        1   
3                  oh          0            0           0        0        0   
4                 god          0            0           0        0        0   

   Anger  Fear  Surprise  Disgust  Love  Excitement  Anticipation  Nostalgia  \
0      0     0         1        0     0           1             0          0   
1      0     0         0        0     0           1             0          0   
2      0     0         0        0     0           0             0          0   
3      0     0         1        0     0           1             1          0   
4      0     0         1        0     0           0             0          0   

   Confusion  Frustration  Longing  Optimism

In [24]:
# Write the dataframe, including the 'Positive/Negative' column, to CSV without the index
friends_df.to_csv(path_or_buf='friends_with_sentiment_gpu.csv', index=False)


In [25]:
# Split the rows by predicted label into two frames: first the 'Positive' rows, then the 'Negative' rows
positive_df, negative_df = (
    friends_df[friends_df['Positive/Negative'] == label]
    for label in ('Positive', 'Negative')
)


In [None]:
# Output paths are named once so the files written and the confirmation
# message cannot drift apart
positive_path = 'friends_positive.csv'
negative_path = 'friends_negative.csv'

# Save the positive instances
positive_df.to_csv(positive_path, index=False)

# Save the negative instances
negative_df.to_csv(negative_path, index=False)

# Report the file names that were actually written
print(f"Datasets saved: '{positive_path}' and '{negative_path}'")


Datasets saved: 'positive.csv' and 'negative.csv'
