In [3]:

# Import necessary libraries
import os

import gradio as gr
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer, GPT2Config

# Load Sentiment140 dataset
# Assuming you have the dataset file named 'sentiment140.csv'
sentiment140_path = '/home/awez_mehtab/WiDS/training.1600000.processed.noemoticon.csv'
sentiment140_df = pd.read_csv(sentiment140_path, encoding='latin-1', names=['target', 'id', 'date', 'flag', 'user', 'text'])

# Keep only the target (sentiment) and text columns
reviews_df = sentiment140_df[['target', 'text']].copy()  # Create a copy of the DataFrame
reviews_df['sentiment'] = reviews_df['target'].replace({4: 1, 0: 0})

# Part 3: Preprocess the Data
# Convert the tweets into padded token-id tensors and the labels into a tensor
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
# Register '[PAD]' as the padding token so sequences can be padded
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

max_length = 128  # Token limit per tweet; longer inputs are truncated

tokenized_reviews = tokenizer(
    reviews_df['text'].tolist(),
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='pt',
)
input_ids, attention_masks = (
    tokenized_reviews['input_ids'],
    tokenized_reviews['attention_mask'],
)
labels = torch.tensor(reviews_df['sentiment'].tolist())

# Bundle ids, masks and labels into a shuffled, batched loader for training
batch_size = 16  # Tune to fit the available GPU memory
train_dataset = TensorDataset(input_ids, attention_masks, labels)
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)


# Part 4: Load Pre-trained GPT-2 Model
# Load pre-trained GPT-2 with a two-class sequence-classification head
config = GPT2Config.from_pretrained('gpt2', num_labels=2)
model = GPT2ForSequenceClassification.from_pretrained('gpt2', config=config)

# The tokenizer was extended with a '[PAD]' token, so it now has more ids than the
# pre-trained embedding matrix has rows. Grow the embeddings to match; otherwise the
# new pad id indexes past the end of the embedding table.
model.resize_token_embeddings(len(tokenizer))
# GPT-2 ships without a padding token. The classification head needs pad_token_id
# to find the last non-padding token of each sequence, and it rejects batches larger
# than one when it is unset.
model.config.pad_token_id = tokenizer.pad_token_id

# Part 5: Define Optimizer and Loss Function
learning_rate = 5e-5  # You may adjust this based on your requirements

# Use PyTorch's own AdamW: the AdamW class exported by transformers is deprecated in
# favour of torch.optim.AdamW. eps and weight_decay are pinned to the defaults of the
# transformers implementation so the optimisation behaviour stays the same.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    eps=1e-6,
    weight_decay=0.0,
)

# Kept for any code that wants to compute the loss manually. The training loop in
# this notebook passes `labels` to the model and reads `outputs.loss` instead.
loss_fn = torch.nn.CrossEntropyLoss()

# Part 6: Fine-tune the Model
num_epochs = 3  # You may adjust this based on your requirements

# Train on the GPU when one is available; batches are moved to the same device below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# from_pretrained() returns the model in evaluation mode (dropout disabled), so
# switch to training mode explicitly before fine-tuning.
model.train()

for epoch in range(num_epochs):
    # Batch tensors get their own names so the module-level `labels` tensor built
    # during preprocessing is not overwritten by the loop.
    for batch_input_ids, batch_masks, batch_labels in train_dataloader:
        batch_input_ids = batch_input_ids.to(device)
        batch_masks = batch_masks.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        # Passing `labels` makes the model compute the classification loss itself.
        outputs = model(batch_input_ids, attention_mask=batch_masks, labels=batch_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

# Part 7: Save the Fine-tuned Model
# Save the fine-tuned weights and config
model.save_pretrained('fine_tuned_gpt2_model')
# Save the tokenizer alongside the model: the demo section loads the tokenizer from
# this same directory, and it must include the added '[PAD]' token.
tokenizer.save_pretrained('fine_tuned_gpt2_model')

# Part 8: Gradio Interface for Demo
# Restore the classifier and a tokenizer from the fine-tuned model directory.
# The tokenizer load requires tokenizer files to be present in that same directory.
fine_tuned_dir = 'fine_tuned_gpt2_model'
model = GPT2ForSequenceClassification.from_pretrained(fine_tuned_dir)
tokenizer = GPT2Tokenizer.from_pretrained(fine_tuned_dir)

# Define a function to make predictions
def predict_sentiment(tweet):
    """Classify a tweet as "Positive" or "Negative".

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        tweet: The text to classify.

    Returns:
        "Positive" when the highest-scoring class is 1, otherwise "Negative".
        Returns an empty string when ``tweet`` is ``None`` or blank, so callers
        that pass cleared input get no label instead of an error.
    """
    # Blank input has nothing to score; skip the tokenizer and model entirely.
    if tweet is None or (isinstance(tweet, str) and not tweet.strip()):
        return ""

    inputs = tokenizer(tweet, return_tensors='pt', truncation=True)
    # Inference only: no gradients are needed, so avoid building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the arg-max over the class dimension rather than over the flattened logits.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    return "Positive" if predicted_class == 1 else "Negative"

# Smoke-test the reloaded model on one hand-written example
sample_input = "I love using transformers for natural language processing!"

# Tokenize the input
tokenized_input = tokenizer(sample_input, return_tensors='pt', truncation=True)

# Make predictions; gradients are not needed for inference
with torch.no_grad():
    outputs = model(**tokenized_input)
# Arg-max over the class dimension (the batch holds a single example)
predicted_class = torch.argmax(outputs.logits, dim=-1).item()

# Interpret the prediction: class 1 is reported as positive, anything else as negative
sentiment = "Positive" if predicted_class == 1 else "Negative"

print(f"Sample Input: {sample_input}")
print(f"Predicted Sentiment: {sentiment}")



FileNotFoundError: [Errno 2] No such file or directory: '/home/awez_mehtab/WiDS/training.1600000.processed.noemoticon.csv'

In [3]:
import gradio as gr
# Define a function to make predictions
def predict_sentiment(tweet):
    """Classify a tweet as "Positive" or "Negative" for the Gradio demo.

    Relies on the module-level ``tokenizer`` and ``model``. A function of the same
    name is also defined in the training cell; this definition replaces it once
    this cell has run.

    Args:
        tweet: The text to classify.

    Returns:
        "Positive" when the highest-scoring class is 1, otherwise "Negative".
        Returns an empty string when ``tweet`` is ``None`` or blank, so a live
        interface that sends a cleared text box gets no label instead of an error.
    """
    # Blank input has nothing to score; skip the tokenizer and model entirely.
    if tweet is None or (isinstance(tweet, str) and not tweet.strip()):
        return ""

    inputs = tokenizer(tweet, return_tensors='pt', truncation=True)
    # Inference only: no gradients are needed, so avoid building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the arg-max over the class dimension rather than over the flattened logits.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    return "Positive" if predicted_class == 1 else "Negative"
    
# Create a Gradio interface: one text box in, the predicted label out.
# live=True re-runs predict_sentiment as the input changes instead of waiting for a
# submit click.
# NOTE(review): the former `capture_session=True` argument was dropped here. It is a
# deprecated Gradio option and newer releases are expected to reject it; confirm
# against the installed Gradio version.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs="text",
    outputs="text",
    live=True,
)

# Launch the Gradio interface
iface.launch()

Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.


