In [None]:
# Central place for the notebook's constants
class Config:
    """Namespace of run-level constants, accessed as ``Config.<NAME>``.

    NOTE(review): the optimizer, training-loop and split cells of this notebook
    use their own literals (lr=2e-5, 3 epochs, test_size=0.1) instead of
    LEARNING_RATE, EPOCHS and TRAIN_RATIO -- confirm which values are intended.
    """

    # Identification
    PROJECT_NAME = 'detect-ai'

    # Pretrained checkpoint: hub identifier, and a filesystem path
    # (presumably a local copy of the same checkpoint -- TODO confirm)
    MODEL_NAME = 'microsoft/deberta-base'
    MODEL_PATH = '/kaggle/input/deberta-base/'

    # CSV file read by the data-loading cell
    DATA_PATH = '/kaggle/input/mydataset.csv'

    # Training hyper-parameters
    EPOCHS = 2
    BATCH_SIZE = 16
    LEARNING_RATE = 1e-5
    TRAIN_RATIO = 0.8

In [None]:
# Required Libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from transformers import DebertaTokenizer, DebertaModel, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import roc_auc_score

In [None]:
# Load Data
df = pd.read_csv(Config.DATA_PATH)

# A missing label cannot be imputed. Fail here with a clear message instead of
# letting a placeholder value reach torch.tensor(labels, dtype=torch.long).
missing_label_count = int(df['generated'].isna().sum())
if missing_label_count:
    raise ValueError(
        f"{missing_label_count} row(s) in {Config.DATA_PATH} have no 'generated' label"
    )

# Replace NaN with empty strings in the free-text columns only; a frame-wide
# fillna('') would also turn missing values in the label column into ''.
df = df.fillna({'text': '', 'prompt': ''})

texts = df['text'].tolist()
prompts = df['prompt'].tolist()
labels = df['generated'].astype(int).tolist()  # integer class ids for the long tensor built later

In [4]:
# Tokenization
# Texts and prompts are encoded independently with identical settings: every
# sequence is truncated or padded to exactly 256 tokens and returned as tensors.
tokenizer = DebertaTokenizer.from_pretrained(Config.MODEL_NAME)
encode_options = dict(truncation=True, padding='max_length', max_length=256, return_tensors='pt')

text_encodings = tokenizer(texts, **encode_options)
prompt_encodings = tokenizer(prompts, **encode_options)

In [5]:
# Combine text and prompt encodings for DataLoader
# Each field is joined along the sequence axis (dim=1): text tokens first,
# prompt tokens second.
input_ids, attention_masks = (
    torch.cat((text_encodings[field], prompt_encodings[field]), dim=1)
    for field in ('input_ids', 'attention_mask')
)

In [6]:
# Hold out 10% of the rows for validation with a fixed seed. train_test_split
# returns a (train, validation) pair per input array, in the order given.
# NOTE(review): Config.TRAIN_RATIO (0.8) is not used here -- confirm the intended split.
split_parts = train_test_split(
    input_ids,
    torch.tensor(labels, dtype=torch.long),
    attention_masks,
    test_size=0.1,
    random_state=42,
)
train_inputs, val_inputs = split_parts[0], split_parts[1]
train_labels, val_labels = split_parts[2], split_parts[3]
train_masks, val_masks = split_parts[4], split_parts[5]

In [7]:
# Data Loaders
# Every dataset item is an (input_ids, attention_mask, label) triple.
train_data = torch.utils.data.TensorDataset(train_inputs, train_masks, train_labels)
val_data = torch.utils.data.TensorDataset(val_inputs, val_masks, val_labels)

# Training batches are reshuffled; validation keeps dataset order.
train_dataloader = torch.utils.data.DataLoader(
    train_data,
    batch_size=Config.BATCH_SIZE,
    shuffle=True,
)
val_dataloader = torch.utils.data.DataLoader(
    val_data,
    batch_size=Config.BATCH_SIZE,
    shuffle=False,
)

In [8]:
# Model Definition
class DebertaWithAttention(nn.Module):
    """DeBERTa encoder with learned per-position attention pooling and a 1-unit head.

    One learnable score per sequence position is soft-maxed along the sequence
    axis and used to form a weighted sum of the encoder's last hidden states.
    The pooled vector is passed through a linear layer that emits one logit.

    Args:
        model_name: Checkpoint handed to ``DebertaModel.from_pretrained``.
        max_seq_len: Number of positions the pooling scores are allocated for.
            Shorter inputs use the leading scores; longer inputs are rejected
            in ``forward``.
    """

    def __init__(self, model_name='microsoft/deberta-base', max_seq_len=512):
        super().__init__()
        self.deberta = DebertaModel.from_pretrained(model_name)
        self.attention = nn.Parameter(torch.randn(1, max_seq_len, 1))
        self.classifier = nn.Linear(self.deberta.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Return one logit per example, shape ``(batch, 1)``.

        Args:
            input_ids: Token ids, forwarded unchanged to the encoder.
            attention_mask: 1 for real tokens and 0 for padding, or ``None`` to
                treat every position as real. Forwarded to the encoder and also
                used to exclude padding from the pooling.

        Raises:
            ValueError: If the sequence is longer than the pooling scores.
        """
        outputs = self.deberta(input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state  # (batch, seq_len, hidden)

        batch_size, seq_len = last_hidden_state.size(0), last_hidden_state.size(1)
        max_positions = self.attention.size(1)
        if seq_len > max_positions:
            raise ValueError(
                f"sequence length {seq_len} exceeds the {max_positions} positions "
                "the attention pooling was built for"
            )

        # Use only the scores for positions that exist in this batch, one copy per example.
        scores = self.attention[:, :seq_len, :].expand(batch_size, -1, -1)

        if attention_mask is not None:
            # Padding must get no pooling weight. The most negative finite value is
            # used instead of -inf so that an all-padding row yields uniform weights
            # rather than NaN.
            is_padding = attention_mask.unsqueeze(-1) == 0
            scores = scores.masked_fill(is_padding, torch.finfo(scores.dtype).min)

        # Applying attention
        attention_weights = F.softmax(scores, dim=1)
        context_vector = torch.sum(attention_weights * last_hidden_state, dim=1)

        logits = self.classifier(context_vector)
        return logits

In [9]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build the network and move its parameters to the chosen device; the trailing
# expression displays the module tree as the cell output.
model = DebertaWithAttention()
model.to(device)

DebertaWithAttention(
  (deberta): DebertaModel(
    (embeddings): DebertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=0)
      (LayerNorm): DebertaLayerNorm()
      (dropout): StableDropout()
    )
    (encoder): DebertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaLayer(
          (attention): DebertaAttention(
            (self): DisentangledSelfAttention(
              (in_proj): Linear(in_features=768, out_features=2304, bias=False)
              (pos_dropout): StableDropout()
              (pos_proj): Linear(in_features=768, out_features=768, bias=False)
              (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
              (dropout): StableDropout()
            )
            (output): DebertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): DebertaLayerNorm()
              (dropout): StableDropout()
            )
          )
          (intermediate

In [10]:
# Optimizer & Learning Rate Scheduler
# torch.optim.AdamW is used instead of the AdamW class exported by transformers,
# which is deprecated in favour of the PyTorch implementation. eps and
# weight_decay are set explicitly to the values the transformers class used by
# default, so the effective regularisation does not change.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

# Linear decay to zero with no warm-up. The multiplier is the number of epochs
# and must stay equal to `epochs` in the training cell, otherwise the learning
# rate reaches zero too early or never finishes decaying.
total_training_steps = len(train_dataloader) * 3
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)



In [11]:
from tqdm import tqdm

# Training Loop with AUC optimization
# Each epoch: one pass over the training batches (BCE-with-logits loss, gradient
# clipping, optimizer + scheduler step), then ROC AUC on the validation set.
epochs = 3  # must equal the epoch multiplier used for the scheduler's num_training_steps
for epoch in range(epochs):
    model.train()

    train_progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1} Training", unit="batch")
    for batch in train_progress_bar:
        # Batch-local names: unpacking into `labels` would overwrite the
        # notebook-level `labels` list that the split cell reads.
        batch_inputs, batch_masks, batch_labels = tuple(t.to(device) for t in batch)

        # Clear gradients before the forward pass so that leftovers from an
        # interrupted earlier run of this cell cannot leak into the first step.
        optimizer.zero_grad()
        logits = model(batch_inputs, attention_mask=batch_masks).squeeze(-1)
        loss = F.binary_cross_entropy_with_logits(logits, batch_labels.float())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        # Update the progress bar with the latest loss.
        train_progress_bar.set_postfix({'loss': loss.item()})

    # Validation
    model.eval()
    val_progress_bar = tqdm(val_dataloader, desc=f"Epoch {epoch + 1} Validation", unit="batch")
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in val_progress_bar:
            batch_inputs, batch_masks, batch_labels = tuple(t.to(device) for t in batch)
            logits = model(batch_inputs, attention_mask=batch_masks).squeeze(-1)
            # Move both scores and labels to host memory as plain Python numbers;
            # roc_auc_score cannot consume tensors that live on the GPU.
            predictions.extend(logits.cpu().tolist())
            true_labels.extend(batch_labels.cpu().tolist())

    # Calculate AUC (raw logits are valid ranking scores, no sigmoid needed)
    auc_score = roc_auc_score(true_labels, predictions)
    print(f"Epoch {epoch + 1}, AUC: {auc_score:.4f}")

# Save Model
torch.save(model.state_dict(), 'debarta_with_attention_model.pt')

Epoch 1 Training:   0%|          | 2/2799 [02:09<50:07:01, 64.51s/batch, loss=0.74] 


KeyboardInterrupt: 