In [None]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW

# Seed torch's global RNG before anything stochastic happens, so that the
# shuffled batch order used during training is repeatable from run to run.
SEED = 42
torch.manual_seed(SEED)

# Directory of the locally stored checkpoint; the tokenizer and the
# sequence-classification model are both loaded from it.
MODEL_PATH = "./local_hate_speech_model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

# Training is pinned to the CPU; place the model on that device.
device = torch.device("cpu")
model.to(device)

# Load the labelled examples. pandas reads blank CSV fields as NaN; a NaN text
# cannot be tokenized and a NaN label turns the whole label column into floats,
# so incomplete rows are dropped before anything else touches the data.
df = pd.read_csv("new_data.csv").dropna(subset=["text", "label"])
texts = df["text"].astype(str).tolist()
# NOTE(review): assumes the `label` column holds integer class ids — confirm.
labels = df["label"].astype("int64").tolist()

# Tokenize every text in one call: sequences are cut to at most 64 tokens,
# padded to a common length, and returned as PyTorch tensors.
encodings = tokenizer(texts, truncation=True, padding=True, max_length=64, return_tensors="pt")

input_ids = encodings["input_ids"]
attention_mask = encodings["attention_mask"]
# Explicit integer dtype so the labels are always usable as class indices.
labels = torch.tensor(labels, dtype=torch.long)

# Bundle the three aligned tensors and serve them in shuffled mini-batches of 8.
dataset = TensorDataset(input_ids, attention_mask, labels)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Use PyTorch's own AdamW: the `AdamW` class exported by `transformers` is
# deprecated in favour of `torch.optim.AdamW`.
# NOTE(review): the two implementations do not share the same default
# hyper-parameters (e.g. weight decay) — confirm the torch defaults are acceptable.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
# Stand-alone criterion; the training loop below does not call it, it reads the
# loss from the model output instead.
loss_fn = torch.nn.CrossEntropyLoss()

# Fine-tuning: `epochs` passes over `dataloader`, one optimizer step per mini-batch.
epochs = 3  # raise for longer training
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch_inputs, batch_masks, batch_labels in dataloader:
        # Every tensor of the batch has to live on the same device as the model.
        batch_inputs = batch_inputs.to(device)
        batch_masks = batch_masks.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step before the new forward pass.
        optimizer.zero_grad()
        outputs = model(
            input_ids=batch_inputs,
            attention_mask=batch_masks,
            labels=batch_labels,
        )

        # The labels were passed in, so the loss is read straight from the output.
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss of this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}")

# Save the fine-tuned model together with its tokenizer in ONE directory, so the
# result can be reloaded from that path alone. Writing the tokenizer back to
# MODEL_PATH would leave the new directory without a tokenizer and would
# overwrite files of the original checkpoint.
OUTPUT_DIR = "./new_model"
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("Model training completed and saved.")


In [7]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
import torch.optim as optim


# Seed torch's global RNG up front so that the stochastic parts of the run
# (such as shuffled batch order) are repeatable after a kernel restart.
SEED = 42
torch.manual_seed(SEED)

# Directory of the locally stored checkpoint; the tokenizer and the
# sequence-classification model are both loaded from it.
MODEL_PATH = "./local_hate_speech_model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

In [8]:
# Training is pinned to the CPU; place the model on that device.
device = torch.device("cpu")
model.to(device)

# Load the labelled examples. pandas reads blank CSV fields as NaN; a NaN text
# cannot be tokenized and a NaN label turns the whole label column into floats,
# so incomplete rows are dropped before the lists are built.
df = pd.read_csv("hate_speech_dataset.csv").dropna(subset=["text", "label"])
texts = df["text"].astype(str).tolist()
# NOTE(review): assumes the `label` column holds integer class ids — confirm.
labels = df["label"].astype("int64").tolist()

In [9]:
# Tokenize all texts in a single call: truncate to 64 tokens, pad to a common
# length, and return PyTorch tensors.
encodings = tokenizer(
    texts,
    max_length=64,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Pull out the two tensors the model consumes and tensorize the labels.
input_ids, attention_mask = encodings["input_ids"], encodings["attention_mask"]
labels = torch.tensor(labels)

# Aligned (ids, mask, label) triples, served in shuffled mini-batches of 8.
dataset = TensorDataset(input_ids, attention_mask, labels)
dataloader = DataLoader(dataset=dataset, batch_size=8, shuffle=True)

In [10]:
# Stand-alone cross-entropy criterion (the training loop in this notebook reads
# the loss from the model output rather than calling it).
loss_fn = torch.nn.CrossEntropyLoss()

# AdamW over every parameter of the model, learning rate 5e-5.
optimizer = optim.AdamW(params=model.parameters(), lr=5e-5)


In [11]:
# Training loop: each epoch walks the whole dataloader once and prints the
# average loss over its batches.
epochs = 3  # number of passes over the data; increase if needed
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in dataloader:
        # Move the three tensors of the batch onto the training device.
        batch_inputs, batch_masks, batch_labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        # Supplying the labels lets the loss be taken directly from the output.
        loss = model(
            input_ids=batch_inputs,
            attention_mask=batch_masks,
            labels=batch_labels,
        ).loss
        loss.backward()
        optimizer.step()

        total_loss = total_loss + loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}")

Epoch 1, Loss: 0.02621420647576451
Epoch 2, Loss: 0.001980334903113544
Epoch 3, Loss: 0.0008671746612526477


In [14]:
# Write the fine-tuned model and then its tokenizer into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./new_model")
print("Model training completed and saved.")

Model training completed and saved.
