<a href="https://colab.research.google.com/github/jerryk42/SemEval-Food-Hazard-Detection-Challenge/blob/main/Submission.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Mount Google Drive
drive.mount('/content/drive')

config = {
    'max_len': 256,
    'batch_size': 16,
    'learning_rate': 0.00005,
    'epochs': 30,  # Reduced maximum epochs
    'early_stopping_patience': 6,  # Early stopping patience
    'model_name': "dmis-lab/biobert-base-cased-v1.1"  # BioBERT model name
}


# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load labeled training data
url = "https://raw.githubusercontent.com/food-hazard-detection-semeval-2025/food-hazard-detection-semeval-2025.github.io/refs/heads/main/data/incidents_train.csv"
df_train = pd.read_csv(url)
df_train = df_train.drop(df_train.columns[0], axis=1)

# Clean text
import re

def clean_text(text):
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = text.lower()
    return ' '.join(text.split())

df_train['text'] = df_train['text'].apply(clean_text)

# Load unlabeled validation data
unlabeled_path = '/content/drive/My Drive/AUEB/incidents.csv'
df_unlabeled = pd.read_csv(unlabeled_path)

df_unlabeled['text'] = df_unlabeled['text'].apply(clean_text)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(config['model_name'])

# Custom Dataset Class
class TextDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len, labels=None):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }
        if self.labels is not None:
            item['label'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Encode labels
targets_st1 = ['hazard-category', 'product-category']
targets_st2 = ['hazard', 'product']
label_encoders = {}
for target in targets_st1 + targets_st2:
    le = LabelEncoder()
    df_train[target] = le.fit_transform(df_train[target])
    label_encoders[target] = le

# Training and validation data
train_texts = df_train['text'].values
val_texts = df_unlabeled['text'].values

# Train and validate for each target
for target in targets_st1 + targets_st2:
    print(f"\nTraining for target: {target}")
    train_labels = df_train[target].values
    train_dataset = TextDataset(train_texts, tokenizer, config['max_len'], train_labels)
    val_dataset = TextDataset(val_texts, tokenizer, config['max_len'])

    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)

    num_labels = len(label_encoders[target].classes_)
    model = AutoModelForSequenceClassification.from_pretrained(config['model_name'], num_labels=num_labels).to(device)
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    best_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(config['epochs']):
        print(f"Epoch {epoch+1}/{config['epochs']} - Target: {target}")
        model.train()
        total_loss = 0

        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Average Training Loss: {avg_loss}")

        # Validation loop
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                outputs = model(input_ids, attention_mask=attention_mask)

        scheduler.step(avg_loss)

        # Check early stopping
        if avg_loss < best_loss:
            best_loss = avg_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), f'/content/drive/My Drive/AUEB/best_model_{target}.pt')
        else:
            early_stop_counter += 1
            print(f"Early stopping counter: {early_stop_counter}/{config['early_stopping_patience']}")

        if early_stop_counter >= config['early_stopping_patience']:
            print(f"Early stopping triggered for {target}.")
            break

    # Generate predictions on the unlabeled dataset
    print(f"\nGenerating predictions for target: {target}")
    model.load_state_dict(torch.load(f'/content/drive/My Drive/AUEB/best_model_{target}.pt'))
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(val_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs.logits, dim=1)
            predictions.extend(preds.cpu().numpy())

    # Save predictions for each target
    df_unlabeled[f'predicted_{target}'] = label_encoders[target].inverse_transform(predictions)

# Save ST1 and ST2 predictions separately
st1_output_path = '/content/drive/My Drive/AUEB/ST1_predictions.csv'
st2_output_path = '/content/drive/My Drive/AUEB/ST2_predictions.csv'

df_st1 = df_unlabeled[['text', 'predicted_hazard-category', 'predicted_product-category']]
df_st2 = df_unlabeled[['text', 'predicted_hazard', 'predicted_product']]

df_st1.to_csv(st1_output_path, index=False)
df_st2.to_csv(st2_output_path, index=False)

print(f"ST1 predictions saved to {st1_output_path}")
print(f"ST2 predictions saved to {st2_output_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cuda

Training for target: hazard-category


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.5553847384644942
Epoch 2/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.24715398916236633
Epoch 3/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.17716041635780497
Epoch 4/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.14300014221829907
Epoch 5/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.12332023199173217
Epoch 6/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.12088875183777052
Epoch 7/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.08753933498501168
Epoch 8/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.0919520901281785
Early stopping counter: 1/6
Epoch 9/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06687380690288688
Epoch 10/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.05834036160944287
Epoch 11/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.058827113601783255
Early stopping counter: 1/6
Epoch 12/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06635835043992261
Early stopping counter: 2/6
Epoch 13/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06480995052070913
Early stopping counter: 3/6
Epoch 14/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.04684006825287873
Epoch 15/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.04329001270900861
Epoch 16/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.048154167587496924
Early stopping counter: 1/6
Epoch 17/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.03437990628134565
Epoch 18/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.018798213623978093
Epoch 19/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.014573888237435816
Epoch 20/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.05025359633914634
Early stopping counter: 1/6
Epoch 21/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.07091998262440716
Early stopping counter: 2/6
Epoch 22/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06011526574016074
Early stopping counter: 3/6
Epoch 23/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.0451803937888962
Early stopping counter: 4/6
Epoch 24/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.014328113882641349
Epoch 25/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.010117055660804862
Epoch 26/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.008667651622639617
Epoch 27/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.0078015452457310735
Epoch 28/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.0077190328447613865
Epoch 29/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.006572308321190564
Epoch 30/30 - Target: hazard-category


100%|██████████| 318/318 [01:44<00:00,  3.05it/s]


Average Training Loss: 0.0059684857979794646

Generating predictions for target: hazard-category


  model.load_state_dict(torch.load(f'/content/drive/My Drive/AUEB/best_model_{target}.pt'))
100%|██████████| 36/36 [00:03<00:00,  9.73it/s]



Training for target: product-category


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 2.188765874448812
Epoch 2/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 1.4394085168276194
Epoch 3/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.8948356110438611
Epoch 4/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.6091960489562472
Epoch 5/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.4227321868844377
Epoch 6/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.2920932123179799
Epoch 7/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.22418861847324004
Epoch 8/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.1585750683070213
Epoch 9/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.12256313285368653
Epoch 10/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.1283585603272765
Early stopping counter: 1/6
Epoch 11/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.12321317716246857
Early stopping counter: 2/6
Epoch 12/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.09615027408024776
Epoch 13/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.07327921406743731
Epoch 14/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.06740042055245349
Epoch 15/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.084821394531191
Early stopping counter: 1/6
Epoch 16/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.06211942207557296
Epoch 17/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.05854416869100441
Epoch 18/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.04330922761908586
Epoch 19/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.06269520769663062
Early stopping counter: 1/6
Epoch 20/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.07094431524648208
Early stopping counter: 2/6
Epoch 21/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.05089650205453659
Early stopping counter: 3/6
Epoch 22/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.04698036411471823
Early stopping counter: 4/6
Epoch 23/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.029790958869944578
Epoch 24/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.010486631827361577
Epoch 25/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.007355811028066238
Epoch 26/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.0043748202548822114
Epoch 27/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.0038038843145532498
Epoch 28/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.003811578682745344
Early stopping counter: 1/6
Epoch 29/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.003620365917911085
Epoch 30/30 - Target: product-category


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.004297280124185092
Early stopping counter: 1/6

Generating predictions for target: product-category


  model.load_state_dict(torch.load(f'/content/drive/My Drive/AUEB/best_model_{target}.pt'))
100%|██████████| 36/36 [00:03<00:00,  9.63it/s]



Training for target: hazard


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 2.3658450154013604
Epoch 2/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 1.1629930944562707
Epoch 3/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.830802950516062
Epoch 4/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.6444885319087116
Epoch 5/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.5085708345960146
Epoch 6/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.41226685614729264
Epoch 7/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.3387121555377852
Epoch 8/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.29704311042555476
Epoch 9/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.24675774453910734
Epoch 10/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.18960983360643094
Epoch 11/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.15649065168855605
Epoch 12/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.14408216257462478
Epoch 13/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.12576954356693434
Epoch 14/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.1381472565816516
Early stopping counter: 1/6
Epoch 15/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.11282264793499625
Epoch 16/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.11551124589816439
Early stopping counter: 1/6
Epoch 17/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06635128909139162
Epoch 18/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06473167002590596
Epoch 19/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.04187132777725259
Epoch 20/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06828941580906815
Early stopping counter: 1/6
Epoch 21/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.08288865458365213
Early stopping counter: 2/6
Epoch 22/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.059280859779071576
Early stopping counter: 3/6
Epoch 23/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.04830586074070958
Early stopping counter: 4/6
Epoch 24/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.024489144082605815
Epoch 25/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.01342237001481179
Epoch 26/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.01106527345416095
Epoch 27/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.010440319987034437
Epoch 28/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.009342343528878014
Epoch 29/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.00838751729604138
Epoch 30/30 - Target: hazard


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.008077849992572874

Generating predictions for target: hazard


  model.load_state_dict(torch.load(f'/content/drive/My Drive/AUEB/best_model_{target}.pt'))
100%|██████████| 36/36 [00:03<00:00,  9.68it/s]



Training for target: product


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 6.256507617122722
Epoch 2/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 5.253463043356842
Epoch 3/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 4.521450587788468
Epoch 4/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 3.9069172683751807
Epoch 5/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 3.366147315352218
Epoch 6/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 2.8940544000961497
Epoch 7/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 2.4795399289461053
Epoch 8/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 2.1356173523192137
Epoch 9/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 1.826778085726612
Epoch 10/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 1.5624344167094562
Epoch 11/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 1.3380469129707828
Epoch 12/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 1.1515217273021645
Epoch 13/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.9838794406105138
Epoch 14/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.8341012394184586
Epoch 15/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.7101489749250922
Epoch 16/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.6116838118635446
Epoch 17/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.5163960320839904
Epoch 18/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.43407176587285484
Epoch 19/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.3652357617217415
Epoch 20/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.31270106679191756
Epoch 21/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.26775837741183034
Epoch 22/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.2275939227532853
Epoch 23/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.2336877233132064
Early stopping counter: 1/6
Epoch 24/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.19465563905501515
Epoch 25/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.21578890505779838
Early stopping counter: 1/6
Epoch 26/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.17051010284537696
Epoch 27/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.11154657602895918
Epoch 28/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.0811959656347878
Epoch 29/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.04it/s]


Average Training Loss: 0.06553522920903732
Epoch 30/30 - Target: product


100%|██████████| 318/318 [01:44<00:00,  3.03it/s]


Average Training Loss: 0.053228428666392026

Generating predictions for target: product


  model.load_state_dict(torch.load(f'/content/drive/My Drive/AUEB/best_model_{target}.pt'))
100%|██████████| 36/36 [00:03<00:00,  9.70it/s]


ST1 predictions saved to /content/drive/My Drive/AUEB/ST1_predictions.csv
ST2 predictions saved to /content/drive/My Drive/AUEB/ST2_predictions.csv


In [None]:
# mipws na dokimasw na kanw to classification me to instructor?

In [7]:
import pandas as pd

# Load your CSV file (adjust the path if needed)
input_csv_path = '/content/drive/My Drive/AUEB/ST2_predictions.csv'  # Example: ST1 predictions
output_csv_path = '/content/drive/My Drive/AUEB/submission.csv'  # Output as "submission.csv"

# Read the CSV and drop the first column
df = pd.read_csv(input_csv_path)
df = df.iloc[:, 1:]  # Drop the first column

# Save the modified DataFrame
df.to_csv(output_csv_path, index=False)

print(f"Submission saved to {output_csv_path}")


Submission saved to /content/drive/My Drive/AUEB/submission.csv


In [6]:
from google.colab import drive
import pandas as pd
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import pandas as pd

# File paths
st1_path = '/content/drive/My Drive/AUEB/ST1_predictions.csv'
st2_path = '/content/drive/My Drive/AUEB/ST2_predictions.csv'

# Save paths
st1_output_path = '/content/drive/My Drive/AUEB/ST1_predictions_cleaned.csv'
st2_output_path = '/content/drive/My Drive/AUEB/ST2_predictions_cleaned.csv'

# Process ST1_predictions.csv
df_st1 = pd.read_csv(st1_path)  # Load the file
df_st1 = df_st1.iloc[:, 1:]  # Drop the first column
df_st1.columns = ['hazard-category', 'product-category']  # Rename columns
df_st1.to_csv(st1_output_path, index=False)  # Save back to Google Drive
print(f"Processed ST1_predictions.csv saved to: {st1_output_path}")

# Process ST2_predictions.csv
df_st2 = pd.read_csv(st2_path)  # Load the file
df_st2 = df_st2.iloc[:, 1:]  # Drop the first column
df_st2.columns = ['hazard', 'product']  # Rename columns
df_st2.to_csv(st2_output_path, index=False)  # Save back to Google Drive
print(f"Processed ST2_predictions.csv saved to: {st2_output_path}")


Processed ST1_predictions.csv saved to: /content/drive/My Drive/AUEB/ST1_predictions_cleaned.csv
Processed ST2_predictions.csv saved to: /content/drive/My Drive/AUEB/ST2_predictions_cleaned.csv
