In [None]:
# Google colab stuff
!pip install transformers datasets torch evaluate accelerate

In [None]:
from datasets import load_dataset
from transformers import EfficientNetForImageClassification, EfficientNetImageProcessor

dataset = load_dataset('bsgreenb/cats_vs_dogs')
model_name = 'google/efficientnet-b0'  # or choose the variant you prefer

# Cats vs. dogs is treated as a two-class problem (assumed from the dataset
# name — TODO confirm against the dataset's label feature).
NUM_LABELS = 2

preprocessor = EfficientNetImageProcessor.from_pretrained(model_name)
# The published checkpoint carries its pretrained classification head, whose
# output size does not match this task. Requesting `num_labels` together with
# `ignore_mismatched_sizes=True` keeps the pretrained backbone and replaces the
# head with a freshly initialised NUM_LABELS-way classifier to fine-tune.
model = EfficientNetForImageClassification.from_pretrained(
    model_name,
    num_labels=NUM_LABELS,
    ignore_mismatched_sizes=True,
)

def preprocess_images(examples):
    """Add a ``pixel_values`` entry with one preprocessed tensor per image.

    Args:
        examples: A batch as passed by ``map(..., batched=True)``; its
            ``'image'`` entry is the list of images in the batch.

    Returns:
        The same ``examples`` mapping, with ``'pixel_values'`` set to a list
        holding one tensor per input image (leading batch axis removed).
    """
    pixel_values = []
    for image in examples['image']:
        # Real-world image sets often mix grayscale / palette / RGBA / CMYK
        # files with RGB ones; the processor is fed 3-channel input only.
        # Objects without a `.mode` attribute (i.e. not PIL images) pass
        # through untouched.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")
        # The processor returns a batch of one; index 0 drops the batch axis.
        pixel_values.append(preprocessor(image, return_tensors="pt").pixel_values[0])
    examples['pixel_values'] = pixel_values
    return examples

# Preprocess every split up front; `batched=True` hands the function lists of rows.
encoded_dataset = dataset.map(preprocess_images, batched=True)

In [None]:
# Hold out 20% of the training split for validation. A fixed seed makes the
# split identical on every Restart & Run All.
train_valid_split = encoded_dataset['train'].train_test_split(test_size=0.2, seed=42)
train_dataset = train_valid_split['train']
valid_dataset = train_valid_split['test']

# Prefer a genuine held-out 'test' split when the dataset provides one.
# NOTE(review): the fallback keeps the previous behaviour of predicting on the
# full 'train' split, which overlaps the training data — any score computed on
# it is optimistic. Confirm which split the predictions are meant for.
if 'test' in encoded_dataset:
    test_dataset = encoded_dataset['test']
else:
    test_dataset = encoded_dataset['train']

from transformers import Trainer, TrainingArguments

import evaluate

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair as unpacked below;
            ``predictions`` holds per-class scores along axis 1.

    Returns:
        Whatever ``accuracy.compute`` returns for the arg-max predictions
        against ``labels``.
    """
    # Imported here because the notebook only imports numpy in a later cell;
    # without this, evaluation during training fails with NameError on a
    # fresh kernel.
    import numpy as np

    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

import inspect

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and later dropped the old keyword. Pick whichever name the installed version
# accepts so this cell runs on both.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    **{_eval_strategy_key: "epoch"},  # evaluate on the validation set every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

# Final metrics on the validation split after training.
print(trainer.evaluate())

In [None]:
import csv

import numpy as np

# Run the trained model over the test set and reduce the logits to class indices.
predictions = trainer.predict(test_dataset)
pred_logits = predictions.predictions
pred_labels = np.argmax(pred_logits, axis=-1)

# Pair every example id with its predicted label (lazy iterator, consumed once below).
test_ids = test_dataset['id']
prediction_pairs = zip(test_ids, pred_labels)

# Destination file for the exported predictions.
output_csv_path = 'predictions.csv'

# Emit the header row followed by one (id, label) row per example.
with open(output_csv_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([['id', 'label'], *prediction_pairs])
