In [None]:
# Check CPU information
import os
import psutil

# Logical core count as reported by psutil (logical=True)
cpu_cores = psutil.cpu_count(logical=True)
print(f"Number of CPU cores: {cpu_cores}")

# System memory snapshot; byte counts are divided by 1024**3 to print them in GB
cpu_memory = psutil.virtual_memory()
bytes_per_gb = 1024**3
total_gb = cpu_memory.total / bytes_per_gb
available_gb = cpu_memory.available / bytes_per_gb
used_gb = cpu_memory.used / bytes_per_gb
print(f"Total CPU Memory: {total_gb:.2f} GB")
print(f"Available CPU Memory: {available_gb:.2f} GB")
print(f"Used CPU Memory: {used_gb:.2f} GB")
print(f"Memory Usage Percentage: {cpu_memory.percent}%")

# TPU information
# TPU is accessed through TensorFlow, so we'll check TPU usage
import tensorflow as tf

# Check TPU
# Resolve the TPU cluster and initialise the TPU system; a ValueError from either
# step is treated as "no TPU" and reported together with its message.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("TPU Available.")
    # NOTE(review): master() presumably returns the TPU's address rather than a
    # display name -- confirm before relying on the "TPU Name" label.
    print(f"TPU Name: {tpu.master()}")

    # The value returned by the initialisation call is kept in `tpu_details`
    # so it can be inspected in a later cell.
    tpu_details = tf.tpu.experimental.initialize_tpu_system(tpu)
    print("TPU initialized.")
except ValueError as e:
    print("TPU not available.")
    # Surface the underlying cause instead of discarding the exception.
    print(f"Reason: {e}")

# Report which accelerator runtime is attached, based on Colab environment variables.
def detect_colab_runtime(environ=None):
    """Classify the runtime as "GPU", "TPU" or "CPU" from Colab environment variables.

    Args:
        environ: Mapping of environment variables to inspect. Defaults to
            ``os.environ`` when omitted.

    Returns:
        "GPU" if ``COLAB_GPU`` holds a value other than "" or "0",
        "TPU" if ``COLAB_TPU_ADDR`` holds a non-empty value,
        otherwise "CPU".
    """
    if environ is None:
        environ = os.environ
    # Check the VALUE of COLAB_GPU, not just its presence: a presence test would
    # report a GPU whenever the variable exists, even when it is "0" or empty.
    # NOTE(review): Colab appears to define COLAB_GPU on non-GPU runtimes too -- confirm.
    if environ.get('COLAB_GPU', '').strip() not in ('', '0'):
        return "GPU"
    if environ.get('COLAB_TPU_ADDR'):
        return "TPU"
    return "CPU"


print(f"Using {detect_colab_runtime()} Runtime")


In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn import metrics


In [None]:
from huggingface_hub import login

# Read the Hugging Face access token from the HF_TOKEN environment variable
# instead of embedding it in the notebook, so the secret is never saved or
# shared along with the file.
import os

hf_token = os.environ.get("HF_TOKEN") or None
# NOTE(review): when token is None, login() is documented to prompt for the
# token interactively -- confirm against the installed huggingface_hub version.
login(token=hf_token)


In [None]:
# Step 1: Load the pre-trained model and tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint identifier shared by the tokenizer and the classification model
model_name = "PavanDeepak/text-classification-model-iab-categories-mixed-bert-base-uncased"

# Load the tokenizer first, then the model, both from the same checkpoint
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)


In [None]:
# Select the CUDA device when torch reports one, otherwise the CPU,
# then place the model on that device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

In [None]:
# Step 2: Load the test data
# The CSV is decoded as ISO-8859-1
test_csv_path = '/kaggle/input/trial-fibe1/dataset/test.csv'  # Update with your actual path
test_data = pd.read_csv(test_csv_path, encoding='ISO-8859-1')

In [None]:
# Step 3: Tokenize the test data
# Empty cells in the CSV are loaded by pandas as float NaN, and the tokenizer
# expects strings; replace missing values with "" and cast every entry to str
# so a single blank row cannot abort tokenization. Rows that already hold
# strings are passed through unchanged.
test_texts = test_data['text'].fillna('').astype(str).tolist()

# Truncate to at most 512 tokens, pad, and return PyTorch tensors
test_encodings = tokenizer(
    test_texts,
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors='pt'
)

In [None]:
import numpy as np

# Step 4: Run the model over the encoded test set in batches
batch_size = 32  # Rows per forward pass; adjust this based on your available memory
predictions = []

num_rows = len(test_encodings['input_ids'])

# Gradients are not needed for inference
with torch.no_grad():
    for start in range(0, num_rows, batch_size):
        rows = slice(start, start + batch_size)
        logits = model(
            test_encodings['input_ids'][rows].to(device),
            attention_mask=test_encodings['attention_mask'][rows].to(device),
        ).logits
        # Index of the highest logit per row, moved back to the CPU as numpy values
        predictions.extend(logits.argmax(dim=1).cpu().numpy())

predictions = np.array(predictions)


In [None]:
# Step 5: Prepare the submission file
# Pair each test row's 'Index' value (test.csv must provide this column)
# with the predicted label index for that row
submission = test_data[['Index']].assign(target=predictions)

# Write without the DataFrame's own row index
submission.to_csv('submissions.csv', index=False)
print("Submission file created: submissions.csv")