In [9]:
from huggingface_hub import Repository
import tempfile, shutil, os

# ← change this to whatever folder you actually want to keep
PREFERRED = "no-ood-epochs-10-ense-1"

# 0) sanity check
cwd = os.getcwd()
print("Working in", cwd)

# 1) remove any existing model dirs (heuristic: contains *.lock files) except the one you want
for name in os.listdir(cwd):
    path = os.path.join(cwd, name)
    if os.path.isdir(path):
        # look for any files ending in .lock (metadata from hf download)
        if any(f.endswith(".lock") for f in os.listdir(path)):
            if name != PREFERRED:
                print(f"→ removing old folder {name}")
                shutil.rmtree(path)

# 2) make a temp dir to clone the full HF repo
tmp_dir = tempfile.mkdtemp()
print("Cloning into temp:", tmp_dir)
repo = Repository(
    local_dir=tmp_dir,
    clone_from="sawlachintan/gpt2-goemotions-ft",
    use_auth_token=True
)

# 3) copy only your preferred subfolder back into ./PREFERRED
src = os.path.join(tmp_dir, PREFERRED)
dst = os.path.join(cwd, PREFERRED)

# if it somehow already exists, nuke it
if os.path.isdir(dst):
    print(f"→ removing stale copy at {dst}")
    shutil.rmtree(dst)

print(f"Copying {src} → {dst}")
shutil.copytree(src, dst)

# 4) clean up the temp clone
print("Cleaning up temp dir")
shutil.rmtree(tmp_dir)

print("Done! Only keeping:", os.listdir(cwd))


For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/sawlachintan/gpt2-goemotions-ft into local empty directory.


Working in /Users/chintansawla/Desktop/nlp_project/clean_project
Cloning into temp: /var/folders/46/sjpfqcxn5k3_qybfjf14v03m0000gn/T/tmpnt_5na_6


Download file ood-epochs-10-ense-5/pytorch_model.bin:   0%|          | 24.7k/2.32G [00:00<?, ?B/s]
[A
Download file ood-epochs-10-ense-5/pytorch_model.bin:   0%|          | 2.61M/2.32G [00:01<15:40, 2.64MB/s]
Download file ood-epochs-10-ense-5/pytorch_model.bin:   0%|          | 7.64M/2.32G [00:03<15:52, 2.60MB/s]
[A
Download file ood-epochs-10-ense-5/pytorch_model.bin:   0%|          | 10.3M/2.32G [00:04<15:27, 2.67MB/s]
Download file ood-epochs-10-ense-5/pytorch_model.bin:   1%|          | 15.6M/2.32G [00:06<15:17, 2.69MB/s]
Download file ood-epochs-10-ense-5/pytorch_model.bin:   1%|          | 18.3M/2.32G [00:07<15:07, 2.72MB/s]
[A
Download file ood-epochs-10-ense-5/pytorch_model.bin:   1%|          | 23.2M/2.32G [00:09<15:42, 2.61MB/s]
[A
Download file ood-epochs-10-ense-5/pytorch_model.bin:   1%|          | 25.7M/2.32G [00:10<15:56, 2.57MB/s]
Download file ood-epochs-10-ense-5/pytorch_model.bin:   1%|          | 28.2M/2.32G [00:11<15:49, 2.59MB/s]
Download file ood-epochs-10-e

Copying /var/folders/46/sjpfqcxn5k3_qybfjf14v03m0000gn/T/tmpnt_5na_6/no-ood-epochs-10-ense-1 → /Users/chintansawla/Desktop/nlp_project/clean_project/no-ood-epochs-10-ense-1
Cleaning up temp dir
Done! Only keeping: ['.DS_Store', 'repo.ipynb', 'data_explore.ipynb', 'thresholds.csv', 'test.py', '.gitattributes', 'test_huggingface.ipynb', 'log', 'ood-10-epochs', 'venv', 'main.py', 'no-ood-epochs-10-ense-1']


In [10]:
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("./no-ood-epochs-10-ense-1")
config    = AutoConfig.from_pretrained(
    "gpt2",
    num_labels=28,
    finetuning_task="multi_label_classification",
    pad_token_id=tokenizer.eos_token_id
)
model = AutoModelForSequenceClassification.from_pretrained(
    "./no-ood-epochs-10-ense-1",
    config=config
)


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at ./no-ood-epochs-10-ense-1 and are newly initialized: ['score.weight', 'transformer.h.0.attn.c_attn.bias', 'transformer.h.0.attn.c_attn.weight', 'transformer.h.0.attn.c_proj.bias', 'transformer.h.0.attn.c_proj.weight', 'transformer.h.0.ln_1.bias', 'transformer.h.0.ln_1.weight', 'transformer.h.0.ln_2.bias', 'transformer.h.0.ln_2.weight', 'transformer.h.0.mlp.c_fc.bias', 'transformer.h.0.mlp.c_fc.weight', 'transformer.h.0.mlp.c_proj.bias', 'transformer.h.0.mlp.c_proj.weight', 'transformer.h.1.attn.c_attn.bias', 'transformer.h.1.attn.c_attn.weight', 'transformer.h.1.attn.c_proj.bias', 'transformer.h.1.attn.c_proj.weight', 'transformer.h.1.ln_1.bias', 'transformer.h.1.ln_1.weight', 'transformer.h.1.ln_2.bias', 'transformer.h.1.ln_2.weight', 'transformer.h.1.mlp.c_fc.bias', 'transformer.h.1.mlp.c_fc.weight', 'transformer.h.1.mlp.c_proj.bias', 'transformer.h.1.mlp.c_proj.weight', 'transformer.h.10.

In [11]:
import torch
texts = ["I love sunny days but hate the rain.", "I'm feeling curious and excited!"]

# 2. Tokenize + batch into tensors
#    return_tensors="pt" gives you PyTorch tensors,
#    padding=True/truncation=True makes them all the same length
inputs = tokenizer(
    texts,
    return_tensors="pt",
    padding=True,
    truncation=True,
)

# 3. Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inputs = {k: v.to(device) for k, v in inputs.items()}

# 4. Forward pass (no gradient needed for inference)
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits      # shape: (batch_size, num_labels)

# 5. Convert logits → probabilities
probs = torch.sigmoid(logits)    # multi-label, independent scores

# 6. Threshold or inspect
threshold = 0.5
preds = (probs > threshold).long()  # 0/1 predictions per label

print("Scores:", probs)
print("Binary preds:", preds)


Scores: tensor([[0.5018, 0.5410, 0.4824, 0.3989, 0.5654, 0.2874, 0.6191, 0.4568, 0.8238,
         0.6015, 0.4471, 0.4068, 0.4024, 0.5305, 0.4335, 0.5351, 0.2971, 0.3326,
         0.7804, 0.6055, 0.6450, 0.3474, 0.7648, 0.6362, 0.6060, 0.6761, 0.3860,
         0.4552],
        [0.5344, 0.5606, 0.4227, 0.3549, 0.7709, 0.5586, 0.5748, 0.2480, 0.7037,
         0.6027, 0.4832, 0.4159, 0.3430, 0.3890, 0.5046, 0.6251, 0.4929, 0.4419,
         0.4972, 0.4933, 0.6439, 0.4950, 0.6389, 0.6756, 0.6051, 0.7122, 0.5622,
         0.4897]])
Binary preds: tensor([[1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
         1, 1, 0, 0],
        [1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
         1, 1, 1, 0]])
