### Hugging Face Transformers: Images ###

In [11]:
import os
import glob
import numpy as np
import pandas as pd

# PyTorch packages
import torch
import torch.nn as nn

# Hugging Face
from transformers import pipeline
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import set_seed
from datasets import load_dataset

# Appearance of the Notebook
from IPython.display import display, HTML
# Wider text output for NumPy arrays and pandas frames
np.set_printoptions(linewidth=110)
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 100,
    'display.width', 1000,
)

# Stretch the notebook container to the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import nlptools as nlpt
from nlptools.fileutils import FileOP
from nlptools.imageproc import ImageData
print(f'NLP Tools package version:  {nlpt.__version__}')
print(f'PyTorch version:            {torch.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
NLP Tools package version:  0.0.post1.dev37+g1eeedfd.d20250111
PyTorch version:            2.6.0a0+df5bbc09d1.nv24.11


In [3]:
# GPU checks: report the CUDA environment and select the compute device
is_cuda = torch.cuda.is_available()
print(f'CUDA available: {is_cuda}')
print(f'Number of GPUs found:  {torch.cuda.device_count()}')

# Use the first GPU when CUDA is usable, otherwise stay on the CPU
device_str = 'cuda:0' if is_cuda else 'cpu'

if is_cuda:
    # Details are only queried when a CUDA device is actually present
    print(f'Current device ID:     {torch.cuda.current_device()}')
    print(f'GPU device name:       {torch.cuda.get_device_name(0)}')
    print(f'CUDNN version:         {torch.backends.cudnn.version()}')
    # Hand unused cached memory back to the driver before any model is loaded
    torch.cuda.empty_cache()

device = torch.device(device_str)
print()
print(f'Device for model training/inference: {device}')

CUDA available: True
Number of GPUs found:  1
Current device ID:     0
GPU device name:       NVIDIA GeForce RTX 3070 Laptop GPU
CUDNN version:         90501

Device for model training/inference: cuda:0


In [6]:
# Directories and files
# expanduser('~') resolves the home directory even when HOME is unset;
# os.environ.get('HOME') returns None in that case and os.path.join would
# then raise TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'data')

In [14]:
# Fetch the image data set from the Hugging Face Hub by its repository id
dataset_name = 'Matias12f/cats_and_dogs'
dataset = load_dataset(dataset_name)

Resolving data files:   0%|          | 0/176 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/21 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/24 [00:00<?, ?it/s]