## Zero Shot Classification with Ollama ##
https://github.com/AndreasKarasenko/scikit-ollama

In [6]:
# Imports
import os
import pandas as pd
import numpy as np
import ollama

# PyTorch packages
import torch

# Scikit-ollama:https://andreaskarasenko.github.io/skollama-docs/
from skollama.models.ollama.classification.zero_shot import ZeroShotOllamaClassifier
from skllm.datasets import get_classification_dataset

# Appearance of the Notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import nlptools as nlpt
print(f'NLP Tools package version:  {nlpt.__version__}')
print(f'PyTorch version:            {torch.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
NLP Tools package version:  0.0.post1.dev45+gb231578.d20250116
PyTorch version:            2.6.0a0+df5bbc09d1.nv24.11


In [2]:
# GPU checks
is_cuda = torch.cuda.is_available()
print(f'CUDA available: {is_cuda}')
print(f'Number of GPUs found:  {torch.cuda.device_count()}')

if is_cuda:
    print(f'Current device ID:     {torch.cuda.current_device()}')
    print(f'GPU device name:       {torch.cuda.get_device_name(0)}')
    print(f'CUDNN version:         {torch.backends.cudnn.version()}')
    device_str = 'cuda:0'
    torch.cuda.empty_cache() 
else:
    device_str = 'cpu'
device = torch.device(device_str)
print()
print(f'Device for model training/inference: {device}')

CUDA available: True
Number of GPUs found:  1
Current device ID:     0
GPU device name:       NVIDIA GeForce RTX 3060 Laptop GPU
CUDNN version:         90501

Device for model training/inference: cuda:0


In [3]:
# Data directory maintained by docker (not available on host)
data_dir = os.path.join(os.environ.get('HOME'), 'data')

In [18]:
# Get a small data set for sentiment analysis
text, labels = get_classification_dataset()
label_set = list(set(labels))
print(f'Available labels for this data set: {label_set}')
df = pd.DataFrame({'text': text, 'label': labels})
display(df.head())

Available labels for this data set: ['positive', 'neutral', 'negative']


Unnamed: 0,text,label
0,I was absolutely blown away by the performance...,positive
1,The special effects in 'Star Battles: Nebula C...,positive
2,'The Lost Symphony' was a masterclass in chara...,positive
3,I was pleasantly surprised by 'Love in the Tim...,positive
4,I went into 'Marble Street' with low expectati...,positive


### Pull the model from the ollama server ###

In [19]:
ollama_host = os.environ.get('OLLAMA_HOST')
model_name = 'llama3.2:1b'
ollama.pull(model=model_name)
print(ollama.list())

models=[Model(model='llama3.2:1b', modified_at=datetime.datetime(2025, 1, 16, 23, 24, 34, 831822, tzinfo=TzInfo(UTC)), digest='baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878', size=1321098329, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='1.2B', quantization_level='Q8_0'))]


In [21]:
clf = ZeroShotOllamaClassifier(model=model_name, host=ollama_host)
clf.fit(text, label_set)

In [22]:
preds = clf.predict(df['text'].values)

100%|██████████| 30/30 [00:05<00:00,  5.96it/s]


In [25]:
display(pd.DataFrame({'label': labels, 'predictions': preds}).sample(5))

Unnamed: 0,label,predictions
25,neutral,neutral
4,positive,positive
26,neutral,negative
16,negative,neutral
28,neutral,positive
