## Zero-Shot Classification with Ollama ##
Uses the scikit-ollama package: https://github.com/AndreasKarasenko/scikit-ollama

In [32]:
# Imports
import os
import pandas as pd
import numpy as np
import ollama

# PyTorch packages
import torch

# Scikit-ollama:https://andreaskarasenko.github.io/skollama-docs/
from skollama.models.ollama.classification.zero_shot import ZeroShotOllamaClassifier

# Notebook appearance: stretch the cell container to the full browser width and
# widen the NumPy / pandas text output so wide arrays and frames are not wrapped.
from IPython.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
# set_option accepts several (option, value) pairs in a single call
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 100,
    'display.width', 1000,
)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import nlptools as nlpt
print(f'NLP Tools package version:  {nlpt.__version__}')
print(f'PyTorch version:            {torch.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
NLP Tools package version:  0.0.post1.dev42+ga7dec38.d20250114
PyTorch version:            2.6.0a0+df5bbc09d1.nv24.11


In [33]:
# GPU checks: report what CUDA offers and choose the torch device.
# Defines the globals is_cuda, device_str and device.
is_cuda = torch.cuda.is_available()
print(f'CUDA available: {is_cuda}')
print(f'Number of GPUs found:  {torch.cuda.device_count()}')

if not is_cuda:
    # No usable GPU: run on the CPU
    device_str = 'cpu'
else:
    print(f'Current device ID:     {torch.cuda.current_device()}')
    print(f'GPU device name:       {torch.cuda.get_device_name(0)}')
    print(f'CUDNN version:         {torch.backends.cudnn.version()}')
    torch.cuda.empty_cache()
    device_str = 'cuda:0'

device = torch.device(device_str)
print()
print(f'Device for model training/inference: {device}')

CUDA available: True
Number of GPUs found:  1
Current device ID:     0
GPU device name:       NVIDIA GeForce RTX 3070 Laptop GPU
CUDNN version:         90501

Device for model training/inference: cuda:0


In [35]:
# Data directory maintained by docker (not available on host).
# expanduser('~') still resolves the home directory when $HOME is unset, whereas
# os.environ.get('HOME') would return None and make os.path.join raise TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'data')

In [25]:
# Download a data set for text classification
# Original data set 
!wget -nc https://lazyprogrammer.me/course_files/nlp/bbc_text_cls.csv -P /app/data
df = pd.read_csv(os.path.join(data_dir, 'bbc_text_cls.csv'))

In [31]:
# Get a small data set for sentiment analysis
# NOTE(review): get_classification_dataset is neither imported nor defined in any
# cell shown here, so this cell depends on kernel state from elsewhere.
# Presumably it is skllm.datasets.get_classification_dataset (the helper used in
# the scikit-ollama examples) — confirm and add the import to the imports cell.
text, labels = get_classification_dataset()
print(labels)

['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral']


In [28]:
# Build the zero-shot classifier and fit it on the texts and labels loaded above.
# NOTE(review): model_name and ollama_host are not defined in any cell shown
# here — confirm where they are set, and that this happens before this cell on
# a fresh Restart & Run All.
clf = ZeroShotOllamaClassifier(model=model_name, host=ollama_host)
# presumably fit() only records the candidate label set for a zero-shot model
# (no training happens) — verify against the scikit-ollama docs
clf.fit(text, labels)

In [29]:
# Classify the same texts that were passed to fit()
preds = clf.predict(text)

100%|██████████| 30/30 [00:04<00:00,  6.67it/s]


In [30]:
# Show the predicted labels; compare with the true labels printed when the data set was loaded
print(preds)

['positive' 'positive' 'negative' 'negative' 'positive' 'positive' 'neutral' 'negative' 'positive' 'positive'
 'negative' 'positive' 'negative' 'neutral' 'positive' 'negative' 'negative' 'neutral' 'positive' 'neutral'
 'positive' 'positive' 'positive' 'positive' 'negative' 'positive' 'negative' 'positive' 'neutral' 'neutral']
