# 🚀 Initial setup

In [3]:
# Print the value of OLLAMA_HOST as seen by the shell; blank output means it is unset or empty.
!echo $OLLAMA_HOST




## 📦 Requirements

In [None]:
# Install the packages listed in requirements.txt into the kernel's environment.
%pip install -r requirements.txt

## 🤖 Ollama model pulling

In [None]:
# Pull gpt-oss:20b, the default model used by chat_with_gpt_oss further down.
!ollama pull gpt-oss:20b

## 🔥 Hello world with torch & cuda

In [2]:
# Hello world with torch & CUDA: confirm a GPU is visible, then describe device 0.
import torch

cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    # The device queries below raise when no CUDA device is present, so they
    # only run once availability has been confirmed.
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.get_device_capability(0))
    # Total memory of device 0 in bytes divided by 1024**3 (i.e. GiB); the "GB"
    # label is kept so the output matches earlier recorded runs.
    print(torch.cuda.get_device_properties(0).total_memory / (1024 ** 3), "GB")
else:
    print("CUDA is not available; skipping GPU device queries.")

True
NVIDIA H200 NVL
(9, 0)
139.80145263671875 GB


## 🧠 Hello world with transformers


In [6]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the tokenizer and causal-LM weights for the "gpt2" checkpoint.
# The recorded run shows tokenizer and model files being downloaded at this step.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Tokenize one sample sentence, requesting PyTorch tensors ("pt").
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# Run a single forward pass with gradient tracking disabled; `outputs` is not
# printed or used further in this cell.
with torch.no_grad():
    outputs = model(**inputs)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

In [7]:
# Check that the server on h01:37118 answers; the recorded reply is "Ollama is running".
!curl http://h01:37118

Ollama is running

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## 💬 Chat with Ollama GPT-OSS

In [None]:
import requests
import json

import os
# Simple function to chat with Ollama GPT-OSS
def chat_with_gpt_oss(message, model="gpt-oss:20b", timeout=None):
    """Send a single prompt to an Ollama server and return the generated text.

    The server address is read from the ``OLLAMA_HOST`` environment variable and
    falls back to ``http://localhost:11434`` when the variable is unset or blank.
    A value without a scheme (e.g. ``h01:37118``) is treated as plain HTTP and a
    trailing slash is dropped, so the request always targets
    ``<host>/api/generate``.

    Args:
        message: Prompt text, sent as the ``prompt`` field of the request.
        model: Name of the Ollama model to query.
        timeout: Optional timeout forwarded to ``requests.post`` (seconds, or a
            ``(connect, read)`` tuple). ``None`` (the default) waits
            indefinitely, which matches the previous behaviour.

    Returns:
        On HTTP 200, the ``response`` field of the JSON reply, or
        ``'No response received'`` if that field is missing. On any other
        status code, or if the request raises, a string starting with
        ``"Error"`` is returned instead of an exception being raised.
    """
    # `or` (rather than a .get default) also covers OLLAMA_HOST being set but empty.
    host = os.environ.get("OLLAMA_HOST", "").strip() or "http://localhost:11434"
    if "://" not in host:
        # OLLAMA_HOST is commonly written as bare host:port; requests needs a scheme.
        host = "http://" + host
    url = host.rstrip("/") + "/api/generate"

    payload = {
        "model": model,
        "prompt": message,
        "stream": False
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}"
        return response.json().get('response', 'No response received')
    except Exception as e:
        # Deliberately best-effort: report the failure as text so the calling
        # cell can still print something.
        return f"Error connecting to Ollama: {str(e)}"

# Smoke test: send one prompt through the helper and print both sides of the exchange.
test_message = "Hello! Can you tell me a short joke?"
print(f"🤖 User: {test_message}")
response = chat_with_gpt_oss(message=test_message)
print(f"💬 GPT-OSS: {response}")

### 🎯 Interactive Chat Session

You can now interact with the GPT-OSS model. Try different prompts!

In [None]:
# Edit `your_message` and re-run this cell to try a different prompt.
your_message = "What are the benefits of open source AI models?"
print(f"🤖 User: {your_message}")
response = chat_with_gpt_oss(message=your_message)
print(f"💬 GPT-OSS: {response}")

In [None]:
%pip install open-web-ui

In [8]:
# Check that the server on h01:2440 answers; the recorded reply is "Ollama is running".
!curl http://h01:2440

Ollama is running

In [11]:
# Query the iptables service status; the recorded run reports that iptables.service could not be found.
!service iptables status

Redirecting to /bin/systemctl status iptables.service
Unit iptables.service could not be found.


In [10]:
!curl --noproxy "*" http://h01:2440
!curl --noproxy "*" http://h01:2440

Ollama is running

In [5]:
# List the environment modules available on this system.
!module avail


-------------------------- /opt/ohpc/pub/modulefiles --------------------------
   AOCL-BLAS/5.0-GCC-14.2.0
   ASE/3.22.1-gfbf-2023b
   ASE/3.25.0-gfbf-2024a
   ASE/3.25.0-gfbf-2025a                                    (D)
   ATK/2.38.0-GCCcore-13.2.0
   Abseil/20240116.1-GCCcore-13.2.0
   Abseil/20240722.0-GCCcore-13.3.0
   Abseil/20250512.1-GCCcore-14.2.0                         (D)
   Albumentations/1.4.4-foss-2023b-CUDA-12.4.0
   Albumentations/1.4.10-foss-2023b-CUDA-12.4.0             (D)
   Anaconda3/2021.05
   Anaconda3/2022.05
   Anaconda3/2022.10
   Anaconda3/2023.07-2
   Anaconda3/2024.02-1                                      (D)
   Arrow/16.1.0-gfbf-2023b
   Arrow/17.0.0-gfbf-2024a
   Arrow/19.0.1-gfbf-2025a                                  (D)
   Autoconf/2.71-GCCcore-13.2.0
   Autoconf/2.72-GCCcore-13.3.0
   Autoconf/2.72-GCCcore-14.2.0                             (D)
   Automake/1.16.5-GCCcore-13.2.0
   Automake/1.16.5-GCCcore-13.3.0
   Automake/1.17-GCCcore-14.2.0      

In [1]:
from utils.chat import load_md_prompts

# Load the "claimify" prompt set. The displayed result is a dict whose values
# are ChatPromptTemplate objects (e.g. under the 'decomposition' key).
# NOTE(review): the meaning of the second positional argument (True) is not
# visible in this notebook; confirm against utils.chat.
prompts = load_md_prompts("claimify", True)

In [2]:
# Display the loaded prompt templates.
prompts

{'decomposition': ChatPromptTemplate(input_variables=['excerpt', 'question', 'sentence'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are an assistant for a group of fact-checkers. You will be given a question, which was asked about a source text (it may be referred to by other names, \ne.g., a dataset). You will also be given an excerpt from a response to the question. If it contains "[...]", this means that you are NOT seeing all sentences in the response. You will also be given a particular sentence from the response. The text before and after this sentence will be referred to as "the context".\n\nYour task is to identify all specific and verifiable propositions in the sentence and ensure that each proposition is decontextualized. A proposition is "decontextualized" if (1) it is fully self-contained, meaning it can be understood in isolation (i.e., without th

In [3]:
from utils.chat import LangChainJSONChat
# Build a chat wrapper around the "element_coverage" prompt template.
# NOTE(review): parse_output=False presumably returns the raw model text rather
# than parsed JSON (the recorded call returns a plain string); confirm in utils.chat.
chat = LangChainJSONChat(prompts["element_coverage"], parse_output=False)

In [4]:
# Smoke-test call with the placeholder value "text" for every input; in the
# recorded run the model replies by asking for real claims and elements.
chat({"claims": "text", "elements": "text", "excerpt": "text", "question": "text"})

'I don’t have the necessary structured data to evaluate coverage. Please provide the actual dictionaries for Claims (C) and Elements (E), plus the question and excerpt. Use this format:\n\nQuestion: <your question>\nExcerpt from response: <the excerpt>\nClaims (C): {\n  1: "<claim 1>",\n  2: "<claim 2>"\n}\nElements (E): {\n  1: "<element 1>",\n  2: "<element 2>"\n}\n\nOnce you provide these, I’ll assess each element as fully covered by C or not fully covered by C.'