In [2]:
# Install Ollama by downloading the official install script and piping it straight into `sh`.
# NOTE(review): this executes remote code unverified; acceptable on a throwaway Colab VM, but
# download-then-inspect is safer anywhere else.
!curl -fsSL https://ollama.com/install.sh | sh

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [9]:
# Sanity check: print the installed Ollama CLI version and list the locally available models.
# NOTE(review): `ollama list` presumably needs a reachable Ollama server; in this notebook
# `ollama serve` is started in a later cell, so confirm the order on a fresh Run All.
!ollama --version
!ollama list

ollama version is 0.13.3
NAME                  ID              SIZE      MODIFIED      
qwen3-embedding:4b    df5bd2e3c74c    2.5 GB    3 minutes ago    


In [6]:
!ollama serve > ollama.log 2>&1 &

In [11]:
!pip install -U --no-cache-dir faiss-gpu-cu11
!pip install ollama

Collecting ollama
  Downloading ollama-0.6.1-py3-none-any.whl.metadata (4.3 kB)
Downloading ollama-0.6.1-py3-none-any.whl (14 kB)
Installing collected packages: ollama
Successfully installed ollama-0.6.1


In [12]:
import ollama
import json
import numpy as np
from pathlib import Path
from tqdm.auto import tqdm
import faiss
# Report which FAISS build is loaded and how many GPUs it can see.
faiss_version = faiss.__version__
gpu_count = faiss.get_num_gpus()
print(f"FAISS version: {faiss_version}")
print("GPUs detected:", gpu_count)

FAISS version: 1.13.0
GPUs detected: 1


In [13]:
# Mount Google Drive; the corpus is read from it and the index is written back to it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [15]:
# Load the corpus: one JSON object per line, keeping only each record's "text" field.
CORPUS_PATH = Path("/content/drive/MyDrive/dataset/corpus.jsonl")
COPRUS_PATH = CORPUS_PATH  # backward-compatible alias for the original (misspelled) name

# Explicit UTF-8: the default text encoding otherwise depends on the platform locale.
with open(CORPUS_PATH, "r", encoding="utf-8") as f:
    # Skip blank lines (e.g. a trailing empty line) instead of crashing in json.loads.
    doc = [json.loads(line)["text"] for line in f if line.strip()]
print(len(doc))

183408


In [None]:
MODEL = "qwen3-embedding:4b"
BATCH_SIZE = 32  # documents sent per request; tune to the available GPU memory

# Embed the corpus in batches rather than one request per document: with ~183k
# documents the per-request overhead dominates, and `ollama.embed` accepts a list
# of inputs and returns one vector per input under "embeddings".
# NOTE(review): the batch endpoint is expected to return unit-normalized vectors,
# unlike the legacy `ollama.embeddings` call; the index-building cell L2-normalizes
# anyway, so downstream results should be unaffected — confirm if raw vectors matter.
all_embs = []
with tqdm(total=len(doc)) as progress:
    for start in range(0, len(doc), BATCH_SIZE):
        batch = doc[start:start + BATCH_SIZE]
        response = ollama.embed(model=MODEL, input=batch)
        all_embs.extend(response["embeddings"])
        progress.update(len(batch))

embeddings = np.array(all_embs, dtype="float32")
# Row i of `embeddings` must correspond to doc[i]; fail loudly if any vector is missing.
assert embeddings.shape[0] == len(doc), "expected exactly one embedding per document"
print(embeddings.shape)

  0%|          | 0/183408 [00:00<?, ?it/s]

In [None]:
# Build an exact (flat) inner-product index over L2-normalized vectors, so that
# inner-product scores between indexed vectors and normalized queries are cosine similarities.
dim = embeddings.shape[1]
faiss.normalize_L2(embeddings)  # normalizes the rows of `embeddings` in place
index = faiss.IndexFlatIP(dim)
index.add(embeddings)

In [None]:
# Persist the FAISS index and the embedding matrix (as left by the preceding cells)
# to Google Drive so they can be reloaded without re-embedding the corpus.
OUT_DIR = Path("/content/drive/MyDrive/indexes/clapnq-ollama-faiss")
OUT_DIR.mkdir(parents=True, exist_ok=True)

index_file = OUT_DIR / "index.faiss"
embeddings_file = OUT_DIR / "emb.npy"
faiss.write_index(index, str(index_file))
np.save(str(embeddings_file), embeddings)
print("Saved to:", OUT_DIR)
