In [2]:
!git clone https://github.com/endoflife-date/endoflife.date.git


Cloning into 'endoflife.date'...
remote: Enumerating objects: 43732, done.[K
remote: Counting objects: 100% (398/398), done.[K
remote: Compressing objects: 100% (265/265), done.[K
remote: Total 43732 (delta 286), reused 136 (delta 132), pack-reused 43334 (from 4)[K
Receiving objects: 100% (43732/43732), 10.94 MiB | 8.59 MiB/s, done.
Resolving deltas: 100% (34326/34326), done.


In [7]:
import frontmatter
from pathlib import Path

def parse_eol_markdown(path="endoflife.date/products"):
    entries = []
    for file in Path(path).glob("*.md"):
        post = frontmatter.load(file)
        product = post.get("title", file.stem)
        for release in post.get("releases", []):
            cycle = release.get("releaseCycle", release.get("cycle", "unknown"))
            release_date = release.get("releaseDate", "unknown")
            eol = release.get("eol", "unknown")
            if isinstance(eol, bool):
                eol = "TBD" if eol is False else str(eol)
            latest = release.get("latest", "")
            lts = release.get("lts", False)

            description = (
                f"{product} {cycle}, released on {release_date}, "
                f"reaches EOL on {eol}."
            )

            entries.append({
                "product": product,
                "cycle": cycle,
                "release": release_date,
                "eol": eol,
                "latest": latest,
                "lts": lts,
                "description": description
            })
    return entries


In [10]:
data=parse_eol_markdown()

In [18]:
list(filter(lambda x:(x['product']).lower().startswith('red'),data))

[{'product': 'Red Hat Satellite',
  'cycle': '6.17',
  'release': datetime.date(2025, 5, 6),
  'eol': 'TBD',
  'latest': '6.17.1',
  'lts': False,
  'description': 'Red Hat Satellite 6.17, released on 2025-05-06, reaches EOL on TBD.'},
 {'product': 'Red Hat Satellite',
  'cycle': '6.16',
  'release': datetime.date(2024, 11, 5),
  'eol': datetime.date(2026, 5, 31),
  'latest': '6.16.5.2',
  'lts': False,
  'description': 'Red Hat Satellite 6.16, released on 2024-11-05, reaches EOL on 2026-05-31.'},
 {'product': 'Red Hat Satellite',
  'cycle': '6.15',
  'release': datetime.date(2024, 4, 23),
  'eol': datetime.date(2025, 11, 30),
  'latest': '6.15.5.3',
  'lts': False,
  'description': 'Red Hat Satellite 6.15, released on 2024-04-23, reaches EOL on 2025-11-30.'},
 {'product': 'Red Hat Satellite',
  'cycle': '6.14',
  'release': datetime.date(2023, 11, 8),
  'eol': datetime.date(2025, 5, 31),
  'latest': '6.14.4.5',
  'lts': False,
  'description': 'Red Hat Satellite 6.14, released on 2023

In [23]:
from sentence_transformers import SentenceTransformer
import numpy as np

model = SentenceTransformer("all-MiniLM-L6-v2")  # small, fast

entries = parse_eol_markdown()
descriptions = [e["description"] for e in entries]
embeddings = model.encode(descriptions, show_progress_bar=True)


Batches: 100%|██████████| 190/190 [00:51<00:00,  3.68it/s]


In [29]:
import faiss

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))  # FAISS needs np.float32 arrays

# Save the index and entries
faiss.write_index(index, "eol_index.faiss")

import pickle
with open("eol_entries.pkl", "wb") as f:
    pickle.dump(entries, f)


In [30]:
!ls -lrt 

total 10000
-rw-rw-rw-   1 codespace root           15 Jul  8 04:12 README.md
-rw-rw-rw-   1 codespace codespace      64 Jul  8 04:22 requirements.txt
-rw-rw-rw-   1 codespace codespace   40446 Jul  8 04:37 Notebook.ipynb
drwxrwxrwx+ 14 codespace codespace    4096 Jul  8 05:22 endoflife.date
-rw-rw-rw-   1 codespace codespace     204 Jul  8 05:57 history
-rw-rw-rw-   1 codespace codespace   30865 Jul  8 06:06 Untitled.ipynb
-rw-rw-rw-   1 codespace codespace 9323565 Jul  8 06:07 eol_index.faiss
-rw-rw-rw-   1 codespace codespace  823104 Jul  8 06:07 eol_entries.pkl


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [11]:
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer



model = SentenceTransformer("all-MiniLM-L6-v2")  # small, fast

# Load index and entries
index = faiss.read_index("eol_index.faiss")
with open("eol_entries.pkl", "rb") as f:
    entries = pickle.load(f)

def search_eol(query):
    query_vec = model.encode([query])
    D, I = index.search(np.array(query_vec), k=3)
    for i in I[0]:
        result = entries[i]
        print(f"📦 {result['product']} {result['cycle']}")
        print(f"🛠 Released: {result['release']}")
        print(f"⏳ EOL: {result['eol']}")
        print(f"🧲 LTS: {result['lts']}\n")


model = SentenceTransformer("all-MiniLM-L6-v2")

def search_eol(query, entries, faiss_index, threshold=0.60):
    query_vec = model.encode([query])
    D, I = faiss_index.search(np.array(query_vec), k=3)

    scores = 1 / (1 + D[0])  # Convert L2 distance to similarity (approx)

    if scores[0] < threshold:
        print("❌ No strong match found for your query.")
        print("💡 Try rephrasing or check spelling.")
        print("🧩 Closest matches:")
        for idx, score in zip(I[0], scores):
            entry = entries[idx]
            print(f" - {entry['product']} {entry['cycle']} (confidence: {score:.2f})")
        return

    top_entry = entries[I[0][0]]
    print(f"✅ Found: {top_entry['product']} {top_entry['cycle']}")
    print(f"📅 Release: {top_entry['release']}")
    print(f"📆 EOL: {top_entry['eol']}")

search_eol("When does RHEL 7 reach end of life?")


TypeError: search_eol() missing 2 required positional arguments: 'entries' and 'faiss_index'

In [12]:
!pip install chromadb

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.1-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>