In [1]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from transformers import AutoTokenizer
from llama_index.core import set_global_tokenizer
from llama_index.core.node_parser import HTMLNodeParser
from pathlib import Path
from bs4 import BeautifulSoup
import psycopg 
from llama_index.core import PromptTemplate
import os
from dotenv import load_dotenv

In [2]:
load_dotenv("/setup/on.env")
pg_user = os.getenv("POSTGRES_USER")
pg_db = os.getenv("POSTGRES_DB")
pg_pwd = os.getenv("POSTGRES_PASSWORD")

In [3]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-14B-Instruct")

Settings.embed_model = HuggingFaceEmbedding(
    model_name = "BAAI/bge-base-en-v1.5"
)

set_global_tokenizer(tokenizer.encode)

In [4]:
data_dir = "/notebooks/data/ttlg-html"
tags = []
html_docs = []
for ext in ["*.html"]:
    for path in Path(data_dir).rglob(ext):
        with open(path, "rb") as file:
            html_text = file.read().decode("windows-1252")
            soup = BeautifulSoup(html_text)
            tags.extend([tag.name for tag in soup.find_all()])
            html_docs.append(Document(text=html_text))


In [5]:
len(html_docs)

143

In [6]:
# tags = ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "b", "i", "u", "section", "blockquote", 'pagetitle']
tags = ["blockquote", 'pagetitle']

parser = HTMLNodeParser(tags=tags)
nodes = parser.get_nodes_from_documents(html_docs)
print(len(nodes))

143


In [7]:
pg_pwd

'grover'

In [8]:
def drop(name):
    with psycopg.connect(
        f"host=postgres dbname={pg_db} user={pg_user} password={pg_pwd}"
    ) as conn:
        with conn.cursor() as cur:
            cur.execute(f"""
                drop table if exists {name};
                """)
            conn.commit()


drop("data_html")

In [9]:
vector_store = PGVectorStore.from_params(
    database=pg_db,
    host="postgres",
    password=pg_pwd,
    port=5432,
    user=pg_user,
    table_name="html",
    embed_dim=768,
    hnsw_kwargs={
        "hnsw_m": 14,
        "hnsw_ef_construction": 72,
        "hnsw_ef_search": 52,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [10]:
index = VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True, embed_model=Settings.embed_model)

Generating embeddings:   0%|          | 0/143 [00:00<?, ?it/s]

In [11]:
def completion_to_prompt(completion):
   return f"<|im_start|>system\n<|im_end|>\n<|im_start|>user\n{completion}<|im_end|>\n<|im_start|>assistant\n"

def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|im_start|>system\n{message.content}<|im_end|>\n"
        elif message.role == "user":
            prompt += f"<|im_start|>user\n{message.content}<|im_end|>\n"
        elif message.role == "assistant":
            prompt += f"<|im_start|>assistant\n{message.content}<|im_end|>\n"

    if not prompt.startswith("<|im_start|>system"):
        prompt = "<|im_start|>system\n" + prompt
        

    prompt = prompt + '<|im_start|>"You are Qwen, created by Alibaba Cloud. You are a helpful assistant. You answer user queries about the Through The Looking Glass (TTLG) forums using retreived html data scraped from the boards.\n'

    return prompt

llm = LlamaCPP(
    model_url="https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q6_K.gguf",
    temperature=0.1,
    max_new_tokens=1024,
    context_window=12384,
    generate_kwargs={"repeat_penalty": 1.15, "top_k": 0, "top_p": 0.5, "min_p": 0.1},
    model_kwargs={
        "n_gpu_layers": -1,
    },
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

Settings.llm = llm

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    yes
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4060 Ti, compute capability 8.9, VMM: yes
llama_load_model_from_file: using device CUDA0 (NVIDIA GeForce RTX 4060 Ti) - 14449 MiB free
llama_model_loader: loaded meta data with 38 key-value pairs and 579 tensors from /llamaindex_cache/models/Qwen2.5-14B-Instruct-Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 14B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:       

In [13]:
print(
    index.as_query_engine().query(
        'Using the provided context, answer the following query: Provide a summary of popular discussion topics in the TTLG forums.'
    )
)

Llama.generate: 8 prefix-match hit, remaining 1501 prompt tokens to eval
llama_perf_context_print:        load time =   19933.47 ms
llama_perf_context_print: prompt eval time =       0.00 ms /  1501 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   202 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   14776.14 ms /  1703 tokens


Based on the provided context, it seems that one of the popular discussion topics in TTLG forums is whether or not to create a new forum for Arkane's upcoming game "The Crossing". There are differing opinions among members about this. Some believe that since TTLG has followed other games from Arkane such as Arx and DoMM, they should include The Crossing too, especially given the involvement of ex-LGSers and ex-IonStorm people in its development. Others argue against it due to lack of information on what the game will be like at this point.

Another topic that seems popular is feedback about forum organization and management decisions made by admins. There's a discussion around whether certain forums are necessary or not, with some members suggesting re-organization into sub-forums for companies employing former LG staff as an alternative to adding new individual game forums each time.

Lastly, there appears to be interest in the development of The Crossing itself, with forum users expr