In [None]:
pip install torch transformers accelerate sentence-transformers faiss-cpu

In [None]:
# Load the causal language model and its matching tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "ZeroXClem/Llama-3.1-8B-Athena-Apollo-exp"

# Both objects are loaded from the same checkpoint name so they stay in sync.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
# Smoke-test the model with a standalone prompt
prompt = "Explain quantum entanglement like I'm 10 years old."

# Tokenize into PyTorch tensors, then move them to the device the model is on.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)

outputs = model.generate(max_new_tokens=200, **inputs)

# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Load and chunk the log file

from pathlib import Path

# Load your log file
log_path = Path("/workspace/ai-playground/data/sample_log.txt")
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding; undecodable bytes become U+FFFD instead of aborting the load.
log_text = log_path.read_text(encoding="utf-8", errors="replace")

# Split the text into fixed-size character windows (the last one may be shorter).
chunk_size = 500  # characters; adjust based on token estimates
chunks = [log_text[i:i + chunk_size] for i in range(0, len(log_text), chunk_size)]

# An empty log produces no chunks, leaving nothing to embed or search;
# stop here with a clear message instead of carrying an empty list forward.
if not chunks:
    raise ValueError(f"{log_path} is empty; nothing to chunk")


In [None]:
# Embed every chunk and index the vectors for similarity search
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Sentence-transformer model used to turn each chunk into a vector
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(chunks)

# Build a flat L2 FAISS index sized to the embedding width (second axis)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.asarray(embeddings))

In [None]:
# Define a query and retrieve the relevant chunks
def retrieve_chunks(query, top_k=3):
    """Return the log chunks whose embeddings are closest to ``query``.

    Args:
        query: Question text to embed and search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk strings, in the order returned by
        the index search. Fewer are returned when the index holds fewer
        vectors; the list is empty when ``top_k`` is not positive or the
        index is empty.
    """
    # FAISS pads missing results with id -1, which would silently alias to
    # chunks[-1]. Clamp k to the index size and drop negative ids.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []
    query_embedding = embedder.encode([query])
    _distances, indices = index.search(query_embedding, k)
    return [chunks[i] for i in indices[0] if i >= 0]

# Question to ask about the log; it drives the chunk retrieval below.
query = "What are the main errors in the test log?"

# Join the retrieved chunks into one newline-separated context string.
top_chunks = retrieve_chunks(query)
retrieved_context = "\n".join(top_chunks)

In [None]:
# Assemble the model prompt: instruction, retrieved context, then the question
prompt = (
    "You are a helpful assistant reading test logs.\n"
    "\n"
    "Context:\n"
    f"{retrieved_context}\n"
    "\n"
    f"Question: {query}\n"
    "Answer:"
)

In [None]:
# Generate an answer for the retrieval-augmented prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=300)

# For a causal LM, generate() returns the prompt tokens followed by the new
# ones. Skip the prompt so the printed answer is not buried under the echoed
# log context.
prompt_length = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))