<a href="https://colab.research.google.com/github/davidelgas/DataSciencePortfolio/blob/main/Language_Models/LLM_RAG_Semantic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# === 1. Mount Google Drive ===
from google.colab import drive
drive.mount('/content/drive')


# === 2. Install Required Packages ===
!pip install -q sentence-transformers faiss-cpu pandas openai --upgrade

!pip install snowflake

import snowflake.connector

# === 3. RAG Pipeline ===
import os
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from openai import OpenAI

# === 4. Configuration ===
# Text file on the mounted Drive whose stripped content is used as the API key.
api_key_path = "/content/drive/Othercomputers/My Mac/CSCI_104/credentials/openaikey.txt"

# Pull the key from disk, trimming surrounding whitespace / trailing newline.
with open(api_key_path) as key_file:
    openai_api_key = key_file.read().strip()

# Client object used for the chat-completion requests later in this cell.
client = OpenAI(api_key=openai_api_key)

# === 5. Load Corpus from Snowflake ===
import snowflake.connector

# Load credentials from a text file (format: KEY=VALUE per line)
sf_creds_path = '/content/drive/Othercomputers/My Mac/Git/credentials/snowflake_credentials.txt'

sf_env = {}
with open(sf_creds_path, 'r') as f:
    for line in f:
        # Lines without '=' (blank lines, stray text) are skipped.
        if '=' in line:
            # Split on the first '=' only, so a value may itself contain '='.
            key, value = line.strip().split('=', 1)
            sf_env[key.strip()] = value.strip()

# Connect to Snowflake
conn = snowflake.connector.connect(
    user=sf_env['USER'],
    password=sf_env['PASSWORD'],
    account=sf_env['ACCOUNT'],
    database='E9_CORPUS',
    schema='E9_CORPUS_SCHEMA',
    warehouse='COMPUTE_WH'  # or use your default warehouse
)

# Always release the cursor and the connection, even if the query fails;
# the rows are fully fetched into memory before either is closed.
try:
    cur = conn.cursor()
    try:
        cur.execute("SELECT THREAD_ID, THREAD_TITLE, THREAD_FIRST_POST, THREAD_ALL_POSTS FROM E9_CORPUS.E9_CORPUS_SCHEMA.E9_FORUM_CORPUS")
        rows = cur.fetchall()
    finally:
        cur.close()
finally:
    conn.close()

# Convert to DataFrame (column order matches the SELECT list above)
df = pd.DataFrame(rows, columns=['thread_id', 'thread_title', 'thread_first_post', 'thread_all_posts'])
print(f"Loaded {len(df)} threads from Snowflake.")

# Build one searchable document per thread: title, first post and all posts,
# separated by blank lines. Missing values are replaced with "" beforehand.
_text_columns = ("thread_title", "thread_first_post", "thread_all_posts")
_title, _first_post, _all_posts = (df[col].fillna("") for col in _text_columns)
df["full_text"] = _title + "\n\n" + _first_post + "\n\n" + _all_posts


# === 6. Embed Corpus ===
# The same model instance is reused below to encode the question.
model = SentenceTransformer("all-MiniLM-L6-v2")
corpus_texts = df["full_text"].tolist()
corpus_embeddings = model.encode(corpus_texts, show_progress_bar=True)

# === 7. Create FAISS Index ===
# Flat L2 index whose dimensionality is the width of the embedding matrix.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

# === 8. Ask a Question ===
question = "How do I remove the steering wheel in an E9?"
# encode() receives a one-element list so the query comes back as a batch of one.
question_embedding = model.encode([question])

# === 9. Retrieve Top Matches + Distances ===
top_k = 5
# Never request more neighbours than the index holds (but at least one,
# since the search needs a positive k).
k = max(1, min(top_k, index.ntotal))
distances, indices = index.search(question_embedding, k)

retrieved_texts = []
print("\n=== Retrieved Threads with Distances ===\n")
for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
    # FAISS pads missing results with label -1; df.iloc[-1] would silently
    # return the last thread, so skip those placeholders.
    if idx < 0:
        continue
    row = df.iloc[idx]  # single positional lookup, reused for title and text
    print(f"[{rank}] Distance: {dist:.4f} | Title: {row['thread_title']}")
    retrieved_texts.append(row["full_text"])

# === 10. Format Prompt with Context ===
# Number the retrieved threads from 1 and separate them with blank lines.
context = "\n\n".join(
    f"Thread {number}:\n{text}"
    for number, text in enumerate(retrieved_texts, start=1)
)
rag_prompt = f"""You are an expert on BMW E9 maintenance. Use the following forum threads to answer the question.

{context}

Question: {question}
Answer:"""

def _ask_gpt(prompt):
    """Send `prompt` as a single user message to gpt-3.5-turbo and return the API response object."""
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )


# === 11a. Generate Answer WITH context ===
response_with_context = _ask_gpt(rag_prompt)

# === 11b. Generate Answer WITHOUT context (baseline) ===
# Same persona and question as the RAG prompt, minus the retrieved threads.
baseline_prompt = f"""You are an expert on BMW E9 maintenance.

Question: {question}
Answer:"""

response_without_context = _ask_gpt(baseline_prompt)

# === 12. Output both answers ===
# Each heading is followed by the text of the first choice in that response.
for _heading, _response in (
    ("\n=== ANSWER WITH RAG CONTEXT ===\n", response_with_context),
    ("\n=== BASELINE ANSWER (No RAG) ===\n", response_without_context),
):
    print(_heading)
    print(_response.choices[0].message.content)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded 8578 threads from Snowflake.


Batches:   0%|          | 0/269 [00:00<?, ?it/s]


=== Retrieved Threads with Distances ===

[1] Distance: 0.6596 | Title: Formuling steering wheel with BMW adapter from E9
[2] Distance: 0.7089 | Title: Nardi Steering Wheel
[3] Distance: 0.7424 | Title: Steering wheel on eBay
[4] Distance: 0.8357 | Title: "Early" E9 steering wheel
[5] Distance: 0.8773 | Title: Here is my manual steering box and other E9-stuff

=== ANSWER WITH RAG CONTEXT ===

To remove the steering wheel in an E9, you will need to first remove the horn button cover. In most cases, you can carefully insert your fingernail underneath the cover and pop it off without causing any damage. Once the horn button cover is removed, you will likely see a nut that needs to be loosened to release the steering wheel. Make sure to disconnect any electrical connections if your steering wheel has any controls or buttons. Once the nut is removed, you can gently pull the steering wheel towards you to release it from the steering column. Be sure to handle the steering wheel with care to 

In [3]:
# Sanity check: report how many rows the corpus DataFrame currently holds.
print("Corpus rows loaded: {}".format(len(df)))

Corpus rows loaded: 8578


Now repeating the same RAG experiment with Llama 2 (`meta-llama/Llama-2-7b-chat-hf`) in place of the OpenAI model.

In [5]:
# === STEP 1: INSTALL DEPENDENCIES ===
!pip install -q sentence-transformers faiss-cpu pandas transformers accelerate bitsandbytes

# === STEP 2: MOUNT GOOGLE DRIVE ===
from google.colab import drive
drive.mount('/content/drive')

# === STEP 3: IMPORTS ===
import os
import pandas as pd
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# === STEP 4: LOAD EMBEDDING MODEL ===
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# === STEP 5: LOAD LLAMA-2 MODEL FROM HUGGING FACE ===
# Gated repository: access must be granted on the model page AND a Hugging Face
# token must be available locally (e.g. from `huggingface-cli login`).
llama_model_name = "meta-llama/Llama-2-7b-chat-hf"  # You must accept model access on Hugging Face
# `token=True` (the replacement for the deprecated `use_auth_token=True`) makes
# the stored token mandatory; it is passed to both downloads because the
# tokenizer files and the model weights sit behind the same gate.
tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=True)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_model_name,
    token=True,
    torch_dtype=torch.float16,  # half precision for the loaded weights
    device_map="auto"
)

# === STEP 6: LOAD CSV CORPUS FROM DRIVE ===
# Note: this rebinds `df`, replacing any DataFrame of the same name created earlier.
csv_path = "/content/drive/Othercomputers/My Mac/Git/Language_Models/datasets/e9/e9_forum_corpus.csv"
df = pd.read_csv(csv_path)

# === STEP 7: PREPARE TEXT COLUMN ===
# One document per thread: title, first post and all posts joined by blank
# lines, with missing values replaced by "" beforehand.
_text_columns = ("thread_title", "thread_first_post", "thread_all_posts")
_title, _first_post, _all_posts = (df[col].fillna("") for col in _text_columns)
df["full_text"] = _title + "\n\n" + _first_post + "\n\n" + _all_posts

# === STEP 8: EMBED CORPUS ===
# Encode every thread document; a progress bar is shown while batches run.
corpus_texts = df["full_text"].tolist()
corpus_embeddings = embedding_model.encode(corpus_texts, show_progress_bar=True)

# === STEP 9: CREATE FAISS INDEX ===
# Flat L2 index whose dimensionality is the width of the embedding matrix.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

# === STEP 10: ASK A QUESTION ===
question = "How do I remove the steering wheel in an E9?"
# encode() receives a one-element list so the query comes back as a batch of one.
question_embedding = embedding_model.encode([question])

# === STEP 11: RETRIEVE RELEVANT THREADS ===
top_k = 5
# Never request more neighbours than the index holds (but at least one,
# since the search needs a positive k).
k = max(1, min(top_k, index.ntotal))
distances, indices = index.search(question_embedding, k)
# FAISS pads missing results with label -1; passing -1 to df.iloc would
# silently select the last row, so drop those placeholders first.
hit_positions = [int(i) for i in indices[0] if i >= 0]
hits = df.iloc[hit_positions]
retrieved_texts = hits["full_text"].tolist()
retrieved_titles = hits["thread_title"].tolist()

# === STEP 12: FORMAT PROMPT FOR LLAMA ===
# Each retrieved thread is numbered from 1 and introduced by its title.
# NOTE(review): the complete text of every retrieved thread is inlined here;
# confirm the resulting prompt fits within the model's context window.
context = "\n\n".join(
    f"Thread {number}: {title}\n{text}"
    for number, (title, text) in enumerate(zip(retrieved_titles, retrieved_texts), start=1)
)
rag_prompt = f"""### Instruction:
You are an expert on BMW E9 maintenance. Use only the following forum threads to answer the question.

{context}

### Question:
{question}

### Answer:"""

# === STEP 13: GENERATE RAG-BASED ANSWER WITH LLAMA ===
# Tokenize the prompt and move the tensors to the device hosting the model.
inputs = tokenizer(rag_prompt, return_tensors="pt").to(llama_model.device)
outputs = llama_model.generate(
    **inputs,
    max_new_tokens=512,
    temperature=0.2,
    do_sample=True,
    top_p=0.95
)
# For a causal LM, generate() returns the prompt tokens followed by the newly
# generated ones. Decode only the continuation so the stored answer does not
# repeat the instruction and every retrieved thread.
prompt_length = inputs["input_ids"].shape[1]
response_with_context = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# === STEP 14: GENERATE BASELINE ANSWER WITHOUT CONTEXT ===
# Same instruction and question as the RAG prompt, minus the retrieved threads.
baseline_prompt = f"""### Instruction:
You are an expert on BMW E9 maintenance.

### Question:
{question}

### Answer:"""

inputs_base = tokenizer(baseline_prompt, return_tensors="pt").to(llama_model.device)
outputs_base = llama_model.generate(
    **inputs_base,
    max_new_tokens=512,
    temperature=0.2,
    do_sample=True,
    top_p=0.95
)
# For a causal LM, generate() returns the prompt tokens followed by the newly
# generated ones. Decode only the continuation so the stored answer does not
# start with a copy of the prompt.
base_prompt_length = inputs_base["input_ids"].shape[1]
response_without_context = tokenizer.decode(outputs_base[0][base_prompt_length:], skip_special_tokens=True)

# === STEP 15: DISPLAY RESULTS ===
# Print each heading followed by the corresponding decoded model output.
for _heading, _answer in (
    ("\n=== ANSWER WITH RAG CONTEXT (LLaMA) ===\n", response_with_context),
    ("\n=== BASELINE ANSWER (LLaMA) ===\n", response_without_context),
):
    print(_heading)
    print(_answer)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf.
403 Client Error. (Request ID: Root=1-68190e34-107841924c6404ef19e1c9c0;41034c54-ac6b-46d4-ac57-3304c985bbb9)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-chat-hf is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-2-7b-chat-hf to ask for access.