In [1]:
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"]="1"

import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd

from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import util, SentenceTransformer

# -----

device = "mps:0"
data_df_save_path = "localRAG.csv"
embeddings_df_save_path = "localRAG_embs.csv"
modelpath = "google/gemma-2-2b-it" # "ministral/Ministral-3b-instruct"
emb_model_name = "mixedbread-ai/mxbai-embed-large-v1" 


In [2]:
#quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    modelpath,    
    device_map="auto",
    # quantization_config=quantization_config,
    # attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)
model.to(device)

embedding_model = SentenceTransformer(model_name_or_path=emb_model_name, 
                                      device=device)
embedding_model.to(device)

# Load (slow) Tokenizer, fast tokenizer sometimes ignores added tokens
# Requires sentencepiece
tokenizer = AutoTokenizer.from_pretrained(modelpath)   
#tokenizer.add_special_tokens(dict(eos_token="</s>"))

#-----

df = pd.read_csv(data_df_save_path)
embeddings_df = pd.read_csv(embeddings_df_save_path)
embeddings = torch.from_numpy(embeddings_df.values).to(device=device, dtype=torch.float32)




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
def retrieve_topk_texts(query: str,
                        k_resources_to_return: int=5,
                        ):
    #query prompting specific to mixedbread model
    query = f'Represent this sentence for searching relevant passages: {query}'
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]
    top_results_dot_product = torch.topk(dot_scores, k=k_resources_to_return)
    texts = []
    for idx in top_results_dot_product[1].to("cpu"):
        index = int(idx)
        texts.append(df.iloc[index]["sentence_chunk"])
    return texts


def make_prompt(query,
                rag_text=None,
                ):
    sys_prompt = "You are a university professor. The user will ask you to explain a concept. Your task is to explain the concept as fully as you can, and maintain a clear and concise chain of thought. Afterwards, summarize what you have written and write a report."
    if rag_text:
        query = f"You may use the following pieces of text for context:\n {rag_text}\n Now use the context items to answer the following prompt:\n {query}"
    prompt = f"{sys_prompt}\n {query}"
    chat = [{ "role": "user", "content": prompt},]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    return prompt


def ask(query, 
        max_new_tokens = 512, 
        num_answers = 1,
        top_k_rag = 4,
        ):
    topk = retrieve_topk_texts(query, k_resources_to_return=top_k_rag)
    rag_text = ""
    for chunk in topk:
        rag_text += chunk + "\n"
    prompt = make_prompt(query, rag_text)
    model_inputs = tokenizer([prompt]*num_answers, return_tensors="pt").to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens, do_sample=True)
    out = tokenizer.batch_decode(generated_ids)
    print(f"RAG context:\n {rag_text}")
    for output in out:
        print("----------")
        print(output.replace(prompt,""))


def ask_noRAG(query,
              max_new_tokens = 512, 
              num_answers = 1,
              ):
    prompt = make_prompt(query)
    model_inputs = tokenizer([prompt]*num_answers, return_tensors="pt").to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens, do_sample=True)
    out = tokenizer.batch_decode(generated_ids)
    for output in out:
        print("----------")
        print(output.replace(prompt,""))


In [None]:
input_text = "How should I determine the intrinsic value of a company? Can you provide some example computations for me to consider?"

# Generate text without RAG
ask_noRAG(input_text, num_answers=1)

print("----------------- No RAG above ----------------- Yes RAG below -----------------")
# Generate text with RAG
ask(input_text, num_answers=1, top_k_rag=4)


----------
<bos>Let's break down the concept of intrinsic value in a company.  "Intrinsic value" is essentially the real, underlying worth (potential) of a company, independent of market sentiment, investor psychology, or current stock price. Determining this value gives you a theoretical measure of "true" value that can help you compare different businesses and determine if current market prices are fair. 

Now, here are some of the key approaches to calculate intrinsic value, along with examples:

**1. Discounted Cash Flow (DCF) Analysis:**

* **Theory:** Businesses generate cash, and this cash represents value. DCF analyses future cash flows and discounts them back to today's dollars using a discount rate that reflects the risks inherent to the company.
* **Steps:**
    1. **Forecasting:** Project cash flows for 5-10 years (longer for more mature companies).  Consider revenue, expenses, capital expenditures, and working capital.
    2. **Discount Rate:**  Calculate the appropriate d