In [4]:
import pickle

# Deserialize the stored chunk metadata into `text_chunks`.
# NOTE: pickle.load can execute arbitrary code; only use it on files you trust.
CHUNKS_PICKLE_PATH = 'Vectordatabase/financial_chunks_metadata_sentences.pkl'
with open(CHUNKS_PICKLE_PATH, 'rb') as file:
    text_chunks = pickle.load(file)

In [5]:
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to turn query text into vectors
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
text_embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Read the already-built FAISS index from disk
FAISS_INDEX_PATH = 'Vectordatabase/financial_docs_text_index_sentences.faiss'
index = faiss.read_index(FAISS_INDEX_PATH)

def retrieve_similar(query, k=1):
    """Look up the ``k`` nearest entries in the FAISS index for ``query``.

    Args:
        query: Text to embed with ``text_embedder``.
        k: Number of neighbours requested from ``index.search``.

    Returns:
        ``(indices, distances)`` as produced by ``index.search`` -- note the
        order is swapped relative to what the search call itself returns.
    """
    # The encoder is given a one-element batch; the result is cast to float32
    # before being handed to the index.
    query_vector = text_embedder.encode([query]).astype(np.float32)
    neighbour_distances, neighbour_ids = index.search(query_vector, k)
    return neighbour_ids, neighbour_distances

  from tqdm.autonotebook import tqdm, trange


In [6]:
from langchain_community.llms import Ollama

# Model tag passed to the Ollama wrapper that generates the answers
LLM_MODEL_NAME = "llama3.2:1b"
llm = Ollama(model=LLM_MODEL_NAME)

def generate_response(query):
    """Answer ``query`` with the LLM, using retrieved chunks as context.

    Calls ``retrieve_similar`` for the query, joins the ``'raw_text'`` field of
    the matching ``text_chunks`` entries with newlines, and sends one prompt
    containing the question and that context to ``llm``.

    Args:
        query: The question text.

    Returns:
        A ``(context, response)`` tuple: the joined context string and the
        value returned by ``llm.invoke`` for the prompt.
    """
    indices, _ = retrieve_similar(query)
    # FAISS fills a result row with -1 when it finds fewer than k neighbours.
    # A negative id is not a real match (and would index from the end of a
    # list), so those placeholders are skipped.
    context = "\n".join(
        text_chunks[idx]['raw_text'] for idx in indices[0] if idx >= 0
    )
    # Prompt for the LLM: besides the question and context, it asks the model
    # to report whether the context was useful.
    input_text = f"question: {query} context: {context}. Let me know if the context was useful."

    response = llm.invoke(input_text)

    return context, response

In [3]:
import pandas as pd

# Function to convert text file to DataFrame
def text_to_dataframe(file_path):
    """Parse a Q&A text file into a two-column DataFrame.

    The file is read as UTF-8 and split into entries separated by one or more
    blank (empty or whitespace-only) lines. Within an entry, the first line is
    taken as the question and the second as the answer; any further lines are
    ignored. Entries with fewer than two lines are skipped.

    A leading ``Q:`` / ``A:`` label is removed from the question / answer.
    Only the leading label is removed, so the same characters appearing later
    in the text (e.g. "FAQ: ...") are preserved.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``pandas.DataFrame`` with columns ``'Question'`` and ``'Answer'``,
        one row per parsed entry (empty if nothing could be parsed).
    """
    def _strip_label(line, label):
        # Remove the label only when it starts the line, then trim whitespace.
        text = line.strip()
        if text.startswith(label):
            text = text[len(label):]
        return text.strip()

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Group consecutive non-blank lines into entries. Treating any run of
    # blank lines as a single separator avoids mis-parsing when entries are
    # separated by extra newlines or by lines containing only whitespace.
    entries = []
    current = []
    for line in content.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            entries.append(current)
            current = []
    if current:
        entries.append(current)

    questions = []
    answers = []
    for lines in entries:
        if len(lines) >= 2:  # need both a question line and an answer line
            questions.append(_strip_label(lines[0], 'Q:'))
            answers.append(_strip_label(lines[1], 'A:'))

    return pd.DataFrame({'Question': questions, 'Answer': answers})

# Build the question/answer DataFrame from the reference answers file
file_path = "../Akshay_Work/Answers_10k.txt"
df = text_to_dataframe(file_path=file_path)

                                             Question  \
0   What is Portillo’s business model and what typ...   
1   How many restaurants did Portillo’s operate as...   
2   What are Portillo’s growth plans for restauran...   
3   What impact did inflation have on Portillo’s f...   
4   How does Portillo’s plan to maintain profitabi...   
..                                                ...   
57  What initiatives has Portillo’s implemented to...   
58  How does Portillo’s address potential food saf...   
59  How does Portillo’s manage potential real esta...   
60  How did Portillo’s manage cash flow and profit...   
61  What are Portillo’s long-term sustainability g...   

                                               Answer  
0   Portillo’s operates as a fast-casual restauran...  
1   As of December 31, 2023, Portillo's operated 8...  
2   Portillo’s aims to grow its restaurant base by...  
3   Portillo's faced 5.5% commodity price inflatio...  
4   To manage increasing food and c

In [7]:
# Run the RAG pipeline once per question; each call returns a (context, response) pair.
rag_results = df['Question'].apply(generate_response)
# Split the pairs into two columns explicitly. Unlike unpacking zip(*...),
# this also works when the DataFrame has no rows.
df['RAG_context'] = [context for context, _ in rag_results]
df['simple_RAG'] = [response for _, response in rag_results]

In [8]:
# Display the DataFrame to inspect the generated context and response columns
df

Unnamed: 0,Question,Answer,RAG_context,simple_RAG
0,What is Portillo’s business model and what typ...,Portillo’s operates as a fast-casual restauran...,Overview of Portillo's\nPortillo’s serves icon...,"Yes, the provided context is extremely helpful..."
1,How many restaurants did Portillo’s operate as...,"As of December 31, 2023, Portillo's operated 8...",The 53rd week in fiscal 2023 included Christma...,"Yes, the context provided was useful. It outli..."
2,What are Portillo’s growth plans for restauran...,Portillo’s aims to grow its restaurant base by...,"In fiscal 2023, we opened 12 new restaurants. ...",The provided context seems useful for understa...
3,What impact did inflation have on Portillo’s f...,Portillo's faced 5.5% commodity price inflatio...,Commodity inflation was\n5.5% for the year end...,The provided context is indeed useful in under...
4,How does Portillo’s plan to maintain profitabi...,To manage increasing food and commodity prices...,Portillo's Inc. Form 10-K | 34Table of Content...,"Yes, the context provided is very useful. It p..."
...,...,...,...,...
57,What initiatives has Portillo’s implemented to...,Portillo’s launched a DEI initiative guided by...,"At Portillo’s, we embrace Diversity, Equity, I...","Yes, the context you provided was very helpful..."
58,How does Portillo’s address potential food saf...,Portillo’s has implemented a comprehensive foo...,Portillo's Inc. Form 10-K | 34Table of Content...,"Yes, the context provided is useful. It seems ..."
59,How does Portillo’s manage potential real esta...,Portillo’s manages real estate challenges by r...,Portillo's Inc. Form 10-K | 53PORTILLO'S INC.\...,The context provided is a Form 10-K report fro...
60,How did Portillo’s manage cash flow and profit...,Portillo’s achieved strong cash flow and profi...,Portillo's Inc. Form 10-K | 53PORTILLO'S INC.\...,The context provided is indeed helpful in unde...


In [9]:
# The prompt asks the model whether the context was useful, so name the
# response column after that. Reassigning (rather than inplace=True) follows
# the recommended pandas idiom and avoids mutating the existing frame object.
df = df.rename(columns={'simple_RAG': 'RAG_helpful_context'})

In [10]:
# Persist the results; the DataFrame index is written as the first (unnamed) column
df.to_csv(path_or_buf="LLM_test.csv")