In [1]:
import chromadb
from llama_index.core import PromptTemplate
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore
from pathlib import Path
import os
import hashlib
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_bolt.adapter.flask import SlackRequestHandler
from slack_bolt import App
from dotenv import find_dotenv, load_dotenv
from flask import Flask, request




In [9]:
# Smoke test: confirm the local Ollama model responds before anything else
# depends on it. The large request_timeout (presumably seconds -- confirm
# against the Ollama integration docs) leaves room for slow local generation.
llm = Ollama(
    model="llama3.1:latest",
    request_timeout=3000.0,
)
response = llm.complete("What is linear regression?")
print(response)

Linear regression is a fundamental concept in statistics and machine learning that helps predict a continuous outcome variable based on one or more predictor variables. It's a simple yet powerful technique for modeling the relationship between a dependent variable (y) and one or more independent variables (x).

**Key components:**

1. **Dependent variable (y):** The variable you're trying to predict or explain, often denoted as "y" or "target".
2. **Independent variables (x):** The variables that are used to predict the dependent variable, often denoted as "x" or "features".
3. **Relationship:** Linear regression assumes a linear relationship between the independent and dependent variables.

**How it works:**

The goal of linear regression is to find the best-fitting line (or hyperplane in higher dimensions) that minimizes the sum of squared errors between predicted values and actual observations. This is done by adjusting the coefficients of the independent variables, which are used t

In [10]:
# Build the HuggingFace embedding model and register both it and the LLM as
# the global defaults on llama_index's Settings object.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)
Settings.embed_model = embed_model
Settings.llm = llm

In [18]:
# Folders containing the PDF and TXT source material.
# Raw strings keep the Windows backslashes literal: sequences such as "\A" or
# "\M" in a normal string are invalid escapes and raise SyntaxWarning on
# Python 3.12+ (and are slated to become errors).
pdf_folder_path = r"C:\AI BootCamp\MyRepo\RAG_LLAM3\PDF"
txt_folder_path = r"C:\AI BootCamp\MyRepo\RAG_LLAM3\TXT"

# Fail fast if either directory is missing
if not os.path.isdir(pdf_folder_path):
    raise ValueError(f"Directory {pdf_folder_path} does not exist.")

if not os.path.isdir(txt_folder_path):
    raise ValueError(f"Directory {txt_folder_path} does not exist.")

# Gather the PDF and TXT files sitting directly in each folder
# (glob('*.ext') does not descend into sub-folders)
pdf_files = list(Path(pdf_folder_path).glob('*.pdf'))
txt_files = list(Path(txt_folder_path).glob('*.txt'))

# Directory readers for each folder; these are created with recursive=True
pdf_reader = SimpleDirectoryReader(input_dir=pdf_folder_path, recursive=True)
txt_reader = SimpleDirectoryReader(input_dir=txt_folder_path, recursive=True)

# Minimal document wrapper for TXT files, exposing a document-like interface
class TxtDocument:
    """Hold the text of a TXT file together with its document id.

    Attributes:
        text: The document body.
        doc_id: Identifier supplied by the caller.
        id_: Same value as ``doc_id``, stored under a second attribute name.
    """

    def __init__(self, text, doc_id):
        self.text, self.doc_id = text, doc_id
        self.id_ = doc_id  # mirror of doc_id under the id_ attribute name

    def get_doc_id(self):
        """Return the document id."""
        return self.doc_id

    def hash(self):
        """Return the hex MD5 digest of the UTF-8 encoded text."""
        digest = hashlib.md5(self.text.encode('utf-8'))
        return digest.hexdigest()

    def get_metadata_str(self, mode=None):
        """Return a one-line metadata string; a truthy ``mode`` is appended."""
        label = f"Document ID: {self.doc_id}"
        return f"{label}, Mode: {mode}" if mode else label

    def get_content(self, metadata_mode=None):
        """Return the raw text, or a labelled string when ``metadata_mode`` is truthy."""
        if not metadata_mode:
            return self.text
        return f"Content: {self.text}, Metadata Mode: {metadata_mode}"

    def as_related_node_info(self):
        """Return a dict with the document id, text and default metadata string."""
        return dict(
            doc_id=self.doc_id,
            text=self.text,
            metadata=self.get_metadata_str(),
        )
    
# Collects every loaded document (PDF and TXT) that will be indexed
all_content = []

# Load the PDFs through the directory reader; each document's text is
# upper-cased before it is stored
for pdfs in pdf_reader.iter_data():
    for pdf in pdfs:
        pdf.text = pdf.text.upper()
        all_content.append(pdf)

# Load each TXT file, upper-case its text and wrap it in a Document whose id
# is the file path
for txt_file in txt_files:
    with open(txt_file, 'r', encoding='utf-8') as file:
        txt_content = file.read()
    # The file handle is no longer needed once the text has been read
    txt_doc = TxtDocument(text=txt_content.upper(), doc_id=str(txt_file))
    all_content.append(Document(text=txt_doc.get_content(), doc_id=txt_doc.get_doc_id()))


# Create a unified index backed by an in-memory Chroma client
chroma_client = chromadb.EphemeralClient()

# Delete the existing collection if it exists.
# Depending on the Chroma version, list_collections() yields either plain
# names or Collection objects; comparing a string against Collection objects
# never matches, so normalise every entry to its name first.
existing_collection_names = {
    entry if isinstance(entry, str) else entry.name
    for entry in chroma_client.list_collections()
}
if "ollama" in existing_collection_names:
    chroma_client.delete_collection("ollama")

# Create pipeline for indexing and retrieving documents using vector database
chroma_collection = chroma_client.create_collection("ollama")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    all_content,
    storage_context=storage_context,
    embed_model=embed_model,
    transformations=[SentenceSplitter(chunk_size=256, chunk_overlap=10)],
)

: 

In [5]:
# Load environment variables from .env file
load_dotenv(find_dotenv())

# Read the Slack credentials; a missing variable raises KeyError immediately
SLACK_BOT_TOKEN = os.environ["SLACK_BOT_TOKEN"]
SLACK_SIGNING_SECRET = os.environ["SLACK_SIGNING_SECRET"]
SLACK_BOT_USER_ID = os.environ["SLACK_BOT_USER_ID"]

# Initialize the Slack app.
# The signing secret is passed explicitly so the value read above is actually
# used, rather than relying on Bolt looking it up in the environment itself.
app = App(token=SLACK_BOT_TOKEN, signing_secret=SLACK_SIGNING_SECRET)

# Initialize the Flask app that receives Slack's HTTP requests, and the
# adapter that hands those requests to the Bolt app
flask_app = Flask(__name__)
handler = SlackRequestHandler(app)

In [6]:
# Prompt template for question answering.
# {context_str} and {query_str} are the placeholders filled in at query time.
# Adjacent string literals are concatenated with no separator, so every
# sentence boundary carries its own trailing space or newline.
template = (
    "Imagine you are a professor teaching a class on Artificial Intelligence and "
    "you answer a student's questions about what you have gone over in previous lectures. "
    "Here is some context from the Artificial Intelligence class "
    "related to the query:\n"
    "-----------------------------------------\n"
    "{context_str}\n"
    "-----------------------------------------\n"
    "Considering the above information, "
    "please respond to the following inquiry:\n\n"
    "Question: {query_str}\n\n"
    "Answer succinctly and ensure your response is "
    "clear to someone without a deep understanding of Artificial Intelligence. "
    "The professor's name is Ryan."
)
qa_template = PromptTemplate(template)

In [7]:
# Build the question-answering engine over the index: it uses the custom QA
# prompt and retrieves with similarity_top_k=3.
query_engine = index.as_query_engine(
    text_qa_template=qa_template,
    similarity_top_k=3,
)

In [None]:
# Register the listener that answers questions when the bot is @-mentioned
@app.event("app_mention")
def handle_mentions(body, say):
    """
    Event listener for mentions in Slack.
    When the bot is mentioned, this function strips the mention from the
    message, runs the remaining text through the query engine and posts the
    answer back to the channel.

    Args:
        body (dict): The event data received from Slack.
        say (callable): A function for sending a response to the channel.
    """
    mention = f"<@{SLACK_BOT_USER_ID}>"
    question = body["event"]["text"].replace(mention, "").strip()

    # A bare mention leaves nothing to ask; prompt the user instead of sending
    # an empty query to the LLM.
    if not question:
        say("Please include a question after mentioning me.")
        return

    say("Great question! Let me find the answer for you.")
    response = query_engine.query(question)
    # Never call say() with empty/None text: fall back to a short notice when
    # the engine produced no answer text.
    say(response.response or "Sorry, I couldn't find an answer to that question.")


@flask_app.route("/slack/events", methods=["POST"])
def slack_events():
    """
    Flask endpoint for POST requests on /slack/events.
    The current Flask request is handed to the SlackRequestHandler and
    whatever it produces is returned to the caller.

    Returns:
        Response: The result of handling the request.
    """
    slack_response = handler.handle(request)
    return slack_response


# Run the Flask app
# Only starts the server when this code runs as the main module.
# flask_app.run() is called without arguments, so Flask's default settings apply.
# NOTE(review): this call presumably blocks until the server is stopped -- confirm
# that is intended when executing it as a notebook cell.
if __name__ == "__main__":
    flask_app.run()