In [None]:
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_API_KEY"] = getpass.getpass()

In [4]:
from langchain_community.chat_message_histories import SQLChatMessageHistory

chat_message_history = SQLChatMessageHistory(
    session_id="test_session_id", connection="sqlite:///sqlite.db"
)

chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")


In [4]:
chat_message_history.messages


[HumanMessage(content='Hello'),
 AIMessage(content='Hi'),
 HumanMessage(content='Hello'),
 AIMessage(content='Hi')]

In [6]:
from dotenv import load_dotenv  
import os

load_dotenv()

OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")

In [6]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

In [7]:
chain = prompt | ChatOpenAI()

chain_with_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: SQLChatMessageHistory(
        session_id=session_id, connection="sqlite:///sqlite.db"
    ),
    input_messages_key="question",
    history_messages_key="history",
)

In [8]:
config = {"configurable": {"session_id": "<SQL_SESSION_ID>"}}

chain_with_history.invoke({"question": "Whats my name"}, config=config)


Parent run 13c79427-d6c4-4a24-a00e-df6a48966c5b not found for run 7f456764-b535-49cc-88ad-7d3658aec0fb. Treating as a root run.


AIMessage(content='Your name is Bob. How can I assist you, Bob?', response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 102, 'total_tokens': 115}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d8b1a216-e451-4e93-87e9-39716f7bc1e6-0', usage_metadata={'input_tokens': 102, 'output_tokens': 13, 'total_tokens': 115})

In [11]:
# This is where we configure the session id

chain_with_history.invoke({"question": "Hi! I'm bob"}, config=config)



Parent run 3bf189ff-0293-4958-8098-462fb6c7ac34 not found for run d5927bc0-b75f-47bf-b549-f53f3e51a11d. Treating as a root run.


AIMessage(content='Hello Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 56, 'total_tokens': 66}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-59451bcc-b8ca-4bc9-aa70-37cace63979c-0', usage_metadata={'input_tokens': 56, 'output_tokens': 10, 'total_tokens': 66})

In [6]:
import os
import pandas as pd
import requests
from transformers import AutoTokenizer, AutoModel
import torch

# Initialize Pinecone and OctoAI clients
octoai_api_token = os.environ.get("OCTOAI_API_TOKEN")
pc_key = os.environ.get("pc_key")

# Load Atomic Habits keywords from the provided file
file_path = '/Users/clydeclarke/Documents/AH_router/data/ah_index.csv'
csv_path = "/Users/clydeclarke/Documents/AH_Code_Architecture/data/ah_index.csv"
index_data = pd.read_csv(csv_path)
index_data.columns = index_data.columns.str.strip()

# Extract concepts and words related to "Atomic Habits"
atomic_habits_keywords = index_data['concept'].tolist()

# Pre-defined personal keywords
personal_keywords = ["my", "I", "me", "mine", "family", "work", "personal", "health"]

# Load pre-trained model for embeddings
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-large")
model = AutoModel.from_pretrained("thenlper/gte-large")

# Function to classify query using keywords
def classify_query_keyword(query):
    if any(keyword in query for keyword in personal_keywords):
        return "personal"
    if any(keyword in query for keyword in atomic_habits_keywords):
        return "atomic_habits"
    return "unknown"

# Function to get embedding for a query
def get_embedding(query):
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings

# Function to classify query using embeddings
def classify_query_embedding(query):
    query_embedding = get_embedding(query)
    personal_embedding = get_embedding("This is a personal query.")
    atomic_habits_embedding = get_embedding("This is a query about the book Atomic Habits.")

    # Compute cosine similarity
    cosine_similarity = torch.nn.functional.cosine_similarity
    personal_similarity = cosine_similarity(query_embedding, personal_embedding)
    atomic_habits_similarity = cosine_similarity(query_embedding, atomic_habits_embedding)

    if personal_similarity > atomic_habits_similarity:
        return "personal"
    else:
        return "atomic_habits"

# Combined function to classify query
def classify_query(query):
    category = classify_query_keyword(query)
    if category == "unknown":
        category = classify_query_embedding(query)
    return category

# Example usage
query = input("Enter your query: ")
category = classify_query(query)
print(f"Query category: {category}")

# Further actions based on the classification
if category == "personal":
    print("Handling personal query...")
    # Handle personal query
elif category == "atomic_habits":
    print("Handling Atomic Habits query...")
    # Handle Atomic Habits query
else:
    print("Unknown category. Please clarify your query.")


Query category: personal
Handling personal query...


In [5]:
import os
import pandas as pd
import requests
from transformers import AutoTokenizer, AutoModel
import torch

# Initialize Pinecone and OctoAI clients
octoai_api_token = os.environ.get("OCTOAI_API_TOKEN")
pc_key = os.environ.get("pc_key")

# Load keywords from CSV file
def load_keywords_from_csv(file_path):
    df = pd.read_csv(file_path)
    keywords = df['keywords'].tolist()  # Assuming the CSV has a column named 'keywords'
    return keywords

# Load Atomic Habits keywords from the provided file
file_path = '/mnt/data/ah_index.csv'
atomic_habits_keywords = load_keywords_from_csv(file_path)

# Pre-defined personal keywords
personal_keywords = ["my", "I", "me", "mine", "family", "work", "personal", "health"]

# Load pre-trained model for embeddings
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-large")
model = AutoModel.from_pretrained("thenlper/gte-large")

# Function to classify query using enhanced keywords
def classify_query_keyword(query):
    # Check if query contains any atomic habits keywords
    for keyword in atomic_habits_keywords:
        if keyword.lower() in query.lower():
            return "atomic_habits"
    # Check if query contains any personal keywords
    for keyword in personal_keywords:
        if keyword.lower() in query.lower():
            return "personal"
    return "unknown"

# Function to get embedding for a query
def get_embedding(query):
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings

# Function to classify query using embeddings
def classify_query_embedding(query):
    query_embedding = get_embedding(query)
    personal_embedding = get_embedding("This is a personal query.")
    atomic_habits_embedding = get_embedding("This is a query about the book Atomic Habits.")

    # Compute cosine similarity
    cosine_similarity = torch.nn.functional.cosine_similarity
    personal_similarity = cosine_similarity(query_embedding, personal_embedding, dim=1)
    atomic_habits_similarity = cosine_similarity(query_embedding, atomic_habits_embedding, dim=1)

    if personal_similarity.item() > atomic_habits_similarity.item():
        return "personal"
    else:
        return "atomic_habits"

# Combined function to classify query
def classify_query(query):
    category = classify_query_keyword(query)
    if category == "unknown":
        category = classify_query_embedding(query)
    return category

# Example usage
query = input("Enter your query: ")
category = classify_query(query)
print(f"Query category: {category}")

# Further actions based on the classification
if category == "personal":
    print("Handling personal query...")
    # Handle personal query
elif category == "atomic_habits":
    print("Handling Atomic Habits query...")
    # Handle Atomic Habits query
else:
    print("Unknown category. Please clarify your query.")


'personal'

In [18]:
ah_keywords = [atomic_habits_keywords.rstrip() for atomic_habits_keywords in atomic_habits_keywords]

In [15]:
from pydantic import BaseModel, Field
from typing import Literal
from openai import OpenAI
import instructor
from typing import List

# Apply the patch to the OpenAI client
# enables response_model keyword
client = instructor.from_openai(OpenAI())


class ClassificationResponse(BaseModel):
    label: Literal["ATOMIC_HABITS", "PERSONAL"] = Field(
        ...,
        description="The predicted class label.",
    )


def classify(data: str, keywords: List['str']) -> ClassificationResponse:
    """Perform single-label classification on the input text."""
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=ClassificationResponse,
        messages=[{'role': 'system',
                   'content': f'Use these keywords to determine the appropriate classification if any of them match the data then the classification should be ATOMIC_HABITS{" ".join(keywords)}'
                   },
                    {
                        "role": "user",
                        "content": f"Classify the following text: {data}",
                    },
        ],
    )


In [14]:
classify('how can I improve my productivity at work?')

ClassificationResponse(label='PERSONAL')

In [12]:
import pandas as pd
csv_path = "/Users/clydeclarke/Documents/AH_Code_Architecture/data/ah_index.csv"
index_data = pd.read_csv(csv_path)
index_data.columns = index_data.columns.str.strip()

# Extract concepts and words related to "Atomic Habits"
atomic_habits_keywords = index_data['concept'].tolist()
[atomic_habits_keywords.rstrip() for atomic_habits_keywords in atomic_habits_keywords]

['abilities',
 'accountability',
 'none',
 'Person',
 'addiction',
 'addiction',
 'addiction',
 'addiction',
 'addiction',
 'addiction',
 'addiction',
 'addiction',
 'animal behavior',
 'animal behavior',
 'animal behavior',
 'animal behavior',
 'animal behavior',
 'animal behavior',
 'animal behavior',
 'athletes',
 'athletes',
 'athletes',
 'athletes',
 'athletes',
 'athletes',
 'atomic habits',
 'atomic habits',
 'atomic habits',
 'automating a habit',
 'automating a habit',
 'automating a habit',
 'automating a habit',
 'awareness',
 'awareness',
 'awareness',
 'bad habits',
 'bad habits',
 'behavior change',
 'behavior change',
 'behavior change',
 'behavior change',
 'behavior change',
 'behavior change',
 'biological considerations',
 'biological considerations',
 'biological considerations',
 'biological considerations',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'the brain',
 'breakthrough mome

In [26]:
import pandas as pd
file_path = '/Users/clydeclarke/Documents/AH_router/data/ah_qa.csv'
questions = pd.read_csv(file_path)
for index, row in questions.iterrows():
   print(f'Question: {row['query']} Classification: {classify(row['query'],ah_keywords)} Answer: {row["classification"]}')


Question: How can I improve my productivity at work? Classification: label='ATOMIC_HABITS' Answer: atomic_habits
Question: What are the key principles of Atomic Habits? Classification: label='ATOMIC_HABITS' Answer: atomic_habits
Question: Tell me about your family. Classification: label='PERSONAL' Answer: personal
Question: How do I build a good habit according to Atomic Habits? Classification: label='ATOMIC_HABITS' Answer: atomic_habits
Question: I'm struggling with staying motivated. Any advice? Classification: label='ATOMIC_HABITS' Answer: personal
Question: Can you summarize the book Atomic Habits? Classification: label='ATOMIC_HABITS' Answer: atomic_habits
Question: I want to track my habits more effectively. Classification: label='ATOMIC_HABITS' Answer: personal
Question: What is the Two-Minute Rule in Atomic Habits? Classification: label='ATOMIC_HABITS' Answer: atomic_habits
Question: How do I break a bad habit according to James Clear? Classification: label='ATOMIC_HABITS' Answ

In [9]:
from langchain.chains.conversation.base import ConversationChain
from langchain.memory import (
    CombinedMemory,
    ConversationBufferMemory,
    ConversationSummaryMemory,
)
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI

conv_memory = ConversationBufferMemory(
    memory_key="chat_history_lines", input_key="input"
)

summary_memory = ConversationSummaryMemory(llm=OpenAI(), input_key="input")
# Combined
memory = CombinedMemory(memories=[conv_memory, summary_memory])
_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Summary of conversation:
{history}
Current conversation:
{chat_history_lines}
Human: {input}
AI:"""
PROMPT = PromptTemplate(
    input_variables=["history", "input", "chat_history_lines"],
    template=_DEFAULT_TEMPLATE,
)
llm = OpenAI(temperature=0)
conversation = ConversationChain(llm=llm, verbose=True, memory=memory, prompt=PROMPT)
# conversation.run("my name is clyde")
print(conversation)

memory=CombinedMemory(memories=[ConversationBufferMemory(input_key='input', memory_key='chat_history_lines'), ConversationSummaryMemory(llm=OpenAI(client=<openai.resources.completions.Completions object at 0x114527920>, async_client=<openai.resources.completions.AsyncCompletions object at 0x114518890>, openai_api_key=SecretStr('**********'), openai_proxy=''), input_key='input')]) verbose=True prompt=PromptTemplate(input_variables=['chat_history_lines', 'history', 'input'], template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nSummary of conversation:\n{history}\nCurrent conversation:\n{chat_history_lines}\nHuman: {input}\nAI:') llm=OpenAI(client=<openai.resources.completions.Completions object at 0x1145b1610>, async_client=<openai.resources.completions.AsyncCompletions object at 0x1145b3dd0>, temper

In [16]:
from raptor.FaissRetriever import FaissRetriever, FaissRetrieverConfig
from raptor.EmbeddingModels import OpenAIEmbeddingModel
import tiktoken

# Create the configuration
config = FaissRetrieverConfig(
    max_tokens=100,
    max_context_tokens=3500,
    use_top_k=False,
    embedding_model=OpenAIEmbeddingModel(),
    question_embedding_model=OpenAIEmbeddingModel(),
    top_k=5,
    tokenizer=tiktoken.get_encoding("cl100k_base"),
    embedding_model_string="OpenAI"
)

# Create the retriever instance
retriever = FaissRetriever(config)

# Load the document text from a file or any other source
with open("/Users/clydeclarke/Documents/AH_router/data/dune.txt", "r") as f:
    doc_text = f.read()

# Build the index from the document text
retriever.build_from_text(doc_text)

# Retrieve the context for a given query
query = "What is the meaning of life?"
context = retriever.retrieve(query)
print(context)


Building embeddings:   0%|          | 0/98 [00:00<?, ?it/s]


TypeError: cannot pickle '_thread.RLock' object

In [17]:
retriever

<raptor.FaissRetriever.FaissRetriever at 0x17351a330>

In [18]:
retriever.build_from_text("Hello")

Building embeddings:   0%|          | 0/1 [00:00<?, ?it/s]


TypeError: cannot pickle '_thread.RLock' object

In [82]:
import pinecone_rag
completion = pinecone_rag.client_openai.chat.completions.create(
    model="gpt-4o",  # Replace with the appropriate OpenAI model
    messages=[{"role":"system","content":'You are a useful assistant'},
              {"role":"user", "content":'What is your name'},],
    max_tokens=300,
    stream=True,
    presence_penalty=0,
    temperature=0.5,
    top_p=0.9,
)
print(completion)

<openai.Stream object at 0x11b33b260>


In [84]:
text = ""
i=0
for chunks in completion:
    i = i + 1
    mini_chunk = chunks.choices[0].delta.content
    print(mini_chunk)
    if mini_chunk is None:
        mini_chunk = ""
    text += mini_chunk

print(text)




In [81]:
from groq import Groq


''

In [52]:
chunks.choices[0].delta.content

In [24]:
import asyncio
async def main() -> None:
    stream = await pinecone_rag.client_openai.chat.completions.create(
        model="your_deployment_name",
        messages = [ {"role": "user", "content": "What is chatgpt?"} ],
        stream=True,
    )

    #async for data in stream:
    #    print(data.model_dump_json())
    #    print("test")

    async for choices in stream:
        print(choices.model_dump_json(indent=2))    
        print()



In [26]:
main()

<coroutine object main at 0x11aefe500>