In [1]:
from dotenv import load_dotenv,find_dotenv
load_dotenv(find_dotenv())

True

##### A. Load two vector stores data

In [2]:
#Get embeddings and Chroma to load the vector store
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [3]:
embeddings_model = OpenAIEmbeddings()

In [4]:
vectorstore1 = Chroma(persist_directory='data/chroma/1Causal_Inference',embedding_function=embeddings_model)
vectorstore2 = Chroma(persist_directory='data/chroma/2Causal_Inference',embedding_function=embeddings_model)

##### B. Setup LLM

In [5]:
from langchain.chat_models import ChatOpenAI

In [6]:
llm = ChatOpenAI(temperature=0)

##### C. Extract relevant docs from a search query for a string many ways

In [7]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [8]:
question = "What are the benefits of OLS regression with regards to casuality analysis?"

###### C1. Contextual Compression Retriever

In [9]:
compressor = LLMChainExtractor.from_llm(llm)

In [10]:
mydocs1 = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=vectorstore1.as_retriever())
mydocs2 = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=vectorstore2.as_retriever())

In [11]:
#Example of compression filtering internal docs
docs1 = mydocs1.get_relevant_documents(question)
docs2 = mydocs2.get_relevant_documents(question)



In [None]:
#print (docs1)

In [None]:
#print (docs2)

###### C2. MultiQuery Retriever

In [12]:
from langchain.retrievers.multi_query import MultiQueryRetriever

In [13]:
mq1 = MultiQueryRetriever.from_llm(
    retriever=vectorstore1.as_retriever(), llm=llm
)

mq2 = MultiQueryRetriever.from_llm(
    retriever=vectorstore2.as_retriever(), llm=llm
)

In [14]:
doc3 = mq1.get_relevant_documents(query=question)
len(doc3)

5

In [15]:
doc4 = mq2.get_relevant_documents(query=question)
len(doc4)

6

In [None]:
#print (doc3)

In [None]:
#print (doc4)

##### D. Setup Conversational Memory

In [16]:
from langchain.memory import ConversationBufferMemory


In [46]:
memory = ConversationBufferMemory(memory_key="chat_history")

In [47]:
memory.buffer

''

##### E. Simple Chain without memory

In [19]:
from langchain import LLMChain
from langchain.prompts.prompt import PromptTemplate

In [35]:
prompt_template = """
You are a specialized teacher chatbot with expertise in causal inference using Python. 
Your primary role is to guide students through interactive lessons, hands-on exercises, and provide thoughtful insights. 
You will help students understand the underlying principles, methodologies, and practical applications of causal inference.

Your responses should be clear, concise, and tailored to the students' level of understanding, encouraging 
them to think critically and engage with the material. 
Encourage questions and provide examples where necessary to illustrate complex ideas.

When explaining concepts, refer to the internal documents by citing page numbers or section headers as provided from the 
vector store. Do not reference any external links or sources outside the provided internal documents. 
Ensure you added page numbers at the end of your response without exception? Citing content is a must.

Answer this {question}
"""

In [36]:
llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_template))

In [37]:
result = llm_chain.run(input_documents=docs1,question=question)

In [38]:
print(result)

OLS regression, or Ordinary Least Squares regression, has several benefits when it comes to causal analysis:

1. **Interpretability**: OLS regression provides easily interpretable coefficients that represent the causal effect of each independent variable on the dependent variable. These coefficients can be used to understand the direction and magnitude of the causal relationship.

2. **Control for confounding variables**: OLS regression allows for the inclusion of multiple independent variables in the model, which can help control for potential confounding variables. By including these variables, we can better isolate the causal effect of the variable of interest on the dependent variable.

3. **Quantification of causal effects**: OLS regression provides estimates of the causal effects of the independent variables on the dependent variable. These estimates can be used to quantify the magnitude of the causal relationship and make comparisons between different variables.

4. **Hypothesis

In [85]:
result = llm_chain.run(input_documents=docs2,question='How do I determine the Confounding variables in my OLS?')

In [86]:
print(result)

To determine the confounding variables in your Ordinary Least Squares (OLS) regression model, you need to consider the factors that may affect both the independent variable(s) and the dependent variable, leading to a potential bias in the estimated relationship.

One way to identify potential confounding variables is through a careful review of the literature and subject matter knowledge. Look for variables that are known to be associated with both the independent and dependent variables. These variables can act as confounders and should be included in your regression model to control for their influence.

Another approach is to use statistical techniques such as the "change-in-estimate" method. This involves running the regression model with and without a potential confounding variable and comparing the coefficients of the independent variables of interest. If the inclusion of the potential confounder causes a substantial change in the coefficients, it suggests that the variable is a 

In [83]:
result = llm_chain.run(input_documents=doc3,question='How do I determine the Confounding variables in my OLS?')

In [None]:
while True:
    query = input("\n Enter Your Query:")
    print(llm_chain.run({"question": query, 'input_documents':doc3}))


 Enter Your Query:what is the backdoor criteria?
The backdoor criteria is a set of conditions that need to be satisfied in order to identify a valid set of covariates for controlling confounding variables in causal inference. According to Pearl's book "Causality: Models, Reasoning, and Inference" (page 81), the backdoor criteria states that a set of variables Z satisfies the backdoor criterion relative to a causal effect of interest if it blocks all backdoor paths between the treatment variable and the outcome variable. A backdoor path is a path that starts from the treatment variable, intersects with the arrows pointing into the treatment variable, and ends at the outcome variable. By controlling for the variables in Z, we can eliminate the confounding bias and estimate the causal effect accurately.

 Enter Your Query:which page number is that from?
I apologize, but as a language model AI, I do not have access to specific page numbers or internal documents. However, I can provide inf

In [84]:
print(result)

To determine the confounding variables in your Ordinary Least Squares (OLS) regression model, you need to consider the factors that may affect both the independent variable(s) and the dependent variable, leading to a potential bias in the estimated relationship.

One way to identify potential confounding variables is through a careful review of the literature and subject matter knowledge. Look for variables that are known to be associated with both the independent and dependent variables. These variables can act as confounders and should be included in your regression model to control for their influence.

Another approach is to use statistical techniques such as the "change-in-estimate" method or the "backdoor criterion" to identify confounding variables. The change-in-estimate method involves adding or removing variables from the model and observing the impact on the coefficients of the independent variables of interest. If the coefficients change substantially, it suggests the prese

In [43]:
result = llm_chain.run(input_documents=doc4,question=question)

In [44]:
print(result)

OLS regression, or Ordinary Least Squares regression, has several benefits when it comes to causal analysis:

1. **Interpretability**: OLS regression provides easily interpretable coefficients that represent the causal effect of each independent variable on the dependent variable. These coefficients can be used to understand the direction and magnitude of the causal relationship.

2. **Control for confounding variables**: OLS regression allows for the inclusion of multiple independent variables in the model, which can help control for potential confounding variables. By including these variables, we can better isolate the causal effect of the variable of interest on the dependent variable.

3. **Quantification of causal effects**: OLS regression provides estimates of the causal effects of the independent variables on the dependent variable. These estimates can be used to quantify the magnitude of the causal relationship and make comparisons between different variables.

4. **Hypothesis

##### F. Conversational Agent

In [45]:
from langchain.agents import Tool
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents import load_tools

list

In [74]:
#tools = load_tools(["llm-math"], llm=llm)
tools = [
    Tool(
        name="Specialized teacher chatbot",
        func=llm_chain.run(input_documents=doc4,question=question),
        description="useful for when you have a specific question around causal analysis, stats, machine learning, etc",
        return_direct=True,
    )
    
     ]


ValueError: Missing some input keys: {'question'}

In [None]:
agent_chain = initialize_agent(tools, 
                               llm,
                               agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, 
                               verbose=True, 
                               memory=memory)

In [None]:
agent_chain.run(input="What is a good way to learn about OLS?",        input_documents=doc4,
        question=question)