In [2]:
import os
import sys
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
keys_path = os.path.abspath('../../keys')
sys.path.append(keys_path)
from keys import LANGCHAIN_API_KEY, OPENAI_API_KEY
os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [4]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://www.math-datascience.nat.fau.de/im-studium/masterstudiengaenge/master-data-science/",
    "https://www.fau.eu/studiengang/data-science-bsc/",
    "https://www.fau.eu/studiengang/data-science-msc/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, 
    chunk_overlap=200)

doc_splits = text_splitter.split_documents(docs_list)

# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import SKLearnVectorStore

vectorstore = SKLearnVectorStore.from_documents(documents=doc_splits, 
                                    embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()

In [5]:
(docs_list)

[Document(metadata={'source': 'https://www.math-datascience.nat.fau.de/im-studium/masterstudiengaenge/master-data-science/', 'title': 'Master Data Science - Lehreinheit Mathematik – Data Science', 'description': 'Please send only questions related to the field of Data Science or the structure of this study course to the student advisory. Questions on the application…', 'language': 'de-DE'}, page_content='\n\n\n\n\nMaster Data Science - Lehreinheit Mathematik – Data Science\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\nNavigation überspringen\nZur Navigation\nZum Seitenende\n\n\n\n\n\n\n\nOrganisationsmenü öffnen\n\n \n\n\nOrganisationsmenü schließen\n\n\n\n\n\nFriedrich-Alexander-Universität LE Mathematik - Data Science \n\n\n\nFAUZur zentralen FAU Website\n\nFriedrich-Alexander-UniversitätNaturwissenschaftliche Fakultät\n\n\n\nGeben Sie hier den Suchbegriff ein, um in diesem Webauftritt zu suchen:\n\n\n Suche öffnen\n\n\n\n\n\n\nEnglish\n\n\

In [6]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)


from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(temperature=0)

# Chain
generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

In [7]:
questions = [
    'What are the admission requirements of M.Sc. Data Science at FAU Erlangen?',
    'How can I apply for M.Sc. Data Science at FAU Erlangen?',
    'Is accommodation available at FAU Erlangen?',
    'How can I finance my studies at FAU Erlangen?',
    'What are the next steps if I get an admission at FAU Erlangen?',
    'What is the duration of M.Sc. Data Science degree program at FAU Erlangen?',
    'What is the teaching language of M.Sc. Data Science at FAU Erlangen?',
    'What is the structure of M.Sc. Data Science at FAU Erlangen?',
    'Is German required for M.Sc. Data Science at FAU Erlangen?',
    'What is the duration of B.Sc. Data Science degree program at FAU Erlangen?',
    'What is the teaching language of B.Sc. Data Science at FAU Erlangen?',
    'What is the structure of B.Sc. Data Science FAU Erlangen?',
    'What are the admission requirements of B.Sc. Data Science FAU Erlangen?'
]


In [8]:
### Answer recursively

In [9]:
answers = []
for question in questions:
    questions = generate_queries_decomposition.invoke({"question":question})

    # Prompt
    template = """Here is the question you need to answer:

    \n --- \n {question} \n --- \n

    Here is any available background question + answer pairs:

    \n --- \n {q_a_pairs} \n --- \n

    Here is additional context relevant to the question: 

    \n --- \n {context} \n --- \n

    Use the above context and any background question + answer pairs to answer the question: \n {question}
    """

    decomposition_prompt = ChatPromptTemplate.from_template(template)



    from operator import itemgetter
    from langchain_core.output_parsers import StrOutputParser

    def format_qa_pair(question, answer):
        """Format Q and A pair"""
        
        formatted_string = ""
        formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
        return formatted_string.strip()

    # llm
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

    q_a_pairs = ""
    for q in questions:
        
        rag_chain = (
        {"context": itemgetter("question") | retriever, 
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs")} 
        | decomposition_prompt
        | llm
        | StrOutputParser())

        answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
        q_a_pair = format_qa_pair(q,answer)
        q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

    answers.append(answer)

In [10]:
answers

["Yes, international students can apply to the M.Sc. Data Science program at FAU Erlangen. In addition to the general requirements such as having a completed B.Sc. degree in a relevant field and a GPA of 2.5 or better with respect to the German grading system, international students need to fulfill English proficiency at level B2 CEFR or provide proof of six years of English classes at a German secondary school. However, applicants who completed their university entrance qualifications or first degree in English are not required to provide proof of English proficiency. The application process is performed online, and specific registration periods apply for the winter and summer intakes. Further information on the application process can be obtained by contacting the Master's Office at zuv-masterbuero@fau.de.",
 'Based on the provided context and background information, FAU Erlangen-Nürnberg does not have specific funding available to support international students with their living cos

In [11]:
len(answers)

13

In [12]:
### Answer individually

In [13]:
# Answer each sub-question individually 

from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """RAG on each sub-question"""
    
    # Use our decomposition / 
    sub_questions = sub_question_generator_chain.invoke({"question":question})
    
    # Initialize a list to hold RAG chain results
    rag_results = []
    
    for sub_question in sub_questions:
        
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        
        # Use retrieved documents and sub-question in RAG chain
        answer = (prompt_rag | llm | StrOutputParser()).invoke({"context": retrieved_docs, 
                                                                "question": sub_question})
        rag_results.append(answer)
    
    return rag_results,sub_questions

Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)


In [14]:
questions_all = [
    'What are the admission requirements of M.Sc. Data Science at FAU Erlangen?',
    'How can I apply for M.Sc. Data Science at FAU Erlangen?',
    'Is accommodation available at FAU Erlangen?',
    'How can I finance my studies at FAU Erlangen?',
    'What are the next steps if I get an admission at FAU Erlangen?',
    'What is the duration of M.Sc. Data Science degree program at FAU Erlangen?',
    'What is the teaching language of M.Sc. Data Science at FAU Erlangen?',
    'What is the structure of M.Sc. Data Science at FAU Erlangen?',
    'Is German required for M.Sc. Data Science at FAU Erlangen?',
    'What is the duration of B.Sc. Data Science degree program at FAU Erlangen?',
    'What is the teaching language of B.Sc. Data Science at FAU Erlangen?',
    'What is the structure of B.Sc. Data Science FAU Erlangen?',
    'What are the admission requirements of B.Sc. Data Science FAU Erlangen?'
]


In [15]:
answers_all = []
for question in questions_all:
    # Wrap the retrieval and RAG process in a RunnableLambda for integration into a chain
    answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)


    def format_qa_pairs(questions, answers):
        """Format Q and A pairs"""
        
        formatted_string = ""
        for i, (question, answer) in enumerate(zip(questions, answers), start=1):
            formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
        return formatted_string.strip()

    context = format_qa_pairs(questions, answers)

    # Prompt
    template = """Here is a set of Q+A pairs:

    {context}

    Use these to synthesize an answer to the question: {question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    final_rag_chain = (
        prompt
        | llm
        | StrOutputParser()
    )

    answer = final_rag_chain.invoke({"context":context,"question":question})
    answers_all.append(answer)


  warn_deprecated(


In [16]:
answers_all

['The admission requirements for the M.Sc. Data Science program at FAU Erlangen include a completed B.Sc. degree in Mathematics, Industrial Mathematics, Mathematical Economy, Computer Science, Data Science, or Physics. Applicants must have a Grade Point Average (GPA) of 2.5 or better with respect to the German grading system and English proficiency at level B2 CEFR. International students are welcome to apply, with specific application deadlines for the winter semester. Candidates with a GPA between 2.6 and 2.8 may be invited for a short online interview to assess their knowledge in calculus, linear algebra, algorithms, and data structures.',
 'To apply for the M.Sc. Data Science program at FAU Erlangen, you need to meet the admission requirements which include having a completed B.Sc. degree in specific fields, a GPA of 2.5 or better, and English proficiency at level B2 CEFR. The application process is online, with specific registration periods for the winter and summer intakes. Inter