In [7]:
from configs import API_KEY, DEFAULT_MODEL
from langchain_openai.embeddings import OpenAIEmbeddings
import os
import openai
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.prompts import PromptTemplate
from langchain_openai.chat_models import ChatOpenAI

In [2]:
# Publish the key from configs through the process environment, then give
# the openai module that same environment value.
os.environ["OPENAI_API_KEY"] = API_KEY
openai.api_key = os.environ["OPENAI_API_KEY"]

In [3]:
# Embedding model handed to the vector store below.
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Open the Chroma store kept under the given persist directory.
vectorstore_from_dir = Chroma(
    persist_directory="./into-to-DS-vectorstore",
    embedding_function=embedding,
)

  vectorstore_from_dir = Chroma(persist_directory = "./into-to-DS-vectorstore", embedding_function = embedding)


In [4]:
# Sanity check: show the size of the loaded store (20 in the recorded run).
len(vectorstore_from_dir)

20

In [5]:
# Retriever over the store using the 'mmr' search type; k and lambda_mult
# are forwarded to the search as keyword arguments.
retriever = vectorstore_from_dir.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0.7),
)

In [6]:
# Prompt text with two placeholders: {question} (the user query) and
# {context} (whatever the retriever step supplies). The closing instruction
# asks the model to cite the lecture title in a "Resources: ..." line.
TEMPLATE = """
Answer the following question:
{question}

To answer the question use only the following context
{context}

At the end of the response, specify the name of the lecture this context is taken from in the format:
Resources: **Lecture Title**
where **Lecture Title** should be substituted with the title of the resource lecture.
"""

In [8]:
# Build a PromptTemplate from TEMPLATE; the template text carries the
# {question} and {context} placeholders.
prompt_template = PromptTemplate.from_template(TEMPLATE)

In [10]:
# Chat model used to answer the question. `seed` is passed as an explicit
# argument instead of being tucked into `model_kwargs`, since it is a
# declared ChatOpenAI parameter; the value sent to the API is unchanged.
chat = ChatOpenAI(
    model="gpt-4",
    seed=365,
    max_tokens=250,
)

In [11]:
# Sample query that is sent through each chain in the cells below.
question = "What software do data scientists use?"

In [14]:
# Parallel step: the same input goes to the retriever (stored under
# 'context') and is passed through unchanged (stored under 'question').
chain = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

In [15]:
# Run the parallel step on its own to inspect the dict it produces
# ('context' from the retriever, 'question' passed through).
chain.invoke(question)

{'context': [Document(metadata={'Course Title': 'Introduction to Data and Data Science', 'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'}, page_content='Alright! So… How are the techniques used in data, business intelligence, or predictive analytics applied in real life? Certainly, with the help of computers. You can basically split the relevant tools into two categories—programming languages and software. Knowing a programming language enables you to devise programs that can execute specific operations. Moreover, you can reuse these programs whenever you need to execute the same action. As you can see from the infographic, R, and Python are the'),
  Document(metadata={'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need', 'Course Title': 'Introduction to Data and Data Science'}, page_content='to address the complexity of big data and its computational intensity. Most notably, Hadoop d

In [16]:
# Stage 1: retrieve context and pass the question through, then fill the
# prompt template with both values.
chain1 = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_template
)

In [18]:
# Show the prompt chain1 produces for the sample question, with the
# retriever output and the question substituted into the template.
print(chain1.invoke(question))

text="\nAnswer the following question:\nWhat software do data scientists use?\n\nTo answer the question use only the following context\n[Document(metadata={'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need', 'Course Title': 'Introduction to Data and Data Science'}, page_content='Alright! So… How are the techniques used in data, business intelligence, or predictive analytics applied in real life? Certainly, with the help of computers. You can basically split the relevant tools into two categories—programming languages and software. Knowing a programming language enables you to devise programs that can execute specific operations. Moreover, you can reuse these programs whenever you need to execute the same action. As you can see from the infographic, R, and Python are the'), Document(metadata={'Course Title': 'Introduction to Data and Data Science', 'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Too

In [19]:
# Stage 2: same input mapping and prompt as before, with the filled prompt
# now sent on to the chat model.
chain2 = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_template
    | chat
)

In [20]:
# Print the chat model's response object for the sample question; the
# printed form includes the content together with response metadata.
print(chain2.invoke(question))

content='Data scientists use a variety of programming languages and software to apply techniques in data, business intelligence, and predictive analytics. The programming languages frequently used include R and Python. For handling the complexity of big data and its computational intensity, Hadoop is typically used as it enables distribution of computational tasks on multiple computers. In the realm of business intelligence visualizations, top software tools include Power BI, SaS, Qlik, and especially Tableau. For predictive analytics, EViews is commonly used for working with econometric time-series models, while Stata is used for academic statistical and econometric research.  \n\nResources: **Programming Languages & Software Employed in Data Science - All the Tools You Need**' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 139, 'prompt_tokens': 464, 'total_tokens': 603, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_

In [21]:
# Print a pasted copy of the response text from the previous cell so the
# "\n" escapes are rendered as real line breaks.
print("Data scientists use a variety of programming languages and software to apply techniques in data, business intelligence, and predictive analytics. The programming languages frequently used include R and Python. For handling the complexity of big data and its computational intensity, Hadoop is typically used as it enables distribution of computational tasks on multiple computers. In the realm of business intelligence visualizations, top software tools include Power BI, SaS, Qlik, and especially Tableau. For predictive analytics, EViews is commonly used for working with econometric time-series models, while Stata is used for academic statistical and econometric research.  \n\nResources: **Programming Languages & Software Employed in Data Science - All the Tools You Need**")

Data scientists use a variety of programming languages and software to apply techniques in data, business intelligence, and predictive analytics. The programming languages frequently used include R and Python. For handling the complexity of big data and its computational intensity, Hadoop is typically used as it enables distribution of computational tasks on multiple computers. In the realm of business intelligence visualizations, top software tools include Power BI, SaS, Qlik, and especially Tableau. For predictive analytics, EViews is commonly used for working with econometric time-series models, while Stata is used for academic statistical and econometric research.  

Resources: **Programming Languages & Software Employed in Data Science - All the Tools You Need**


In [25]:
# Full pipeline: gather context and question in parallel, fill the prompt,
# call the chat model, then parse the model reply down to a plain string.
chain3 = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt_template
    | chat
    | StrOutputParser()
)

In [26]:
# Print the final answer for the sample question; chain3 ends with
# StrOutputParser, so the printed value is plain text.
print(chain3.invoke(question))

Data scientists use a variety of programming languages and software to carry out their tasks. The programming languages most relevant to data science include R and Python. In terms of software, tools like Hadoop are employed to handle big data, using distributed computational tasks across multiple computers. For business intelligence visualizations, software like Power BI, SaS, Qlik, and especially Tableau are utilized. Predictive analytics largely rely on EViews for working with econometric time-series models, and Stata for academic statistical and econometric research.

Resources: **Programming Languages & Software Employed in Data Science - All the Tools You Need**


In [28]:
# Record the installed langchain package version for this run.
import langchain
print(langchain.__version__)

1.2.8
