In [2]:
#!pip install langchain langchain-openai

In [2]:
from open_ai_api_key import api_key

In [3]:
from langchain_openai import ChatOpenAI

# Simple chatbot using Openai and langchain

In [4]:
# Create the chat model, passing the key imported from the local open_ai_api_key module.
llm = ChatOpenAI(
    api_key = api_key
)

# Send a single prompt and print only the text content of the reply.
response = llm.invoke('Hello, How are you?')
print(response.content)

Hello! I'm just a computer program, so I don't have feelings, but I'm here to help you. How can I assist you today?


### Save api key in environment file

In [12]:
# pip install python-dotenv
from dotenv import load_dotenv
# OPENAI_API_KEY = 'your api key' in .env file

# Load the variables defined in the .env file (OPENAI_API_KEY, see the note above).
load_dotenv()
# No api_key argument here; presumably the key is picked up from the environment — confirm against the langchain-openai docs.
llm = ChatOpenAI()

# Same prompt as before, now without the key appearing in the notebook.
response = llm.invoke('Hello, How are you?')
print(response.content)

Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help. How can I assist you today?


### Set type of model, temperature and max-token for the output

In [13]:

from dotenv import load_dotenv

# Load OPENAI_API_KEY from the .env file.
load_dotenv()
# Configure the model explicitly instead of relying on the defaults.
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo', 
    temperature = 0, # 0: factual responses, good for math; 1: creative answers, good for poems and fiction writing
    max_tokens = 1000, # upper bound on the length of the generated output
    verbose = True, # debug the output from the model; useful for more complex chains
)

# batch() takes a list of prompts; the printed result is a list with one message per prompt.
response = llm.batch(['What is machine learning?', 'what is few-shot learning'])
print(response)

[AIMessage(content='Machine learning is a subset of artificial intelligence that involves the development of algorithms and statistical models that enable computers to learn from and make predictions or decisions based on data without being explicitly programmed. Machine learning algorithms use patterns and inference to make decisions and improve their performance over time as they are exposed to more data.', response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 12, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-9cfe68df-d457-4dd2-b0ce-3e7f6f32807f-0', usage_metadata={'input_tokens': 12, 'output_tokens': 60, 'total_tokens': 72}), AIMessage(content='Few-shot learning is a machine learning technique that aims to train models with only a small amount of labeled data. This is in contrast to traditional machine learning approaches that require large amounts of labeled data to ach

In [85]:

from dotenv import load_dotenv

# Load OPENAI_API_KEY from the .env file.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo', 
    temperature = 0, # 0: factual responses, good for math; 1: creative answers, good for poems and fiction writing
    max_tokens = 1000, # upper bound on the length of the generated output
    verbose = True, # debug the output from the model; useful for more complex chains
)

# Ask for a bare list of names; printing the whole response shows the message content plus its metadata.
response = llm.invoke('Give me a list of different learnings in AI. No need to explaain, just list the names like federated learning, few-shot learning, supervised learning.')
print(response)

content='1. Reinforcement learning\n2. Unsupervised learning\n3. Transfer learning\n4. Semi-supervised learning\n5. Self-supervised learning\n6. Generative adversarial networks (GANs)\n7. Deep learning\n8. Natural language processing (NLP)\n9. Computer vision\n10. Bayesian learning' response_metadata={'token_usage': {'completion_tokens': 66, 'prompt_tokens': 41, 'total_tokens': 107}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-3474c183-201b-4a2c-a82a-677d98970de5-0' usage_metadata={'input_tokens': 41, 'output_tokens': 66, 'total_tokens': 107}


### Generating output in stream mode

In [86]:

from dotenv import load_dotenv

# Load OPENAI_API_KEY from the .env file.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo', 
    temperature = 0, # 0: factual responses, good for math; 1: creative answers, good for poems and fiction writing
    max_tokens = 1000, # upper bound on the length of the generated output
    verbose = True, # debug the output from the model; useful for more complex chains
)

# stream() is iterated piece by piece instead of waiting for the full reply.
response = llm.stream('Introduce different learnings we have in AI 300 words')
for res in response:
    # end='' joins the pieces into continuous text; flush=True shows each piece immediately.
    print(res.content, end = '', flush = True)

Artificial Intelligence (AI) is a rapidly evolving field that encompasses a wide range of technologies and applications. Here are some of the key learnings we have gained in AI:

1. Machine Learning: Machine learning is a subset of AI that focuses on developing algorithms and models that can learn from data and make predictions or decisions without being explicitly programmed. Key learnings in machine learning include the importance of data quality, feature engineering, model selection, and hyperparameter tuning.

2. Deep Learning: Deep learning is a subset of machine learning that uses neural networks with multiple layers to learn complex patterns in data. Key learnings in deep learning include the importance of network architecture design, training strategies, regularization techniques, and interpretability of models.

3. Natural Language Processing (NLP): NLP is a branch of AI that focuses on enabling computers to understand, interpret, and generate human language. Key learnings in 

## Using Prompt templates to tell the model how to respond (Control conversation)

In [23]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# Template with a single placeholder; {input} is filled in when the chain is invoked.
prompt = ChatPromptTemplate.from_template('Tell me about {input} in 300 words')

# Create llm chain: the formatted prompt is piped into the model.
chain = prompt | llm

# The dict key must match the placeholder name used in the template.
response = chain.invoke({'input': 'deep learning'})
print(response)

content='Deep learning is a subset of machine learning that aims to model high-level abstractions in data by using multiple layers of neural networks. It is a type of artificial intelligence that is inspired by the way the human brain works. Deep learning algorithms are designed to automatically learn and improve from experience without being explicitly programmed.\n\nOne of the key features of deep learning is its ability to automatically discover intricate patterns and relationships in data, which can be used to make predictions, classify images, understand speech, and more. This is achieved through the use of artificial neural networks, which are composed of interconnected nodes that process and analyze data in a hierarchical manner.\n\nDeep learning has been successfully applied to a wide range of applications, including computer vision, natural language processing, speech recognition, and healthcare. For example, deep learning algorithms have been used to develop self-driving cars

In [29]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# The system message tells the model its role and what kind of response we want;
# the human message carries the user-supplied {input}.
prompt = ChatPromptTemplate.from_messages(
    [
    ("system", "You are a professor in machine learning and we want to design 3 multiple choice questions for a test."),
    ('human', '{input}')
    ])

# Create llm chain: the formatted messages are piped into the model.
chain = prompt | llm

response = chain.invoke({'input': 'deep learning'})
print(response)

content='1. What is deep learning?\na) A type of machine learning that uses neural networks with multiple layers\nb) A type of reinforcement learning algorithm\nc) A method for supervised learning only\nd) A technique for feature selection\n\n2. Which of the following is a common activation function used in deep learning?\na) Sigmoid\nb) Linear\nc) Mean Squared Error\nd) K-Means\n\n3. What is the purpose of using convolutional neural networks (CNNs) in deep learning?\na) They are used for natural language processing tasks\nb) They are specialized for image recognition tasks\nc) They are used for anomaly detection in time series data\nd) They are specifically designed for clustering problems' response_metadata={'token_usage': {'completion_tokens': 150, 'prompt_tokens': 34, 'total_tokens': 184}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-947c2c21-9adc-46f1-ad63-cc2e00896949-0' usage_metadata={'input_tokens': 34, 'out

##### Get a list of desired outputs 

In [33]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# The system message fixes the task and the output format (comma separated list);
# the human message carries the word to generate collocations for.
prompt = ChatPromptTemplate.from_messages(
    [
    ("system", "Generate 10 collocations for the input word. Return the results as a comma separated list"),
    ('human', '{input}')
    ])

# Create llm chain: the formatted messages are piped into the model.
chain = prompt | llm

response = chain.invoke({'input': 'snow'})
print(response)

content='falling snow, deep snow, powdery snow, fresh snow, heavy snow, snow-covered mountains, snow-capped peaks, snowflakes, snowstorm, snowdrifts' response_metadata={'token_usage': {'completion_tokens': 38, 'prompt_tokens': 30, 'total_tokens': 68}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-4fd01ed3-c23a-45bb-895f-a13b481391b4-0' usage_metadata={'input_tokens': 30, 'output_tokens': 38, 'total_tokens': 68}


In [34]:
# Without an output parser the chain returns a message object rather than a plain string.
print(type(response))

<class 'langchain_core.messages.ai.AIMessage'>


# Parsing outputs

#### Get the output as String

In [38]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file, then create the model that resultToStr() reads as a global.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)


def resultToStr():
    """Ask the model for collocations of 'snow' and return the reply as a string.

    Uses the module-level `llm`. A StrOutputParser at the end of the chain
    turns the model's message into plain text.

    Returns:
        The model's reply as a str.
    """
    collocation_prompt = ChatPromptTemplate.from_messages([
        ("system", "Generate 10 collocations for the input word. Return the results as a comma separated list"),
        ('human', '{input}'),
    ])

    # prompt -> model -> string parser
    pipeline = collocation_prompt | llm | StrOutputParser()
    return pipeline.invoke({'input': 'snow'})

# Run the chain once and show the parsed string.
print(resultToStr())

falling snow, deep snow, fresh snow, heavy snow, snow-covered mountains, snow-capped peaks, thick snow, powdery snow, snowflake, snowstorm


In [39]:
# Calls the chain a second time just to confirm that the parsed result is a plain str.
print(type(resultToStr()))

<class 'str'>


#### Get the output as List

In [41]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file, then create the model that resultToList() reads as a global.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)


def resultToList():
    """Ask the model for collocations of 'snow' and return them as a list.

    Uses the module-level `llm`. The prompt requests a comma separated reply,
    which CommaSeparatedListOutputParser then turns into a list.

    Returns:
        A list of strings, one per collocation.
    """
    collocation_prompt = ChatPromptTemplate.from_messages([
        ("system", "Generate 10 collocations for the input word. Return the results as a comma separated list"),
        ('human', '{input}'),
    ])

    # prompt -> model -> comma-separated-list parser
    pipeline = collocation_prompt | llm | CommaSeparatedListOutputParser()
    return pipeline.invoke({'input': 'snow'})

# Run the chain once and show the parsed list.
print(resultToList())

['falling snow', 'deep snow', 'fresh snow', 'heavy snow', 'powdery snow', 'snow covered', 'snow capped', 'snow drifts', 'snow flurries', 'melting snow']


#### Get the output as Json

In [44]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from dotenv import load_dotenv


# Load OPENAI_API_KEY from the .env file, then create the model that resultToJson() reads as a global.
load_dotenv()
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)


def resultToJson():
    """Extract structured car details from a fixed sentence.

    Uses the module-level `llm`. The target fields are described by the
    nested `Auto` model; the parser's format instructions are injected into
    the system message so the model knows which structure to produce.

    Returns:
        The parsed result with the keys 'model', 'year' and 'reach_100'.
    """
    class Auto(BaseModel):
        # Fields to extract; each description states the question the field answers.
        model: str = Field(description = 'What is the model of the car?')
        year: int = Field(description = 'What is the year of the car?')
        reach_100: int = Field(description= "How long does it take to reach 100KM?")

    json_parser = JsonOutputParser(pydantic_object = Auto)

    extraction_prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract the information from the sentence based on the following format.\n Formating instruction: {format_instruction}"),
        ('human', '{input}'),
    ])

    # prompt -> model -> JSON parser
    pipeline = extraction_prompt | llm | json_parser

    payload = {
        'input': 'This 2017 Ford Mustang car is amazing with having V8 for the engine, and reaching to 100KM under 4 seconds',
        # Instructions generated by the parser that describe the expected output format.
        "format_instruction": json_parser.get_format_instructions(),
    }
    return pipeline.invoke(payload)

# Run the chain once and show the extracted fields.
print(resultToJson())

{'model': 'Ford Mustang', 'year': 2017, 'reach_100': 4}


### Connecting LLM to Specific source using LangChain

In [None]:
# Reference: https://python.langchain.com/v0.1/docs/modules/data_connection/

# Supported sources: CSV, PDF, documents, websites, ...

##### Asking a question from the LLM

In [48]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# No context is supplied here: the prompt contains only the question.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: Question: {input}
        ''')
# Create llm chain.
# Pipe into llm_model, the model built in this cell, instead of the `llm`
# variable left over from an earlier cell, so the cell does not depend on
# hidden kernel state.
chain = prompt | llm_model

response = chain.invoke({'input': 'What is the  minimum CRS score for the last draw on May 31?'})
print(response)
# Article that contains the actual answer:
#https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf

content='The minimum CRS score for the last draw on May 31 was 380.' response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 32, 'total_tokens': 48}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-e513b377-97bd-442c-94df-5e5876bc204c-0' usage_metadata={'input_tokens': 32, 'output_tokens': 16, 'total_tokens': 48}


#### Give the model specific information

In [49]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# The relevant paragraph of the article is pasted directly into the prompt as context.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: 
        Context: Today's minimum CRS score represent's the lowest score of the year in a non-category based selection draw and is also lower than the last CEC-only draw on May 31 (522). So far this month IRCC has issued  17,361 ITAs over six seperate draws. Yesterday, IRCC invited 1,391 candidates in a Provincial Nominee Program (PNP)-only draw with a minimum CRS of 670. Before that, IRCC most recently invited 3,200 candidates in a category-based selected draw  for French proficiency on July 8.
        Question: {input}
        ''')
# Create llm chain.
# Pipe into llm_model, the model built in this cell, instead of the `llm`
# variable left over from an earlier cell, so the cell does not depend on
# hidden kernel state.
chain = prompt | llm_model

response = chain.invoke({'input': 'What is the  minimum CRS score for the last draw on May 31?'})
print(response)
# Source of the context paragraph:
#https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf

content='The minimum CRS score for the last draw on May 31 was 522.' response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 153, 'total_tokens': 169}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-4d746cdb-4afc-4c30-aa74-36ffd69fe2bb-0' usage_metadata={'input_tokens': 153, 'output_tokens': 16, 'total_tokens': 169}


#### Put the information in the document variable (document or documents)

In [56]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain_core.documents import Document

# Wrap the context paragraph in a Document instead of hard-coding it in the prompt.
doc1 = Document(
    page_content = '''
    Today's minimum CRS score represent's the lowest score of the year in a non-category based selection draw and is also lower than the last CEC-only draw on May 31 (522). So far this month IRCC has issued  17,361 ITAs over six seperate draws. Yesterday, IRCC invited 1,391 candidates in a Provincial Nominee Program (PNP)-only draw with a minimum CRS of 670. Before that, IRCC most recently invited 3,200 candidates in a category-based selected draw  for French proficiency on July 8.
    '''
)

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# The context is now a placeholder that is filled in at invoke time.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: 
        Context: {context}
        Question: {input}
        ''')
# Create llm chain.
# Pipe into llm_model, the model built in this cell, instead of the `llm`
# variable left over from an earlier cell, so the cell does not depend on
# hidden kernel state.
chain = prompt | llm_model

response = chain.invoke({
    'input': 'What is the  minimum CRS score for the last draw on May 31?',
    # NOTE(review): a plain prompt | model chain presumably just string-formats
    # this list into {context}; confirm how Document lists are rendered.
    'context': [doc1]
})
print(response)
# Source of the context paragraph:
#https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf

content='Answer: The minimum CRS score for the last draw on May 31 was 522.' response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 164, 'total_tokens': 182}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-7bae5c31-dab2-4390-8cf8-3e71648fee19-0' usage_metadata={'input_tokens': 164, 'output_tokens': 18, 'total_tokens': 182}


#### Use create_stuff_documents_chain to pass documents to the model (needed later for retrieval)

In [57]:
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain.chains.combine_documents import create_stuff_documents_chain

# Wrap the context paragraph in a Document so it can be handed to the documents chain.
doc1 = Document(
    page_content = '''
    Today's minimum CRS score represent's the lowest score of the year in a non-category based selection draw and is also lower than the last CEC-only draw on May 31 (522). So far this month IRCC has issued  17,361 ITAs over six seperate draws. Yesterday, IRCC invited 1,391 candidates in a Provincial Nominee Program (PNP)-only draw with a minimum CRS of 670. Before that, IRCC most recently invited 3,200 candidates in a category-based selected draw  for French proficiency on July 8.
    '''
)

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# {context} receives the documents, {input} receives the question.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: 
        Context: {context}
        Question: {input}
        ''')  
# Create llm chain
# Previous approach, kept for comparison:
#chain = prompt | llm 
# The documents chain takes a list of Document objects under 'context';
# as the printed result shows, it returns the answer text directly.
chain = create_stuff_documents_chain(
    llm = llm_model, 
    prompt = prompt
)

    
response = chain.invoke({
    'input': 'What is the  minimum CRS score for the last draw on May 31?',
    'context': [doc1]
})
print(response)
# Source of the context paragraph:
#https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf

The minimum CRS score for the last draw on May 31 was 522.


#### Write a function to crawl the data

In [64]:
#pip install langchain_community

from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader


def crawl_data(url):
    """Download a web page and return it as LangChain documents.

    Args:
        url: Address of the page to load.

    Returns:
        The result of WebBaseLoader(url).load() (a list of Document objects).
    """
    return WebBaseLoader(url).load()

# Print the loaded documents to inspect what the loader extracted from the page.
print(crawl_data('https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf'))



[Document(metadata={'source': 'https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf', 'title': 'IRCC invites 6,300 Canadian Experience Class candidates in latest Express Entry draw | CIC News', 'language': 'en-US'}, page_content="\n\n\n\n\n\nIRCC invites 6,300 Canadian Experience Class candidates in latest Express Entry draw | CIC News\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAbout us\n\n\n\n\nMeet our partners\n\n\n\n\nAdvertise with us \n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\nMenu\n\n\nClose\n\n\n\n\n\n\nSubscribe\n\n\n\n\n\n\n\n\nHome\nExpress Entry\nCanada\nProvinces\nFamily Sponsorship\nStudy\nWork\nVisit\nCitizenship\nLife in Canada\n \n\n\n\n\n\n\n\nIRCC invites 6,300 Canadian Experience Class candidates in latest Express Entry draw\n\n\n\n \nEdana Robitaille\n\n\nUpdated: J

#### Feed the crawl data into the LLM Model

In [65]:

#pip install langchain_community

from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader


def crawl_data(url):
    """Download a web page and return it as LangChain documents.

    Args:
        url: Address of the page to load.

    Returns:
        The result of WebBaseLoader(url).load() (a list of Document objects).
    """
    return WebBaseLoader(url).load()

# Load the whole article; the complete page is used as context below.
docs = crawl_data('https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf')

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# {context} receives the documents, {input} receives the question.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: 
        Context: {context}
        Question: {input}
        ''')  

# Create llm chain that accepts a list of documents under 'context'.
chain = create_stuff_documents_chain(
    llm = llm_model, 
    prompt = prompt
)

response = chain.invoke({
    'input': 'What is the  minimum CRS score for the last draw on May 31?',
    'context': docs
})
print(response)


The minimum CRS score for the last draw on May 31 was 522.


### Split the whole doc into chunks

In [66]:
# Split the page into chunks so that only the important chunks need to be used

In [72]:

#pip install langchain_community

from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def crawl_data(url):
    """Download a web page and split it into small overlapping chunks.

    Args:
        url: Address of the page to load.

    Returns:
        The chunks produced by splitting the loaded documents with a
        chunk size of 200 and an overlap of 20. The number of chunks is
        printed as a side effect.
    """
    pages = WebBaseLoader(url).load()

    chunker = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    chunks = chunker.split_documents(pages)

    print('split_docs:', len(chunks))
    return chunks

# Load the article as chunks; all chunks are still passed as context below.
docs = crawl_data('https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf')

# Load OPENAI_API_KEY from the .env file before constructing the model.
load_dotenv()
llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo'
)

# {context} receives the documents, {input} receives the question.
prompt = ChatPromptTemplate.from_template('''
        Answer the questions: 
        Context: {context}
        Question: {input}
        ''')  

# Create llm chain that accepts a list of documents under 'context'.
chain = create_stuff_documents_chain(
    llm = llm_model, 
    prompt = prompt
)

response = chain.invoke({
    'input': 'What is the  minimum CRS score for the last draw on May 31?',
    'context': docs
})
print(response)


split_docs: 57
The minimum CRS score for the last draw on May 31 was 522.


In [None]:
### Converting chunks to embeddings and storing them as vectors

In [None]:
# Retrieve only the relevant parts

In [77]:
# pip install faiss-cpu
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS

def crawl_data(url):
    """Download a web page and split it into small overlapping chunks.

    Args:
        url: Address of the page to load.

    Returns:
        The chunks produced by splitting the loaded documents with a
        chunk size of 200 and an overlap of 20.
    """
    pages = WebBaseLoader(url).load()
    chunker = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    return chunker.split_documents(pages)

def vectorization(docs):
    """Embed the given documents and index them in a FAISS vector store.

    Args:
        docs: Documents (chunks) to embed.

    Returns:
        The FAISS (Facebook AI Similarity Search) store built from `docs`
        with OpenAI embeddings, for efficient similarity search.
    """
    return FAISS.from_documents(docs, embedding=OpenAIEmbeddings())




The minimum CRS score for the last draw on May 31 was 522.


#### Create the chain as function

In [81]:
# pip install faiss-cpu
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import create_retrieval_chain

def crawl_data(url):
    """Download a web page and split it into small overlapping chunks.

    Args:
        url: Address of the page to load.

    Returns:
        The chunks produced by splitting the loaded documents with a
        chunk size of 200 and an overlap of 20.
    """
    pages = WebBaseLoader(url).load()
    chunker = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    return chunker.split_documents(pages)

def vectorization(docs):
    """Embed the given documents and index them in a FAISS vector store.

    Args:
        docs: Documents (chunks) to embed.

    Returns:
        The FAISS store built from `docs` with OpenAI embeddings.
    """
    return FAISS.from_documents(docs, embedding=OpenAIEmbeddings())

def create_chain(vectorStore, k = None):
    """Build a retrieval-augmented generation (RAG) chain on a vector store.

    The chain looks up the chunks most similar to the question in
    `vectorStore`, passes them to the model as context, and returns the answer.

    Args:
        vectorStore: Vector store holding the embedded chunks; it must
            provide as_retriever().
        k: Optional number of chunks to retrieve per question. When None
            (the default) the retriever's own default is used, which is 4
            for LangChain vector-store retrievers.

    Returns:
        A chain that is invoked with {'input': question} and returns a dict
        containing the retrieved 'context' and the generated 'answer'.
    """
    llm_model = ChatOpenAI(
    model = 'gpt-3.5-turbo')

    prompt = ChatPromptTemplate.from_template('''
            Answer the questions: 
            Context: {context}
            Question: {input}
            ''')

    # Chain that formats a list of documents into {context} and calls the model.
    chain = create_stuff_documents_chain(
        llm = llm_model,
        prompt = prompt
    )

    # Only pass search_kwargs when the caller asked for a specific k, so the
    # default call behaves exactly as before.
    if k is None:
        retriever = vectorStore.as_retriever()
    else:
        retriever = vectorStore.as_retriever(search_kwargs={"k": k})

    # The retrieval chain fetches the chunks for the question and feeds them
    # to the documents chain above.
    retriever_chain = create_retrieval_chain(retriever, chain)

    return retriever_chain
    


# Load OPENAI_API_KEY first: vectorization() and create_chain() construct
# OpenAI clients, so the key must be in the environment before they run.
load_dotenv()

docs = crawl_data('https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf')
vectorStore = vectorization(docs)
chain = create_chain(vectorStore)

# Only the question is passed. The retrieval chain fills 'context' itself with
# the chunks it retrieves, so supplying the full document list here would be
# overwritten and only mislead the reader.
response = chain.invoke({
    'input': 'What is the  minimum CRS score for the last draw on May 31?',
})
print(response)


{'input': 'What is the  minimum CRS score for the last draw on May 31?', 'context': [Document(metadata={'source': 'https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf', 'title': 'IRCC invites 6,300 Canadian Experience Class candidates in latest Express Entry draw | CIC News', 'language': 'en-US'}, page_content="Today's minimum CRS score represent's the lowest score of the year in a non-category based selection draw and is also lower than the last CEC-only draw on May 31 (522)."), Document(metadata={'source': 'https://www.cicnews.com/2024/07/express-entry-1390-candidates-invited-in-latest-pnp-only-draw-0745309.html#gs.bs9dnf', 'title': 'IRCC invites 6,300 Canadian Experience Class candidates in latest Express Entry draw | CIC News', 'language': 'en-US'}, page_content='The department issued 6,300 ITAs in a draw for Canadian Experience Class (CEC) candidates.\nCandidates required a minimum Comprehensive Ranking System (CRS) 

In [82]:
# The retrieval chain returns a dict; the generated text is stored under 'answer'.
print(response['answer'])

The minimum CRS score for the last draw on May 31 was 522.
