In [None]:
!pip install langchain-community

Successfully installed dataclasses-json-0.6.7 httpx-sse-0.4.0 langchain-0.3.13 langchain-community-0.3.13 langchain-core-0.3.28 marshmallow-3.23.2 mypy-extensions-1.0.0 pydantic-settings-2.7.0 python-dotenv-1.0.1 typing-inspect-0.9.0


In [None]:
pip install docarray

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.llms import HuggingFaceHub
from google.colab import drive
from langchain.indexes import VectorstoreIndexCreator
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceHubEmbeddings

import pandas as pd

In [None]:
# Mount Google Drive at /content/drive so the CSV under MyDrive can be opened by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Path of the AI job-market CSV on the mounted Drive.
file = (
    '/content/drive/MyDrive/DSM COURSE/NOTEBOOK/G. LLMs/RAG/'
    'ai_job_market_insights.csv'
)

# CSV loader pointed at that file.
loader = CSVLoader(file_path=file)

In [None]:
# Use an open-source embedding model
import os
from getpass import getpass

# Take the Hugging Face token from the environment, otherwise prompt for it,
# so the secret never has to be written into the notebook.
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face API token: ")

embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",  # Embedding-compatible model
    huggingfacehub_api_token=api_token
)
# Create the index with embeddings: the loader's documents are embedded and
# stored in an in-memory DocArray vector store.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings  # Specify the embedding model
).from_loaders([loader])

In [None]:
# Question put to the index; adjacent literals are concatenated into one string.
query = (
    "Please list The Industry name "
    "in a table in markdown and summarize each one."
)

In [None]:
# Hosted flan-t5-small model, called with temperature 0.0 and max_length 512.
llm_replacement_model = HuggingFaceHub(
    huggingfacehub_api_token=api_token,
    repo_id='google/flan-t5-small',
    model_kwargs={'temperature': 0.0, 'max_length': 512},
)

# Ask the question against the index, answering with the model above.
response = index.query(query, llm=llm_replacement_model)

In [None]:
# Render the model's answer as Markdown.
display(Markdown(response))

Business

# Step By Step

In [None]:
from langchain.document_loaders import CSVLoader
# Recreate the CSV loader for the step-by-step walkthrough.
# (The original line had a stray closing parenthesis, which is a SyntaxError.)
loader = CSVLoader(file_path=file)

In [None]:
# Load the CSV into a list of Document objects.
docs = loader.load()

In [None]:
# Inspect the first loaded document.
docs[0]

Document(metadata={'source': '/content/drive/MyDrive/DSM COURSE/NOTEBOOK/G. LLMs/RAG/ai_job_market_insights.csv', 'row': 0}, page_content='Job_Title: Cybersecurity Analyst\nIndustry: Entertainment\nCompany_Size: Small\nLocation: Dubai\nAI_Adoption_Level: Medium\nAutomation_Risk: High\nRequired_Skills: UX/UI Design\nSalary_USD: 111392.16524315962\nRemote_Friendly: Yes\nJob_Growth_Projection: Growth')

In [None]:
from langchain.embeddings import HuggingFaceHubEmbeddings
import os
from getpass import getpass

# Take the Hugging Face token from the environment, otherwise prompt for it,
# so the secret never has to be written into the notebook.
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face API token: ")
# Sentence-embedding model served through the Hugging Face Hub.
embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
    huggingfacehub_api_token=api_token
)

In [None]:
# Embed one sample sentence so the raw vector can be inspected.
embed = embeddings.embed_query("Hi my name is Ali")

In [None]:
# Length of the embedding vector (384 in the recorded output).
print(len(embed))

384


In [None]:
# Peek at the first five components of the vector.
print(embed[:5])

[-0.03770144283771515, 0.012181680649518967, -0.030671807006001472, 0.06867518275976181, -0.06839242577552795]


In [None]:
# Build the in-memory vector store from the loaded documents and the embedding model.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Free-text question used for the similarity search.
query = 'Please suggest a job title with data'

In [None]:
# Search the vector store with the query text.
# NOTE(review): this rebinds `docs` from the full loaded corpus to the search
# hits, so re-running the earlier `from_documents(docs, ...)` cell afterwards
# would index only these hits. A distinct name would avoid that.
docs = db.similarity_search(query)

In [None]:
# Number of hits returned by the search (4 in the recorded output).
len(docs)

4

In [None]:
# Inspect the first returned hit.
docs[0]

Document(metadata={'source': '/content/drive/MyDrive/DSM COURSE/NOTEBOOK/G. LLMs/RAG/ai_job_market_insights.csv', 'row': 398}, page_content='Job_Title: Operations Manager\nIndustry: Education\nCompany_Size: Medium\nLocation: New York\nAI_Adoption_Level: Low\nAutomation_Risk: High\nRequired_Skills: Machine Learning\nSalary_USD: 121912.19646021897\nRemote_Friendly: No\nJob_Growth_Projection: Stable')

In [None]:
# Retriever view over the vector store, for use by a retrieval chain.
retriever = db.as_retriever()

# Hosted flan-t5-small model, called with temperature 0.0 and max_length 512.
llm = HuggingFaceHub(
    huggingfacehub_api_token=api_token,
    repo_id='google/flan-t5-small',
    model_kwargs={'temperature': 0.0, 'max_length': 512},
)

In [None]:
# Concatenate the retrieved documents into one context string.
# A blank line separates them so the last field of one record does not run
# straight into the first field of the next.
qdocs = "\n\n".join(doc.page_content for doc in docs)

In [None]:
# Send the retrieved context plus the question straight to the model.
# The stray closing parenthesis (a SyntaxError) is removed, and invoke()
# replaces the deprecated direct call on the LLM object.
response = llm.invoke(
    f"{qdocs} Question: Please list all job title "
    "with Data in a table in markdown and summarize each one."
)

  response = llm(f"{qdocs} Question: Please list all job title \


In [None]:
# Render the model's answer as Markdown.
display(Markdown(response))

Job Title: Data Scientist Industry: Education Company

In [None]:
# Retrieval QA chain of type "stuff", wired to the retriever and the LLM.
# verbose=True prints the chain's enter/finish trace.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    verbose=True,
)
# Question for the QA chain; adjacent literals are concatenated into one string.
query = (
    "Please list all job title "
    "with Data in a table in markdown and summarize each one."
)

In [None]:
# Chain.run() is deprecated. invoke() takes the question under "query" and
# returns a dict whose "result" entry is the answer text.
response = qa_stuff.invoke({"query": query})["result"]



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [None]:
# Render the chain's answer as Markdown.
display(Markdown(response))

Job title

In [None]:
# Same pipeline through the convenience wrapper: configure the creator first,
# then build the index from the loader.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

In [None]:
# Ask the question against the index, answering with the given LLM.
response = index.query(query, llm=llm)

# Clean Code

In [None]:
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from langchain.indexes import VectorstoreIndexCreator
from IPython.display import display, Markdown


import os
from getpass import getpass

# Hugging Face API Token: taken from the environment, otherwise prompted for,
# so the secret never has to be written into the notebook.
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face API token: ")

# Load CSV Data
file_path = '/content/drive/MyDrive/DSM COURSE/NOTEBOOK/G. LLMs/RAG/ai_job_market_insights.csv'
loader = CSVLoader(file_path=file_path)
docs = loader.load()

# Use Hugging Face Hub for Embeddings
embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
    huggingfacehub_api_token=api_token
)

# Create a Document Database (in-memory vector store) and a retriever over it
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
retriever = db.as_retriever()

# Initialize LLM from Hugging Face Hub
llm = HuggingFaceHub(
    repo_id="google/flan-t5-small",
    model_kwargs={"temperature": 0.0, "max_length": 512},
    huggingfacehub_api_token=api_token
)

# Create Retrieval QA Chain
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

# Query Example
query = "Please list all job titles with 'Data' in a table in markdown and summarize each one."
# Chain.run() is deprecated. invoke() takes the question under "query" and
# returns a dict whose "result" entry is the answer text.
response = qa_stuff.invoke({"query": query})["result"]
display(Markdown(response))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Job Titles with 'Data' in a table in markdown

In [None]:
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from langchain.indexes import VectorstoreIndexCreator
from IPython.display import display, Markdown

import os
from getpass import getpass

# Hugging Face API Token: taken from the environment, otherwise prompted for,
# so the secret never has to be written into the notebook.
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face API token: ")

# Load CSV Data
file_path = '/content/drive/MyDrive/DSM COURSE/NOTEBOOK/G. LLMs/RAG/ai_job_market_insights.csv'
loader = CSVLoader(file_path=file_path)
# Kept for inspection only: the index below is built from `loader`, not from `docs`.
docs = loader.load()

# Use Hugging Face Hub for Embeddings
embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
    huggingfacehub_api_token=api_token
)

# Create a Vectorstore Index
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings
).from_loaders([loader])

# Initialize LLM from Hugging Face Hub
llm = HuggingFaceHub(
    repo_id="google/flan-t5-small",
    model_kwargs={"temperature": 0.0, "max_length": 512},
    huggingfacehub_api_token=api_token
)

# Expose the index's vector store as a retriever for the QA chain
retriever = index.vectorstore.as_retriever()

# Create Retrieval QA Chain
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

# Query Example
query = "Please list all job titles with 'Data' in a table in markdown and summarize each one."
# Chain.run() is deprecated. invoke() takes the question under "query" and
# returns a dict whose "result" entry is the answer text.
response = qa_stuff.invoke({"query": query})["result"]
display(Markdown(response))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Job Titles with 'Data' in a table in markdown