In [3]:
import json
import warnings

import jsonpickle
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack_integrations.components.embedders.cohere.text_embedder import CohereTextEmbedder
from haystack_integrations.components.retrievers.pinecone import PineconeEmbeddingRetriever
from haystack_integrations.document_stores.pinecone import PineconeDocumentStore

from helper import load_env
# Suppress every warning category for the rest of the session so the
# notebook output only shows the pipeline results.
warnings.filterwarnings("ignore")
# load_env comes from the project-local `helper` module; presumably it loads
# the service credentials into the environment -- confirm in helper.py.
load_env()

# Serverless index placement passed to the Pinecone document store.
_SERVERLESS_SPEC = {"serverless": {"region": "us-east-1", "cloud": "aws"}}

# Document store over the "industries" index, "Classification" namespace.
# NOTE(review): dimension=1024 presumably has to match the output size of the
# embedding model used for queries ("embed-english-v3.0") -- confirm.
document_store = PineconeDocumentStore(
    index="industries",
    namespace="Classification",
    dimension=1024,
    spec=_SERVERLESS_SPEC,
)

# Jinja template rendered by the prompt builder: every retrieved document's
# content is listed under "Context:", followed by the user's question.
# Written one line per literal so the exact whitespace (leading newline,
# three-space indent and the trailing space after the content placeholder)
# is explicit and cannot be lost to editor whitespace trimming.
prompt = (
    "\n"
    "Answer the question based on the provided context.\n"
    "Context:\n"
    "{% for doc in documents %}\n"
    "   {{ doc.content }} \n"
    "{% endfor %}\n"
    "Question: {{ query }}\n"
)

# Pipeline stages: embed the question, retrieve matching documents from the
# Pinecone store, render the prompt template, and generate the answer.
query_embedder = CohereTextEmbedder(model="embed-english-v3.0")
retriever = PineconeEmbeddingRetriever(document_store=document_store)
prompt_builder = PromptBuilder(template=prompt)
generator = OpenAIGenerator()

query_pipeline = Pipeline()

# Register each stage under the name that the run() input dictionaries and
# the connection strings below refer to.
for stage_name, stage in (
    ("query_embedder", query_embedder),
    ("retriever", retriever),
    ("prompt", prompt_builder),
    ("generator", generator),
):
    query_pipeline.add_component(stage_name, stage)

# Data flow: question embedding -> retriever, retrieved documents -> prompt
# template, rendered prompt -> generator.
for sender, receiver in (
    ("query_embedder.embedding", "retriever.query_embedding"),
    ("retriever.documents", "prompt.documents"),
    ("prompt", "generator"),
):
    query_pipeline.connect(sender, receiver)
def _run_query(question, top_k=1):
    """Run ``query_pipeline`` for a single question and return its result dict.

    The same text is sent to the embedder (to drive retrieval) and to the
    prompt template (as the question the generator must answer).

    Args:
        question: Natural-language question to ask.
        top_k: Number of documents the retriever should return.

    Returns:
        The dictionary returned by ``query_pipeline.run``; the generated
        answers are read from ``result["generator"]["replies"]``.
    """
    return query_pipeline.run(
        {
            "query_embedder": {"text": question},
            "retriever": {"top_k": top_k},
            "prompt": {"query": question},
        }
    )


def _parse_json_reply(reply):
    """Parse a generator reply as plain JSON.

    ``json.loads`` is used instead of ``jsonpickle.decode``: the reply is
    model-generated text, and jsonpickle can rebuild arbitrary Python objects
    from ``py/object`` tags, which must not be done with untrusted input.
    A surrounding Markdown code fence (with or without a language tag) is
    removed first so fenced replies parse too.

    Args:
        reply: Raw reply string produced by the generator.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    text = reply.strip()
    if text.startswith("```"):
        # Drop the opening fence line, then everything from the closing fence.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


industry_info = "Marijuana Cultivation"
industry_question = (
    f"My company is a {industry_info} : What is my Industry, SubIndustry and SubIndustryId? "
    'Provide in the json format {"industry":"", "subindustry":"", "subindustry_id":""}'
)

# Step 1: classify the company and extract the sub-industry from the JSON reply.
industries_result = _run_query(industry_question)
industry_reply = industries_result["generator"]["replies"][0]
industry = _parse_json_reply(industry_reply)
subindustry = industry["subindustry"]
print(subindustry)

# Step 2: ask for the risks and mitigations of that sub-industry.
risks_question = f"What risks and mitigations associated with {subindustry}? Please provide response in a json array format."
risk_result = _run_query(risks_question)

print(industry_reply)
print(risk_result["generator"]["replies"][0])

Farming & Fishing
{
  "industry": "Consumer Staples",
  "subindustry": "Farming & Fishing",
  "subindustry_id": "202010"
}
[
  {
    "risk": "Drought leading to crop failure and reduced yield.",
    "mitigation": "Invest in irrigation systems, Drought-resistant crops and crop insurance."
  },
  {
    "risk": "Flooding due to increased precipitation disrupting harvesting and sowing schedules.",
    "mitigation": "Investment in flood control infrastructure, Diversify crops and location of farms to areas less vulnerable to flooding."
  },
  {
    "risk": "Rising temperatures resulting in heat stress and sub-optimal growth conditions for crops.",
    "mitigation": "Use of heat-tolerant crops, Modify planting dates to avoid peak heat conditions."
  },
  {
    "risk": "Increased pests and weeds due to warmer climates disrupting crop health and yield.",
    "mitigation": "Invest in integrated pest management strategies, Use of resistant crop varieties."
  },
  {
    "risk": "Increased frequen