In [13]:
import os 
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its entries into the process environment.
load_dotenv(find_dotenv())

# Read the three required settings; a missing variable raises KeyError right here.
openai_api_key, pinecone_api_key, pinecone_env = (
    os.environ[var_name]
    for var_name in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENV")
)

In [14]:
# Show the kernel's working directory; the relative data path opened later in
# this notebook resolves against it. Plain Python is used instead of the bare
# `pwd` automagic, which stops working if a variable named `pwd` exists or the
# cell contains more than one line.
os.getcwd()

'C:\\Users\\ACER\\Desktop\\work_trial'

In [15]:
from pinecone import Pinecone
# Pinecone client built from the credentials read from the environment.
pc = Pinecone(environment=pinecone_env, api_key=pinecone_api_key)

In [16]:
# check if index already exists (it shouldn't if this is first time)
from pinecone import Pinecone, PodSpec
index_name = "pod-index"

# Provision the index only when the project does not list it yet.
known_indexes = pc.list_indexes().names()
if index_name not in known_indexes:
    starter_pod = PodSpec(environment="gcp-starter")
    pc.create_index(
        index_name,
        dimension=1536,  # vector size of text-embedding-ada-002
        metric="cosine",
        spec=starter_pod,
    )

# Open a handle to the index and display its current statistics.
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [17]:
# from langchain.text_splitter import CharacterTextSplitter
# from langchain_community.document_loaders import TextLoader
# from langchain.text_splitter import TokenTextSplitter

# text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=10)
# docs = text_splitter.split_text(data_)

In [18]:
import json 

# Build the path from components: a literal "data\dummy.json" depends on the
# invalid escape sequence "\d" and on Windows path separators.
file_path = os.path.join("data", "dummy.json")

# Decode explicitly instead of relying on the platform default encoding.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)

In [19]:
# Display the object parsed from the JSON file as this cell's output.
data

[{'Id': '4e9b7ecd-60ae-4b35-ae8c-22393abcd119',
  'title': 'Implement New Feature',
  'description': 'Develop the new search functionality using elastic search.',
  'assigned': 'John Doe',
  'progress': 'In Progress'},
 {'Id': '6a8dcef8-3dae-4567-ada4-1a2b3c4d5e6f',
  'title': 'Bug Fix on Homepage',
  'description': 'Address the layout breaking issue in the homepage when viewed on mobile devices.',
  'assigned': 'Emily Clark',
  'progress': 'Completed'},
 {'Id': '8bd5fcea-3c56-4771-ac41-54321fe98734',
  'title': 'Database Optimization',
  'description': 'Optimize the current database schema to improve performance.',
  'assigned': 'Alex Smith',
  'progress': 'Not Started'},
 {'Id': 'a49b8e7f-c1d2-460e-999d-54e345678bcf',
  'title': 'Implement New Feature',
  'description': 'Develop the new chat system for customer support.',
  'assigned': 'David Jones',
  'progress': 'In Progress'},
 {'Id': 'eaf7892b-b880-4b57-985c-2a3d4e5678f9',
  'title': 'Perform UAT',
  'description': 'Coordinate wi

In [20]:
from langchain.schema import Document

# Fields that are merged into the text body; every other field is kept as metadata.
_text_fields = ("description", "title")

# One Document per record: the text is "<description> <title>", and the
# metadata holds the remaining key/value pairs of the record.
final_docs = [
    Document(
        page_content=" ".join((record["description"], record["title"])),
        metadata={
            field: value
            for field, value in record.items()
            if field not in _text_fields
        },
    )
    for record in data
]

In [21]:
# Sanity check: print the first three converted documents.
# A slice is used rather than a counter bound to `_`, because in IPython `_`
# holds the previous cell output and assigning to it disables that shortcut.
for doc in final_docs[:3]:
    print("Page Content:", doc.page_content)
    print("Metadata:", doc.metadata)
    print("Id from Metadata:", doc.metadata["Id"])
    print()  # blank line between documents for readability

Page Content: Develop the new search functionality using elastic search. Implement New Feature
Metadata: {'Id': '4e9b7ecd-60ae-4b35-ae8c-22393abcd119', 'assigned': 'John Doe', 'progress': 'In Progress'}
Id from Metadata: 4e9b7ecd-60ae-4b35-ae8c-22393abcd119

Page Content: Address the layout breaking issue in the homepage when viewed on mobile devices. Bug Fix on Homepage
Metadata: {'Id': '6a8dcef8-3dae-4567-ada4-1a2b3c4d5e6f', 'assigned': 'Emily Clark', 'progress': 'Completed'}
Id from Metadata: 6a8dcef8-3dae-4567-ada4-1a2b3c4d5e6f

Page Content: Optimize the current database schema to improve performance. Database Optimization
Metadata: {'Id': '8bd5fcea-3c56-4771-ac41-54321fe98734', 'assigned': 'Alex Smith', 'progress': 'Not Started'}
Id from Metadata: 8bd5fcea-3c56-4771-ac41-54321fe98734



In [22]:
from langchain_openai import OpenAIEmbeddings

# Pin the embedding model explicitly: the Pinecone index in this notebook is
# created with dimension=1536 for text-embedding-ada-002, so the two must stay
# in agreement rather than depend on the library's default model.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [25]:
from langchain_community.vectorstores import Pinecone as PineconeVectorstore
from langchain_openai import OpenAIEmbeddings

query = "elastic search"

# Use each record's own "Id" as the vector id so that re-running this cell
# overwrites the existing vectors instead of inserting the same documents
# again under newly generated ids.
# NOTE(review): assumes every record's "Id" is unique — confirm against the data.
doc_ids = [doc.metadata["Id"] for doc in final_docs]

# Embed and upsert the documents, then get a vector store bound to the index.
docsearch = PineconeVectorstore.from_documents(
    final_docs,
    embeddings,
    index_name=index_name,
    ids=doc_ids,
)

# Run the similarity query; the result is stored for inspection in the next cell.
docs_ = docsearch.similarity_search_with_relevance_scores(query=query)


In [26]:
# Print the raw search result; in the captured output each entry is a
# (Document, score) pair.
print(docs_)

[(Document(page_content='Develop the new search feature using ElasticSearch. Implement New Feature', metadata={'Id': 'd2f3g4h5-i6j7-k8l9-m1n2-o3p4q5r6s7t8', 'assigned': 'John Doe', 'progress': 'In Progress'}), 0.9209570585), (Document(page_content='Develop the new search functionality using elastic search. Implement New Feature', metadata={'Id': '4e9b7ecd-60ae-4b35-ae8c-22393abcd119', 'assigned': 'John Doe', 'progress': 'In Progress'}), 0.918690413), (Document(page_content='Review all indexes on the company database for their effectiveness. Database Optimization', metadata={'Id': '9f8d7c6e-ab7d-4e2c-8c34-56789f0ed3f2', 'assigned': 'Mia Wallace', 'progress': 'Not Started'}), 0.8908236625), (Document(page_content='Conduct SEO optimization to improve website ranking in search engines. SEO Optimization', metadata={'Id': 'm9n8b7v6-c5x4-z3a2-s1d9-f8g7h6j5k4l3', 'assigned': 'Felicity Smoak', 'progress': 'Completed'}), 0.890244037)]
