# PairWise 
An AI platform that uses a semantic search engine to connect climate tech builders.

In [2]:
!pip install llama-index --upgrade
!pip install python-dotenv
!pip install llama-index-core --upgrade
!pip install llama-index-embeddings-openai --upgrade
!pip install pinecone-client
!pip install llama-index-vector-stores-pinecone

Collecting llama-index-core<0.12.0,>=0.11.14 (from llama-index)
  Using cached llama_index_core-0.11.14-py3-none-any.whl.metadata (2.4 kB)
Using cached llama_index_core-0.11.14-py3-none-any.whl (1.6 MB)
Installing collected packages: llama-index-core
  Attempting uninstall: llama-index-core
    Found existing installation: llama-index-core 0.10.68.post1
    Uninstalling llama-index-core-0.10.68.post1:
      Successfully uninstalled llama-index-core-0.10.68.post1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llama-index-vector-stores-pinecone 0.1.7 requires llama-index-core<0.11.0,>=0.10.11.post1, but you have llama-index-core 0.11.14 which is incompatible.
llama-index-readers-s3 0.1.8 requires llama-index-core<0.11.0,>=0.10.37.post1, but you have llama-index-core 0.11.14 which is incompatible.
llama-index-readers-s3 0.1.8 requires llama-index-readers-fil

In [5]:
import json 
import pprint
from llama_index.core import Document 
from llama_index.core.node_parser import JSONNodeParser
from llama_index.core import VectorStoreIndex
import nest_asyncio
from dotenv import load_dotenv
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

nest_asyncio.apply()  # Allow async calls
load_dotenv()

True

In [6]:
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
llm = OpenAI(model="gpt-4-turbo")

# Global Embed Model
# Settings.llm = llm
# Settings.embed_model = embed_model
# Settings.chunk_size = 1024

In [7]:
data = None 

file_path = 'data.json'
with open (file_path, 'r') as f:
    data = json.load(f)

In [8]:
documents = [Document(text=json.dumps(item)) for item in data['responses']]

In [9]:
pprint.pprint(documents, width=10)

[Document(id_='65af1f5a-f3cf-46f4-9915-61e3a067e759', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='{"user_id": 1, "question": "What are you building?", "answer": "We are developing a cutting-edge energy-efficient cooling system for data centers that reduces power consumption by 40%, aiming to minimize the carbon footprint of large-scale computing infrastructure."}', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='c92b5241-9d59-4ee6-b361-ba45dd043d6d', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='{"user_id": 1, "question": "Where are you located?", "answer": "Our main office is located in Amsterdam, Netherlands, with additional R&D facilities in Denmark."}', mimetype='text/plain', start_char_idx=None, end_char_

In [10]:
json_parser = JSONNodeParser(include_metadata=True,include_prev_next_rel=True)

nodes = json_parser.get_nodes_from_documents(documents)

In [11]:
pprint.pprint(nodes[1].get_content(metadata_mode='all'))

('user_id 1\n'
 'question Where are you located?\n'
 'answer Our main office is located in Amsterdam, Netherlands, with additional '
 'R&D facilities in Denmark.')


In [13]:
index = VectorStoreIndex(nodes=nodes)
query_engine = index.as_query_engine()

response = query_engine.query('Give me the name of all builders who are working on carbon software')

pprint.pprint(response)

Response(response='User_id 5',
         source_nodes=[NodeWithScore(node=TextNode(id_='2b39836c-098a-4e3c-b270-560ec2c4a7c9', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='da293f32-7223-4cd1-9d1c-0ac3c3bbb49a', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='18bb4e318e1f1b847c6b04c394be6bcf84625e6693353277d82cb0c36e79809a')}, text='user_id 5\nquestion What are you building?\nanswer We’re building a blockchain-based carbon credit marketplace that allows businesses to trade verified carbon offsets with full transparency and traceability.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8077735370662964),
                       NodeWithScore(node=TextNode(id_='313f5449-790c-49de-970e-caaf3333afc3', embedding=None, metadata={}, exclude