# PairWise 
An AI platform that uses a semantic search engine to connect climate tech builders.

In [1]:
!pip install llama-index --upgrade
!pip install python-dotenv
!pip install llama-index-core --upgrade
!pip install llama-index-embeddings-openai --upgrade
!pip install pinecone-client
!pip install llama-index-vector-stores-pinecone

Collecting llama-index
  Downloading llama_index-0.11.16-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-core<0.12.0,>=0.11.16 (from llama-index)
  Downloading llama_index_core-0.11.16-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-llms-openai<0.3.0,>=0.2.10 (from llama-index)
  Downloading llama_index_llms_openai-0.2.11-py3-none-any.whl.metadata (649 bytes)
Downloading llama_index-0.11.16-py3-none-any.whl (6.8 kB)
Downloading llama_index_core-0.11.16-py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading llama_index_llms_openai-0.2.11-py3-none-any.whl (12 kB)
Installing collected packages: llama-index-core, llama-index-llms-openai, llama-index
  Attempting uninstall: llama-index-core
    Found existing installation: llama-index-core 0.11.15
    Uninstalling llama-index-core-0.11.15:
      Successfully uninstalled llama-index-core-0.11.15
  Att

In [2]:
import json 
import pprint
from llama_index.core import Document 
from llama_index.core.node_parser import JSONNodeParser
from llama_index.core import VectorStoreIndex
import nest_asyncio
from dotenv import load_dotenv
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

nest_asyncio.apply()  # Allow async calls
load_dotenv()

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/anaconda3/lib/python3.11/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [3]:
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
llm = OpenAI(model="gpt-4-turbo")

# Global Embed Model
# Settings.llm = llm
# Settings.embed_model = embed_model
# Settings.chunk_size = 1024

In [4]:
data = None 

file_path = 'data.json'
with open (file_path, 'r') as f:
    data = json.load(f)

In [5]:
documents = [Document(text=json.dumps(item)) for item in data['responses']]

In [6]:
pprint.pprint(documents, width=10)

[Document(id_='ae7fa7e3-c329-4500-9fe6-bf827ebf73a5', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='{"user_id": 1, "question": "What are you building?", "answer": "We are developing a cutting-edge energy-efficient cooling system for data centers that reduces power consumption by 40%, aiming to minimize the carbon footprint of large-scale computing infrastructure."}', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='d1ce2624-b74f-42bf-a676-bcf5124ed0e3', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='{"user_id": 1, "question": "Where are you located?", "answer": "Our main office is located in Amsterdam, Netherlands, with additional R&D facilities in Denmark."}', mimetype='text/plain', start_char_idx=None, end_char_

In [7]:
json_parser = JSONNodeParser(include_metadata=True,include_prev_next_rel=True)

nodes = json_parser.get_nodes_from_documents(documents)

In [8]:
pprint.pprint(nodes[1].get_content(metadata_mode='all'))

('user_id 1\n'
 'question Where are you located?\n'
 'answer Our main office is located in Amsterdam, Netherlands, with additional '
 'R&D facilities in Denmark.')


In [9]:
index = VectorStoreIndex(nodes=nodes)
query_engine = index.as_query_engine(
    similarity_top_k=5,
    verbose=True
)

response = query_engine.query('What builders are in SF working on climate risk management?')

pprint.pprint(response)

Response(response='Builders in San Francisco are working on climate risk '
                  'management by developing a blockchain-based carbon credit '
                  'marketplace, a cutting-edge energy-efficient cooling system '
                  'for data centers, a software platform using AI for waste '
                  'management optimization, and a hydrogen-powered '
                  'transportation network.',
         source_nodes=[NodeWithScore(node=TextNode(id_='5c889a24-4d46-4544-8d1a-8e83be2089d2', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='18da961f-eec3-4a75-b715-d9f10552f242', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='18bb4e318e1f1b847c6b04c394be6bcf84625e6693353277d82cb0c36e79809a')}, text='user_id 5\nquestion What are you building?\nanswer We’re building a blockchain-based carbon credit marketplace that allows businesses to trade