In [None]:
import numpy as np
import pandas as pd

from src.chatbot.prompt_templates import default_prompt_template
from src.chatbot.base import ChatbotBase
from src.chatbot.config import ChatbotConfig
import os
from src.vectordb.gcp_vector_search.transform_and_load import single_text_embedding
import pandas as pd

# Load the pre-chunked site-lease document (JSON Lines: one record per line).
chunks_path = "chunks_working_site_lease.json"
data = pd.read_json(chunks_path, lines=True)

# Environment configuration for this session.
# NOTE(review): presumably read by the Google Cloud client libraries and the
# project's `src` modules -- confirm which keys each of them actually uses.
os.environ.update(
    {
        "GOOGLE_APPLICATION_CREDENTIALS": "prj-ilios-ai.json",
        "PROJECT_ID": "602280418311",
        "LOCATION": "us-west1",
        "GCS_BUCKET": "doc_ai_storage",
    }
)

In [None]:
# Build the chatbot configuration first so each setting sits on its own line,
# then hand it to the chatbot together with the loaded chunk table.
chatbot_config = ChatbotConfig(
    vector_store_id='projects/602280418311/locations/us-west1/indexEndpoints/2259901015358701568',
    index_id='projects/602280418311/locations/us-west1/indexes/507481790823268352',
    prompt_template=default_prompt_template,
    max_documents=5,
)
chatbot = ChatbotBase(config=chatbot_config, data=data)

In [None]:
# Ask the chatbot a single question; the call's return value is the cell output.
chatbot.invoke(
    prompt_input={
        'question': "Who is the tenant of the site lease?",
    }
)

In [None]:
from google.cloud import aiplatform_v1

# Identifiers of the deployed index this cell queries.
API_ENDPOINT = "186034444.us-west1-602280418311.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/602280418311/locations/us-west1/indexEndpoints/7273111088063840256"
DEPLOYED_INDEX_ID = "chatbot_docs_1712048074931"

# Match-service client pointed at the index's API endpoint.
client_options = dict(api_endpoint=API_ENDPOINT)
vector_search_client = aiplatform_v1.MatchServiceClient(client_options=client_options)

# Embed the question; the resulting vector is the query datapoint.
feature_vector = single_text_embedding("Who is the tenant of the site lease?")
datapoint = aiplatform_v1.IndexDatapoint(feature_vector=feature_vector)

# One query asking for the 10 nearest neighbors of that datapoint.
query = aiplatform_v1.FindNeighborsRequest.Query(datapoint=datapoint, neighbor_count=10)

# A request may carry several queries; only one is sent here.
# return_full_datapoint=False is passed, unlike the later cell that sets it to True.
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query],
    return_full_datapoint=False,
)

# Run the search and dump the raw response.
response = vector_search_client.find_neighbors(request)
print(response)

In [None]:
from google.cloud import aiplatform
from typing import List


def vector_search_find_neighbors(
        project: str,
        location: str,
        index_endpoint_name: str,
        deployed_index_id: str,
        queries: List[List[float]],
        num_neighbors: int,
) -> None:
    """Run a nearest-neighbor lookup on an index endpoint and print the result.

    The Vertex AI SDK is initialised for the given project and location, an
    endpoint object is created for ``index_endpoint_name``, and the response
    of its ``find_neighbors`` call is printed. Nothing is returned.

    Args:
        project (str): Required. Project ID.
        location (str): Required. The region name.
        index_endpoint_name (str): Required. Index endpoint to run the query
            against.
        deployed_index_id (str): Required. The ID of the DeployedIndex to run
            the queries against.
        queries (List[List[float]]): Required. A list of queries. Each query
            is a list of floats, representing a single embedding.
        num_neighbors (int): Required. The number of neighbors to return.
    """
    aiplatform.init(project=project, location=location)

    # Collect the search arguments up front so the call below stays short.
    # NOTE(review): an override of 1 presumably makes the service search every
    # leaf node -- confirm against the SDK documentation.
    search_kwargs = dict(
        deployed_index_id=deployed_index_id,
        queries=queries,
        num_neighbors=num_neighbors,
        fraction_leaf_nodes_to_search_override=1,
    )

    # Wrap the already-existing endpoint and query it.
    endpoint = aiplatform.MatchingEngineIndexEndpoint(
        index_endpoint_name=index_endpoint_name
    )
    print(endpoint.find_neighbors(**search_kwargs))

In [None]:
# Query the same deployed index through the SDK helper.
# The original cell passed an undefined name (`embed`), which raises NameError
# on a fresh kernel. `feature_vector` is the embedding of the tenant question
# computed in the earlier raw-client cell, which targets this same endpoint.
vector_search_find_neighbors(
    project="602280418311",
    location="us-west1",
    index_endpoint_name="projects/602280418311/locations/us-west1/indexEndpoints/7273111088063840256",
    deployed_index_id="chatbot_docs_1712048074931",
    queries=[feature_vector],
    num_neighbors=10,
)

In [None]:
# Embed a full lease clause (tenant insurance requirements) so it can be
# compared against the embedding of a short question.
insurance_clause = """(c) Tenant's insurance policies required by this Lease but excluding any insurance on\nTenant's Property shall: (i) be issued by insurance companies licensed to do business in North Carolina\nwith a general policyholder's ratings of at least A- and a financial rating of at least VI in the most current\nBest's Insurance Reports available on the Commencement Date; (ii) name Landlord as additional insured\nas its interest may appear; (iii) provide that the insurance not be canceled, non-renewed or coverage\nmaterially reduced unless thirty (30) days advance notice is given to Landlord; (iv) be non-assessable\nprimary policies, and non-contributing with any insurance that Landlord may carry; (v) provide that any\nloss shall be payable notwithstanding any negligence of Landlord or Tenant which might result in a\nforfeiture of such insurance or the amount of proceeds payable; and (vi) have no deductible exceeding\nTen Thousand Dollars ($10,000.00), unless approved in writing by Landlord"""
embed_3 = single_text_embedding(insurance_clause)

In [None]:
# Embed the short tenant question; this vector is the query used further down.
tenant_question = "What is the tenant of the site lease?"
embed_4 = single_text_embedding(tenant_question)

In [None]:
embed_2 = [-0.0034991091,-0.0233696476,-0.0421388261,0.0477666631,0.0060913651,-0.0253267996,0.0020537516,-0.0051893047,-0.0310597811,0.0118867457,0.0366741046,-0.0697392002,0.0248793066,0.0352613628,-0.0012418637,-0.0424266644,-0.0196955726,-0.0558928065,0.0232175924,0.0315387584,-0.0428622328,0.0143539403,0.0146647058,0.0429491922,-0.0214416981,-0.0734510869,0.0182244871,-0.0084116422,-0.0483804457,0.0412532687,-0.0087221917,-0.0217027552,-0.0143707637,0.002418027,0.0257341936,0.0015057245,-0.0161259416,0.0145180039,-0.0533782132,0.0425810516,-0.0075381696,-0.0155714676,0.0123976478,0.026758844,-0.0393180363,-0.0198563766,-0.0132647976,0.0254979432,0.0327559449,-0.0088870469,-0.080253154,-0.0136162359,-0.0147633003,0.035525158,0.028825799,0.0244631842,-0.033180628,-0.0540859886,0.0427035354,0.0048515755,-0.0054342211,-0.0202230625,-0.036069721,-0.021036271,0.0021942558,-0.0136536323,-0.000002567,-0.003287477,-0.0755686089,-0.008394354,-0.0120300148,0.0029070613,-0.0343150795,-0.0353784487,0.0692424253,-0.0116633866,0.0025846949,-0.0093311965,0.0587918051,-0.0411519147,-0.0069361888,-0.0530003682,-0.050555218,-0.0876256302,-0.0063042287,0.0577879846,-0.0585103966,0.0165464133,-0.0126776723,0.0045538112,-0.0617779903,0.0406784192,0.0401356556,0.01911732,-0.0666031912,0.0445138887,-0.0321518555,0.001278328,0.0200167224,0.0081635974,0.0022405719,-0.067061469,0.0480189882,0.0162029881,0.0477041677,-0.0259232298,0.0141522288,0.0198953301,0.0241169259,-0.041649539,-0.1070413738,0.0221252404,0.01084683,0.0489420369,-0.0223618429,-0.0073282439,0.0120398048,-0.0110956617,0.0323402472,0.0084986594,-0.0412436575,0.0064984532,0.044170171,0.0374077559,-0.0414552763,-0.0011167346,0.0221918412,0.0010448323,0.0226362702,0.0233472362,0.037012808,-0.0001528977,0.0438031107,-0.0491535701,0.0353979841,0.0234732125,0.0186430272,0.026288148,0.0424710847,-0.0183439348,-0.0508230403,-0.0260541197,-0.0219089966,0.0182208735,-0.0411196202,-0.0298320893,0.0486889444,-0.0304209013,-0.05051784
22,0.0027738325,-0.0116670467,0.0198304765,-0.0300189201,0.0653778389,0.0450519808,0.043645788,0.0893565714,0.0621756501,-0.0344981141,-0.0393007696,-0.0062149665,-0.0001165537,-0.016449675,0.0125021767,0.0370618254,-0.0334205069,0.0249869227,-0.0109017454,-0.0200346354,0.0240227487,0.0435083732,-0.1394375712,0.0072530117,-0.0532597825,0.0706319362,-0.0074917222,-0.0101934876,0.0632281974,-0.0033826921,-0.0303224679,-0.0439217016,-0.0369939581,0.0054521379,0.0742033347,-0.0418813042,-0.0432410464,-0.0223675258,-0.0137479482,-0.0742736235,0.0748964772,0.0093003688,0.0046690125,0.0379185975,-0.0747463852,0.006181635,-0.0014268381,0.0228775889,-0.1854632646,-0.0018218627,-0.0064593982,-0.0301949847,-0.0349941552,-0.0231931508,0.0514951348,-0.0287035983,0.0155781731,-0.0187427271,-0.0169345848,0.0081800809,0.0145604536,0.0000600662,0.0159894284,0.0400648527,-0.0187256131,0.0409999639,-0.0570710637,-0.0032163728,-0.0833755806,-0.0320566036,0.0346993618,-0.0030746926,-0.0098449737,0.0514405593,0.0467943028,-0.0361259133,0.0237280764,0.0229929406,0.0251646098,-0.0013241556,0.0388376042,-0.0342621431,0.0089106038,-0.0659839138,-0.0295011532,0.0013180065,0.0026051369,-0.00364917,0.0110450424,-0.0631811246,-0.0234921724,-0.0014707254,0.0470047891,-0.0189223345,0.0201312229,0.0088342363,-0.0013799983,-0.0264693107,0.0619596168,0.0803109705,-0.0186870303,0.0705621913,-0.0036270358,0.0084214499,0.030841453,0.0189320184,0.0177668594,0.0351519249,0.0286031999,-0.0249742027,0.0632963628,-0.0020294813,0.0153411934,-0.0147371748,0.1069966704,-0.0110974954,-0.0258372705,-0.0947205722,-0.0193906426,0.0786093399,-0.057762716,0.005120276,-0.0008847501,0.0407751054,0.0541244075,-0.0191828646,0.0034632073,-0.0203118008,0.1123542041,0.0284458417,0.0687897578,-0.0054487186,-0.0424598679,-0.0408523716,-0.009846339,0.0165546946,0.0148603739,-0.0065665985,0.0284096934,-0.0017131795,-0.0128219044,0.0327752531,0.0055290121,-0.0172687285,0.0419230051,0.0071519744,0.0103470664,-0.0085625276,-0.0024
45301,-0.0574044548,0.007001535,-0.0374275297,-0.0565852486,0.0133862579,-0.0539768301,-0.0389995314,-0.0015421996,-0.0153392525,0.0087914597,0.0084358631,-0.0150694391,-0.0255121849,-0.0067493077,0.0195810199,0.0035526161,-0.0566508807,-0.0034884422,0.0076577081,-0.0019349164,-0.0933915973,0.0107047651,-0.0115160262,0.0153652178,0.0554238781,0.0353837609,-0.0210676547,-0.027215397,0.0106104482,-0.0032412542,0.0251307301,-0.0470810868,-0.0187066048,0.0101299705,-0.0635551363,0.006672801,-0.0267648622,0.0115926927,0.0128784981,0.0296223201,-0.0115370322,-0.0460843332,-0.0238200836,-0.0298679732,0.0000639048,-0.0325455926,-0.0113404067,0.0134047652,-0.0230354313,-0.0025871131,-0.0523332581,-0.0627388731,-0.0212673359,-0.0142941326,0.0307045914,0.0186006594,0.0553842634,0.0215871539,-0.0209571514,0.0197032969,0.0057999557,0.0092042582,-0.0579000525,0.016980093,0.0237500239,0.044862207,-0.0084266281,0.0135671552,-0.0614716336,0.0155199831,-0.0146702314,-0.0550364293,-0.0015922604,0.0014296541,-0.0143972309,-0.0090730153,-0.0830132365,0.0042938651,-0.0287492815,-0.000944942,0.0250573028,-0.0007404516,-0.0277212989,0.035752397,0.0124703972,0.0365368426,-0.012187223,0.0141844423,-0.0336403958,0.0501541495,0.0125470702,-0.0406807512,-0.0032230855,-0.0045050867,0.0216652956,-0.0364348888,-0.000770005,0.0458755791,-0.0606862418,-0.025416974,-0.0129167261,-0.0291719697,0.0383156873,0.0165243968,-0.034074001,-0.0035534296,0.0341295339,0.062714234,-0.0335174873,0.0526820496,0.0245500188,-0.0168648269,0.0445833057,-0.0515365899,0.0040267394,0.0138214715,0.0767099708,-0.0460297503,-0.0117450804,-0.0216282904,0.0015216907,0.0084925191,0.0197471641,-0.0383199565,-0.0464011058,0.0285539702,0.0430696458,0.0248608664,-0.0957421511,0.0098619666,-0.0508504063,-0.0019455106,0.0331905745,-0.0219282117,0.0277372599,-0.0237664077,-0.0031666525,-0.0341342464,0.0861587301,0.1042946279,-0.0311271548,-0.0160570405,-0.0516595393,0.0460308716,-0.0545721985,0.0497575775,-0.0066446387,0.0148544293,-
0.0100334743,-0.0075469879,-0.0199090932,-0.0377107151,-0.0156599302,0.0611127429,-0.0515599139,0.1046383902,0.0052605825,0.011920752,0.005479862,-0.0183068756,0.0286338143,-0.0285995584,-0.0515992194,-0.0312454179,-0.0178552382,0.0538751297,-0.0163744316,-0.0213479735,0.0517331883,0.0315655842,-0.0001380608,0.0063392743,-0.0696990266,0.039955236,-0.0337647162,-0.0259460397,0.0518101305,0.0089159403,0.0387492441,0.0570692755,-0.0074084424,-0.0166393071,-0.0558949858,-0.0143780084,-0.0246648397,-0.0163562745,0.0350407474,0.0230181925,-0.0355867743,0.0007139529,0.0616645627,0.0260947812,0.0192171726,-0.0580423959,-0.0071361396,-0.0166087393,-0.028712295,-0.0289504509,0.0252033249,-0.0337455235,-0.0023730616,-0.0038222882,0.0071793031,-0.0513693132,-0.0263140444,-0.0365366861,0.0015372715,0.0049859076,-0.0231081191,-0.0376094095,-0.0647981465,0.002364258,-0.0038517248,0.060268864,0.0123012271,-0.050487861,-0.0349940583,-0.1039752737,-0.0312081147,0.0585888401,0.0184070021,-0.0162264667,0.0185124222,0.0586102381,0.0184520204,-0.000984811,0.0334297828,0.0515676513,0.0076098917,-0.0016791953,-0.0518883131,0.0225263052,0.0571587868,0.0182260703,-0.0452563725,0.0113980109,-0.0266038924,-0.0319333971,0.0470807776,0.005000141,0.0141437342,0.0397194475,-0.0086695645,-0.0176817179,0.0075067016,0.0257455185,0.0438519418,0.0625418276,0.0462151282,0.0374457948,0.0623779073,0.0304862875,0.0291292928,-0.0298163705,-0.0464277901,0.0581965595,-0.0003414172,-0.0453834012,0.0170372259,-0.012942805,-0.0289583132,-0.013666058,0.0023278301,0.024735095,-0.0176657252,-0.037273027,0.010500554,-0.0044763461,0.0274685305,0.0108271847,0.0125035448,0.0118830148,0.008702442,-0.0313547142,-0.0083101811,-0.0132062081,0.0037650992,-0.0156070394,0.0101033645,-0.0286221877,0.006486496,0.0512357913,-0.0249098521,0.0203719512,0.0328566022,-0.0202126969,-0.0117038572,-0.0182240214,-0.0117892148,-0.0456281267,-0.0140988156,-0.0485767238,0.0410210937,-0.0964041501,-0.0093222409,0.005968478,-0.0837454423,0.0
18233981,0.0113476124,-0.0230880082,0.0022305513,-0.0109462962,0.0064453026,-0.0252813753,0.04584492,-0.0296660867,0.0026661607,-0.0070492858,0.0338032395,0.042777054,0.1078052074,0.0032939296,0.0289233848,-0.0145180989,0.0505786277,0.0135250576,-0.0288749598,-0.016723752,-0.0214153733,0.0452456586,-0.0216870699,0.0000414065,0.0222884919,0.0263976324,0.0336017162,-0.0380922779,0.0077707618,-0.0064156358,-0.0661674961,0.0493041053,-0.023404913,0.0077279652,-0.0471286066,0.0196763948,-0.0327235758,-0.0579171516,0.0020498578,-0.0373516306,-0.0229917355,0.0317149237,0.0454053022,0.0153092593,0.0792913958,-0.007038102,-0.0453609638,0.0046459078,0.0042184331,-0.0103882682,0.0105674528,-0.0133989397,-0.0238813683,0.0172389541,-0.002505433,-0.0219521467,0.0023159035,-0.0197741892,-0.0211779401,-0.0175551567,0.0092585096,-0.014452612,-0.0134940455,0.0286684111,-0.0271461438,0.0144177442,0.011665171,0.0519521572,-0.0737895742,0.0351427123,-0.0112201208,0.0097108772,0.0232595094,-0.0032425921,-0.0435618609,-0.014500631,-0.0041936119,-0.0467463098,-0.0100966608,-0.015534021,0.0052270978,0.0549816638,0.045256082,0.0090593491,-0.0555005185,0.0298840962,0.0115597453,-0.0525448024,0.0340931863,-0.0468240418,-0.0194191597,-0.0372440703,-0.0234420355,0.0361950584,-0.0211864468,0.0136072915,0.0030956978,-0.0073605273,0.0181839447,-0.0294046141,-0.0237928275,0.0057425755,0.0340283066,-0.0366167463,0.0113488436,-0.0022564579,0.0377973057,0.064141795,0.0349278636,-0.0466860309,-0.0128858779,0.0330601148,-0.0730191097,-0.0279023796,-0.0112652723,-0.0039826338,0.0538036823,0.0009376822,-0.0364233702,0.0428869426,-0.0802643448,0.0174389705,0.031971097,0.0203299839,0.0066944347,0.0332441553,0.0361502543,-0.0323051326,0.0147436066,0.01009533,-0.025862541,0.0186369065,0.0093366643,0.0096272193,0.0178775173,-0.0035740547,0.0139460415,0.036685884,0.0280833784,-0.0311740134,0.0200903974,-0.0007884472,0.0575863756,-0.0348063894,0.0318070687,0.060419783,0.0498434193,0.0024858846,-0.0009356618,0.019
4294471,0.0356991887,-0.0432700031,0.0232415162,-0.0424397215,-0.0838425234,0.00730081,-0.0631415993,0.0190430935,-0.0019075014,0.0494346991,0.0439437777,0.0033426576,0.0513805486,0.0553549193,0.0203787629,0.0254695322,-0.0050358213,-0.0067297462,0.0213070381,-0.0198717937,-0.0386063606,-0.0363508463]

In [None]:
import numpy as np
from numpy.linalg import norm
from scipy.spatial.distance import cosine

# Cosine similarity between the clause embedding and the question embedding:
# dot product divided by the product of the two vector norms.
dot_product = np.dot(embed_3, embed_4)
norm_product = norm(embed_3) * norm(embed_4)
dot_product / norm_product

In [None]:
import pandas as pd

# Read the chunk table again (same file as the first cell) so the cells below
# can map neighbor ids back to chunk text.
chunks_path = "chunks_working_site_lease.json"
data = pd.read_json(chunks_path, lines=True)

In [None]:
from google.cloud import aiplatform_v1

# Identifiers of the deployed index this cell queries.
API_ENDPOINT = "63565871.us-west1-602280418311.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/602280418311/locations/us-west1/indexEndpoints/2259901015358701568"
DEPLOYED_INDEX_ID = "chatbot_docs_endpoint_1712304273160"

# Match-service client pointed at the index's API endpoint.
client_options = dict(api_endpoint=API_ENDPOINT)
vector_search_client = aiplatform_v1.MatchServiceClient(client_options=client_options)

# The query datapoint is the previously computed question embedding.
datapoint = aiplatform_v1.IndexDatapoint(feature_vector=embed_4)

# One query asking for the 10 nearest neighbors of that datapoint.
query = aiplatform_v1.FindNeighborsRequest.Query(datapoint=datapoint, neighbor_count=10)

# A request may carry several queries; only one is sent here.
# return_full_datapoint=True is passed; the cells below read
# neighbor.datapoint.datapoint_id from the response.
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query],
    return_full_datapoint=True,
)

# Run the search and dump the raw response.
response = vector_search_client.find_neighbors(request)
print(response)

In [None]:
# For every neighbor of the first query, print its distance followed by the
# `text` column of the matching chunk rows (formatted as a pandas Series).
chunk_ids = data.id.astype(str)  # ids compared as strings against datapoint ids
first_query_result = response.nearest_neighbors[0]
for neighbor in first_query_result.neighbors:
    matching_text = data[chunk_ids == neighbor.datapoint.datapoint_id].text
    print(f"{neighbor.distance:.2f} {matching_text}")

In [None]:
def get_context(data, response):
    """Assemble a numbered context string from the first query's neighbors.

    Args:
        data: pandas DataFrame with an ``id`` column and a ``text`` column.
            Ids are converted to strings and compared against each neighbor's
            ``datapoint.datapoint_id``.
        response: Object exposing ``nearest_neighbors``; only the first entry
            is read (``nearest_neighbors[0].neighbors``).

    Returns:
        str: One entry per neighbor, in response order, each made of a
        ``Source: <n>`` header (``n`` starts at 1) followed by the chunk text.
        Entries are separated by a blank line. An empty string is returned
        when the response holds no query results or no neighbors.

    Raises:
        IndexError: If a neighbor's datapoint id has no matching row in
            ``data``.
    """
    # An empty response has nothing to index into; return an empty context.
    if len(response.nearest_neighbors) == 0:
        return ""

    # Convert the id column once instead of once per neighbor.
    chunk_ids = data["id"].astype(str)

    context = []
    for position, neighbor in enumerate(response.nearest_neighbors[0].neighbors, start=1):
        datapoint_id = neighbor.datapoint.datapoint_id
        matches = data.loc[chunk_ids == datapoint_id, "text"]
        if matches.empty:
            # Same exception type as the unguarded lookup raised, but with a
            # message that names the missing id.
            raise IndexError(
                f"No chunk with id {datapoint_id!r} found in data for neighbor {position}"
            )
        # When several rows share an id, the first matching row is used.
        context.append(f"Source: {position} \n " + matches.iloc[0])
    return "\n\n".join(context)

In [None]:
# Build the context string from the chunk table and the latest search response;
# the returned string is the cell output.
get_context(data=data, response=response)