In [56]:
import chromadb
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings

persistent_client = chromadb.PersistentClient()


collection = persistent_client.get_or_create_collection(
    "all-MiniLM-L6-v2_1000_split_clean")
display(collection.count())

2833

In [None]:
collection.peek()

In [58]:
# Create context string

from langchain.chat_models import ChatOpenAI
topic = 'parks'

embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
query = embedding_model.embed_documents(
    [topic],
)

southwark_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Southwark"},
    n_results=10,
)

southwark_results_string = '\n\n'.join(southwark_results['documents'][0][0:5])

th_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Tower_Hamlets"},
    n_results=10,
)
th_results_string = '\n\n'.join(th_results['documents'][0][0:5])


islington_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Islington"},
    n_results=10,
)
islington_results_string = '\n\n'.join(islington_results['documents'][0][0:5])


context = '''
Context from Southwark: 
{southwark_results}

Context from Tower Hamlets: 
{th_results}

Context from Islington: 
{islington_results}


'''.format(southwark_results=southwark_results_string,
           th_results=th_results_string, islington_results=islington_results_string)

print(context)

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1)

query_string = '''
Compare the way Southwark, Tower Hamlets and Islington approach to the topic of {topic}, listing the three biggest differences. 

{context} 
'''. format(context=context, topic=topic)

result = llm.call_as_llm(query_string)

print('-------')

print(result)


Context from Southwark: 
Road Nature Garden OS208 Lorrimore Square Gardens OS209 Elephant and Castle Peninsula OS210 More London OS211 Tate Modern Metropolitan Open Land Schedule ID Name OS4 Potter's Field (aka Tooley Street Park) OS6 Surrey Docks Sports Ground Southwark Plan 2022 573Schedule ID Name OS7 Surrey Docks Sports Ground (pitches2,3) OS8 Lavender Pond OS28 King Stairs Gardens OS36 Stave Hill OS37 Stave Hill Ecological Park OS38 Russia Dock Woodland OS53 Southwark Park OS56 Geraldine Mary Harmsworth Park OS91 Burgess Park OS98 Surrey Canal OS119 Dog Kennel Hill OS124a Peckham Rye Common OS124b Peckham Rye Park OS125 Nunhead Reservoir OS126 Nunhead Cemetery OS127 Ivydale Road Playing Field OS128 Greendale Playing Fields OS129 Greendale Astro Pitch OS132 Water Works OS133 Nunhead Allotments OS135 James Allens Girls School OS136 Charter School OS137 Waverly School OS139 James Allen's Girls School Sports Club OS140 Alleyn's School Playing Field OS142 Homestall Road Playing Ground

In [None]:
# Create context string

from langchain.chat_models import ChatOpenAI
topic = 'parks'

embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
query = embedding_model.embed_documents(
    [topic],
)

southwark_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Southwark"},
    n_results=10,
)

southwark_results_string = '\n\n'.join(southwark_results['documents'][0][0:5])

th_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Tower_Hamlets"},
    n_results=10,
)
th_results_string = '\n\n'.join(th_results['documents'][0][0:5])

islington_results = collection.query(
    query_embeddings=query,
    where={"LPA": "London_Borough_of_Islington"},
    n_results=10,
)
islington_results_string = '\n\n'.join(islington_results['documents'][0][0:5])


context = '''
Context from Southwark: 
{southwark_results}

Context from Tower Hamlets: 
{th_results}

Context from Islington: 
{islington_results}


'''.format(southwark_results=southwark_results_string,
           th_results=th_results_string, islington_results=islington_results_string)

print(context)

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1)

query_string = '''
Compare the way Southwark, Tower Hamlets and Islington approach to the topic of {topic}, listing the three biggest differences. 

{context} 
'''. format(context=context, topic=topic)

result = llm.call_as_llm(query_string)

print('-------')

print(result)

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings
import chromadb
from helpers import pinecone_connect


def compare_lpas(topic, lpa_1='Tamworth_Borough_Council', lpa_2='Northumberland_County_Council'):

    index = pinecone_connect()

    embedding_model = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2")

    # embed query
    query = embedding_model.embed_documents(
        [topic],
    )

    # Query
    lpa_1_results = index.query(
        vector=query,
        filter={
            "LPA": {"$eq": lpa_1},
        },
        top_k=8,
        include_metadata=True
    )

    display(lpa_1_results['matches'][0]['metadata']['text'])

    lpa_1_results_string = ''
    for match in lpa_1_results['matches'][0:8]:

        lpa_1_results_string += match['metadata']['text'] + '\n'

    lpa_2_results = index.query(
        vector=query,
        filter={
            "LPA": {"$eq": lpa_2},
        },
        top_k=8,
        include_metadata=True
    )
    display(lpa_2_results)

    lpa_2_results_string = ''
    for match in lpa_2_results['matches'][0:8]:

        lpa_2_results_string += match['metadata']['text'] + '\n'

    context = '''
    Context from {lpa_1}: 
    {lpa_1_results}

    Context from {lpa_2}: 
    {lpa_2_results}

    '''.format(lpa_1_results=lpa_1_results_string,
               lpa_2_results=lpa_2_results_string,
               lpa_1=lpa_1,
               lpa_2=lpa_2
               )

    print('context length')
    print(len(context))

    print(context)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1.5)

    query_string = '''
    Compare the way the following 2 Local Authorities approach to the topic of {topic}, listing the differences and similarities. 

    {context} 
    '''. format(context=context, topic=topic)

    result = llm.call_as_llm(query_string)

    return result


print('----')
print(compare_lpas('Cycling'))

----


IndexError: list index out of range

In [8]:
persistent_client = chromadb.PersistentClient()

collection = persistent_client.get_or_create_collection(
    "all-MiniLM-L6-v2_1000_split")

collection.peek()

{'ids': ['a6401ff0-dc97-47b8-9ce6-1de22654a3bb',
  '6e975d65-3826-4155-90d0-5971a4ccfa87',
  '7d5312b8-88ef-4b37-a12e-84f1da0d5a83',
  '1ab25033-d262-4688-93c3-49b37a4e6ba2',
  'e22d9485-8e6d-4650-93e9-28ade55cd886',
  '9ee0ab0b-c3e6-4842-99f5-5a650a89734e',
  '07a1be7a-c2cb-4cb7-bf20-87a9fc90bdf4',
  'e7d4381d-139a-472c-9667-7fcb66a36fe1',
  '4409a608-00f7-4e4f-b7a6-378a479cf1db',
  '8b4ffea3-f2a6-4873-840e-94abdd4a2bde'],
 'embeddings': [[0.007049312349408865,
   0.0037307392340153456,
   0.05868951231241226,
   -0.12151091545820236,
   -0.026203883811831474,
   0.06338552385568619,
   -0.06024651601910591,
   -0.018123134970664978,
   -0.15988048911094666,
   0.0583149716258049,
   -0.0285935141146183,
   -0.060437120497226715,
   0.021241866052150726,
   0.027746431529521942,
   -0.0008897087536752224,
   0.0534585602581501,
   0.031648579984903336,
   -0.07946105301380157,
   -0.028515184298157692,
   -0.015992064028978348,
   0.07133373618125916,
   -0.023155035451054573,
   0.01