In [1]:
import faiss
from representations.contracts import contracts
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-mpnet-base-v2')

## Create index


In [18]:
from tqdm import tqdm 


count = 50

metadata = []
X = []

for i, (contract, paragraph_gen) in tqdm(enumerate(contracts.items())):
    paragraphs = list(paragraph_gen())
    metadata += [(contract, i, p) for i, p in enumerate(paragraphs)]
    paragraph_vecs = model.encode(paragraphs)
    X.append(paragraph_vecs)
    if i > count:
        break
        

51it [00:49,  1.04it/s]


In [19]:
import numpy as np

Xb = np.concatenate(X, axis=0)

Xb.shape

(16646, 768)

Create index

In [20]:
#num cells
nlist = 100
k = 4

dim = 768

quantizer = faiss.IndexFlatL2(dim)  # the other index
index = faiss.IndexIVFFlat(quantizer, dim, nlist)

index.train(Xb)
assert index.is_trained

index.add(Xb)

In [58]:
metadata[1024:1055]

[('Bright Horizons - Credit Agreement',
  13,
  'Now, Therefore, in consideration of the premises and the covenants and obligations contained herein, the parties hereto agree as follows:'),
 ('Bright Horizons - Credit Agreement',
  14,
  'Section 1. Refinancing Amendment.'),
 ('Bright Horizons - Credit Agreement',
  15,
  '(a) This Section 1 and Section 2 hereto constitute a "Refinancing Amendment" pursuant to which (i) each Refinancing Term Lender commits to make, severally but not jointly, to the Borrower Other Term Loans on the Effective Date in a principal amount equal to the amount set forth opposite such Refinancing Term Lender’s name under the heading "Other Term Loans" on Schedule 1 hereto (each, an "Other Term Loan Commitment") and (ii) each Refinancing Revolving Lender commits to make, severally but not jointly, to the Borrower Other Revolving Credit Commitments on the Effective Date in a principal amount equal to the amount set forth opposite such Refinancing Revolving Lende

In [55]:
def closest(query, k=10):
    Xq = model.encode([query])
    
    D, I = index.search(Xq, k)
    for i in range(k):
        distance = D[0][i]
        info = metadata[I[0][i]]
        contract = info[0]
        paragraph = info[2]
        
        print(f'{i+1:<2} ({distance:.3f}): {paragraph} ||| {contract} ')

query = "An \"Individual\" refers to any person, corporation, venture, company, organization or Government Institution."

print(f"Query: {query}")
print("Closest:")
closest(query)

Query: An "Individual" refers to any person, corporation, venture, company, organization or Government Institution.
Closest:
1  (0.473): "Person" means any individual, corporation, partnership, joint venture, limited liability company, trust or unincorporated organization or Governmental Authority. ||| ADMA 
2  (0.480): (i)   "Person" means an individual or entity including any limited liability company, a partnership, a joint venture, a corporation, a trust, an unincorporated organization and a government or any department or agency thereof. ||| 0001140361-19-013661:nc10003419x2_ex10-1 
3  (0.484): "Person" means an individual, corporation, partnership, limited liability company, trust or unincorporated organization or a government or any agency or political subdivision thereof, or any other entity. ||| GA - Purchase Agreement 
4  (0.498): "Person" means an individual or corporation, partnership, trust, incorporated or unincorporated association, joint venture, limited liability compa

In [56]:
query = "Section Digital and records Access. Each individual shall keep complete and accurate records"

print(f"Query: {query}")
print("Closest:")
closest(query)


Query: Section Digital and records Access. Each individual shall keep complete and accurate records
Closest:
1  (0.531): 8. Access to Records and Documents ||| IMA between Black Rock and the Fed 
2  (0.606): Section 2.5 Computer and Books and Records Access 11 ||| ADMA 
3  (0.762): Section 2.5  Computer and Books and Records Access. Each Party shall keep complete and accurate records in all material respects in connection with the provision of Services and such records shall be kept in sufficient detail to permit independent audit of such records in accordance with this Section 2.5. Subject to the confidentiality restrictions set forth herein, during the applicable Term, each Party shall, and shall cause the other members of its Group to, provide reasonable access to the other Party and its legal representatives or independent accountants or auditors to all of its respective computer equipment and software and historical and current books and records as is reasonably necessary for the 

In [64]:
query = "These sections constitute a \"Refinancing Amendment\" pursuant to"

print(f"Query: {query}")
print("Closest:")
closest(query)


Query: These sections constitute a "Refinancing Amendment" pursuant to
Closest:
1  (0.247): Section 1. Refinancing Amendment. ||| Bright Horizons - Credit Agreement 
2  (0.375): "Refinancing Amendment". An amendment to this Credit Agreement in form and substance reasonably satisfactory to the Administrative Agent and the Borrowers executed by (a) each of the Borrowers, (b) the Administrative Agent, (c) each Other Term Loan Lender and Other Revolving Loan Lender, as applicable, and (d) each then existing Lender that agrees to provide any portion of the Credit Agreement Refinancing Indebtedness being incurred pursuant thereto, in accordance with §6.20. ||| 0001193125-18-189473:d794764dex101 
3  (0.403): Refinancing Amendments 6.21 ||| 0001193125-18-189473:d794764dex101 
4  (0.432): 6.20 Refinancing Amendments. ||| 0001193125-18-189473:d794764dex101 
5  (0.463): (b) The Administrative Agent shall promptly notify each Lender as to the effectiveness of each Refinancing Amendment. Each of th