In [3]:
import pandas as pd
import faiss
import os

import tensorflow_hub as hub

from pprint import pprint

import openai
openai.api_key = os.environ["OPENAI_API_KEY"]

In [5]:
# IMPORTANT - used to find doc index
tokens_chunk_size = 512

In [6]:
legal_docs = pd.read_csv("../data/legal_text_classification.csv")
index = faiss.read_index("non_chunked_court_text.index")

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"

model = hub.load(module_url)
print ("module %s loaded" % module_url)
def embed(input):
  return model(input)

module https://tfhub.dev/google/universal-sentence-encoder/4 loaded


In [7]:
query1 = "claims related to shoe companies"
#query1 = "claims involving large sums of money"

# Encode the query into an embedding
query_embedding = embed([query1]).numpy()

# Search for similar embeddings in the index
D, I = index.search(query_embedding, k=5)  # Retrieve the top 5 similar documents

# Print the indices and distances of the similar documents
print("Top 5 similar documents:")
indices = []
for i, d in zip(I[0], D[0]):
    print(f"Index: {i}, Similarity Score: {1 - d}")
    indices.append(i)

Top 5 similar documents:
Index: 5748, Similarity Score: -0.34309840202331543
Index: 22063, Similarity Score: -0.3481215238571167
Index: 6910, Similarity Score: -0.3489185571670532
Index: 10535, Similarity Score: -0.3654189109802246
Index: 22065, Similarity Score: -0.37540555000305176


In [8]:
idx = 6910

pprint (legal_docs.iloc[idx]["case_title"])
pprint (legal_docs.iloc[idx]["case_text"])

'World Brands Management Pty Ltd v Cube Footwear Pty Ltd [2004] FCA 769'
('The above findings are based not only on the evidence in the proceedings, '
 'but on commonsense and judicial experience of the process people typically '
 'engage in when purchasing footwear. In World Brands Management Pty Ltd v '
 'Cube Footwear Pty Ltd [2004] FCA 769 at [7] (albeit in a passing off '
 'context) Heerey J said: Turning to the TPA and passing-off claims, I do not '
 'agree that the decision of Goldberg J in Dr Martens Australia Pty Ltd v '
 'Figgins Holdings Pty Ltd (1999) 44 IPR 281 establishes some rule of law as '
 'to the circumstances which pertain in the retail footwear trade. '
 "Nevertheless, his Honour's observations are valuable in another way. They "
 "light up and provoke one's memories from personal experience as to the way "
 'in which shoes are bought by retail consumers. As counsel for the '
 'respondents pointed out, shoes are usually not a quick impulsive purchase of '
 'someth

In [19]:
def answer_question(question, text, num_sentences=5, english_level=9):
  response = openai.ChatCompletion.create(
    model = "gpt-3.5-turbo",
    temperature = 0,
    messages = [{"role":"system", "content": f"Follow these instructions when writing the answer:\
      \n1. Different texts in the input are separated by ***"
      },
      {"role": "user", "content":f"Answer the following question about the text:{question}. text:{text}"}]
    )

  return response['choices'][0]['message']['content']


def summarize_text(text, num_sentences=5, english_level=9):
  response = openai.ChatCompletion.create(
    model = "gpt-3.5-turbo",
    temperature = 0,
    messages = [{"role":"system", "content": f"Follow these instructions when writing the summary:\
      \n1. Write a clear and concise summary consisting of {num_sentences} sentences \
      \n2. The summary's english level matches that of a person with {english_level} years of education \
      \n3. The summary should consist of an explanation of what the case is about, who's involved and the outcome"
      },
      {"role": "user", "content":f"Write a summary of the following text:{text}"}]
    )

  return response['choices'][0]['message']['content']

In [14]:
query_docs_list = legal_docs.iloc[indices]["case_text"].tolist()
texts = []
for query_doc in query_docs_list:
  texts.append(query_doc)

text_all = "***".join(texts)


In [16]:
pprint (answer_question("What's common in all of these cases?", text_all))

('***In all of these cases, the issue of knowledge is important in determining '
 'liability for copyright infringement. The courts consider the knowledge, '
 'capacity, and circumstances of the defendant when determining whether they '
 'had the requisite knowledge of the infringement. In the case of Raben '
 'Footwear Pty Ltd v Polygram Records Inc, the court found that knowledge '
 "could not be inferred from the defendant's pattern of trade. In Deckers "
 'Outdoor Corporation Inc v Farley, the court made declarations and ordered '
 'the defendant to deliver up counterfeit footwear and pay damages for '
 "infringing Deckers' intellectual property rights. In World Brands Management "
 'Pty Ltd v Cube Footwear Pty Ltd, the court considered the common process of '
 'purchasing footwear and the expectations of retail consumers. In Lockwood '
 'Security Products Pty Ltd v Doric Products Pty Ltd, the court discussed fair '
 'basis and the comparison between the claims made in a patent and

In [20]:
pprint (answer_question("What's common to all the companies mentioned in this text?", text_all))


('The common factor among all the companies mentioned in the text is that they '
 'are involved in copyright infringement or intellectual property rights '
 'violations.')


In [21]:
pprint (answer_question("What's the common industry for all the companies in these texts?", text_all))


('The common industry for all the companies in these texts is the footwear '
 'industry.')
