In [1]:
# %%capture

# %pip install --upgrade transformers datasets jsonlines ai21 langchain chromadb boto3

In [2]:
import transformers
from datasets import load_dataset

dataset = load_dataset("pasinit/scotus",
                      split='train')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# dataset.select([1, 2])[0]

In [4]:
sample_texts = [it['text'] for it in dataset.select([3, 6, 9])]

In [5]:
# sample_texts[0]

In [6]:
import ai21

api_key = 'gAXhJl0FdF9SvRIdd90WYFoXBIk1uilx'

ai21.api_key = api_key

In [7]:
import json

- Who is the plaintiff?
- Who is the defendant?
- What is this case about?
- Why is this case important?
- Summarize the plaintiffs argument
- Summarize the defendants argument
- Extract the list of key points in the plaintiffs argument
- Extract the list of key points in the defendants argument
- Identify uncontested facts (occur in both arguments)
- Identify contested facts (conflicts between the arguments)
- Summary of the judgement
- Summary of dissenting argument

In [8]:
from ai21 import Segmentation
from langchain.docstore.document import Document

def get_segments(text):
    # print(text)
    response = ai21.Segmentation.execute(
        source=text,
        sourceType="TEXT")

    return [
        Document(page_content=it['segmentText']) for it in response['segments']]

segments = get_segments(sample_texts[2])

In [9]:
segments[1]

Document(page_content='The Court of Appeals modified the Board\'s order so that the employer would be free to recognize the assisted union not only when certified by the Board but, alternatively, when it "shall have been freely chosen as [their representative] by a majority of the employees after all effects of unfair labor practices have been eliminated."\n\nIt also struck from the Board\'s notice requirement certain references to the rival union.')

In [28]:
from langchain.chains import LLMChain
from langchain.llms import AI21
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from time import perf_counter
from langchain.llms import Bedrock

llm = Bedrock(
    model_id="ai21.j2-ultra-v1",
    model_kwargs={
        "maxTokens": 8191,
        "topP": .01,
        "temperature": .4,
    }
)



In [29]:
import boto3
from langchain.embeddings import BedrockEmbeddings
from langchain.vectorstores import Chroma

embeddings = BedrockEmbeddings()

db = Chroma.from_documents(segments, embeddings)

In [30]:
from langchain.chains import RetrievalQA

prompt_template = """
Human: Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}

Assistant:
Helpful Answer:
"""

prompt = PromptTemplate(
    template=prompt_template, 
    input_variables=['context', 'question'])

qa_chain = RetrievalQA.from_chain_type(llm,
                    retriever=db.as_retriever(
                        search_type="mmr",
                        search_kwargs={"k": 8}
                    ),
                    chain_type_kwargs={"prompt": prompt},
)

question = "What is this case about?"
qa_chain({"query": question})

{'query': 'What is this case about?',
 'result': 'This case is about a company giving illegal support and assistance to a union, and the appropriate remedy for this situation.'}

In [31]:
from pprint import pprint

result = qa_chain({"query": "Extract the list of key points in the plaintiffs argument"})

pprint(result['result'])

('1. The Teamsters Local was International Brotherhood of Teamsters, '
 'Chauffeurs, Warehousemen and Helpers of America, AFL, Local No. 612.\n'
 "2. The Board concurred in the Trial Examiner's findings that when the "
 'Teamsters Local was picketing the premises the company rendered illegal '
 'support and assistance to District 50 by negotiating the details of a '
 'contract with officials of that union before a single employee had actually '
 'authorized it as a representative, by showing the draft contract to the '
 'drivers at a meeting convened by and presided over by the company president, '
 'who assured them that if necessary he would advance the money for dues, '
 'after which, and within less than three hours, the drivers signed District '
 '50 authorization cards, established a local which held its first meeting, at '
 "the president's suggestion, on company premises, and concluded a contract "
 'with the company.\n'
 '3. Crampton Harris argued the cause for District 50, Un

In [32]:
result = qa_chain({"query": "Who is the plaintiff?"})

pprint(result['result'])

'The company'


In [33]:
result = qa_chain({"query": "Who is the petitioner?"})

pprint(result['result'])

'Dominick L. Manoli'


In [34]:
result = qa_chain({"query": "What was the decision in this case?"})

pprint(result['result'])

('The Court vacated the judgment of the Court of Appeals with instructions to '
 'remand the case to the Board for further proceedings consistent with this '
 'opinion.')


In [35]:
result = qa_chain({"query": "What is the board this case refers to?"})

pprint(result['result'])

'National Labor Relations Board'


In [36]:
result = qa_chain({"query": "What was the dissenting opinon in this case?"})

pprint(result['result'])

("The dissenting opinion in this case argued that the Board's order to remove "
 'an assisted union was appropriate and adapted to the situation calling for '
 'redress, and did not extend "preferred treatment" to the noncomplying union. '
 'The dissenting opinion also argued that the modifications of the '
 'cease-and-desist order made by the Court of Appeals went beyond permissible '
 'limits of judicial review under 10 (f) and cannot be sustained.')


In [37]:
result = qa_chain({"query": "Why is this case important?"})

pprint(result['result'])

('This case is important because it establishes the limits on the power of the '
 'National Labor Relation Board (NLRB) to apply remedies in unfair labor '
 "practice cases. It clarifies that the NLRB's power is not limitless and must "
 'be "appropriate" and "adapted to the situation which calls for redress." The '
 'case also emphasizes the need to follow proper procedures, such as raising '
 'objections before the Board, to ensure fair administration of labor laws.')


In [38]:
result = qa_chain({"query": "What are the most important questions in this case?"})

pprint(result['result'])

('The most important question in this case is what is the appropriate remedy '
 'for an employer who has given unlawful assistance to a union.')


In [39]:
result = qa_chain({"query": "What are the conflicting facts between the arguments of the two sides?"})

pprint(result['result'])

('The conflicting facts between the arguments of the two sides are that the '
 'Board argues that District 50 is an assisted but not a dominated union, '
 'while the company argues that District 50 is a dominated union.')


In [40]:
result = qa_chain({"query": "Explain the thought process to understand if District 50 is a dominated or an assisted union?"})

pprint(result['result'])

('District 50 was found by the Board to be an assisted but not a dominated '
 "union, so that a free choice of District 50 by Bowman's employees is a "
 'reasonable possibility. Therefore the certification requirement here '
 "misapplies the Board's own policy by actually defeating the statutory rights "
 "of Bowman's employees.")


In [41]:
result = qa_chain({"query": "Why was District 50 determined to be an assisted union?"})

pprint(result['result'])

('District 50 was determined to be an assisted union because the company '
 'negotiated the details of a contract with officials of that union before a '
 'single employee had actually authorized it as a representative, by showing '
 'the draft contract to the drivers at a meeting convened by and presided over '
 'by the company president, who assured them that if necessary he would '
 'advance the money for dues, after which, and within less than three hours, '
 'the drivers signed District 50 authorization cards, established a local '
 "which held its first meeting, at the president's suggestion, on company "
 'premises, and concluded a contract with the company.')


In [47]:
import os

os.system('pwd')

/Users/famestad/dev/extract-ai21-hackathon/EarningsCall_Dataset


0