### Code

In [40]:
import numpy as np
import pandas as pd
import os
from utils.case_directory import CaseDirectory
from utils.case_metadata import CaseMetadata
from extractors.jury_ruling_classifier import JuryRulingClassifier
from extractors.bench_ruling_classifier import BenchRulingClassifier

In [10]:
def summarize_trial_classification():
    """Print how well the automatic trial flag agrees with the labeled cases.

    Categorizes the cases under ``100_random_sample`` into
    ``random_categories.csv``, then compares that file's ``trial`` column with
    ``labeled_cases.csv``, where a case counts as a trial when its
    ``trial_type`` is ``"jury"`` or ``"bench"``.

    Prints the fraction of labeled rows on which the two agree, followed by
    the ``trial_type`` counts of the rows on which they disagree.

    NOTE(review): rows are paired by index label rather than by a case key, so
    both CSV files are assumed to list the same cases in the same order --
    confirm.
    """
    case_dir = CaseDirectory("100_random_sample")
    case_dir.categorize_cases("random_categories.csv")
    predicted = pd.read_csv("random_categories.csv")
    labeled = pd.read_csv("labeled_cases.csv")

    print("Percentage keyword cases correctly classified")
    # True where the label says the case went to a jury or bench trial.
    labeled_as_trial = (labeled.trial_type == "jury") | (labeled.trial_type == "bench")
    agrees = predicted.trial == labeled_as_trial
    print(len(labeled[agrees]) / len(labeled))

    print("Categorization of cases misclassified")
    labeled_as_trial = (labeled.trial_type == "jury") | (labeled.trial_type == "bench")
    disagrees = predicted.trial != labeled_as_trial
    print(labeled[disagrees].trial_type.value_counts())

In [2]:
d = CaseDirectory("100_random_fed")

In [3]:
d.write_metadata("fed_metadata.csv")

In [4]:
d.categorize_outcomes("fed_metadata.csv", "fed_log.csv")

Cases to categorize: 29
Categorizing case 1
Extracting from metadata...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant docket_report entries')
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 3, updating n_results = 3


- Querying llm...
- Response: {'reasoning': 'The documents describe challenges sounding in Rule 12(b)(6) and the enforcement of private agreements that require a qui tam plaintiff to turn over evidence, which does not identify the result of the jury trial.', 'category': 'undetermined'}
Categorizing case 2
Extracting from metadata...
- Getting relevant chunks...


  metadata.loc[index, "trial_result"] = category


- Response: ({'category': 'undetermined'}, 'No relevant docket_report entries')
Extracting from documents...
- Getting relevant chunks...
- Querying llm...
- Response: {}
Categorizing case 3
Extracting from metadata...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant docket_report entries')
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 4, updating n_results = 4


- Querying llm...
- Response: {'reasoning': 'According to the documents, Plaintiff alleges violations of the Eighth and Ninth Amendments of the United States Constitution. This shows that the jury ruled in favor of Gomez because the allegations state a claim for excessive force in violation of the Fourth Amendment against Defendants Fausnett and Verhoeven.', 'category': 'plaintiff'}
Categorizing case 4
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 5, updating n_results = 5


- Querying llm...
- Response: {'reasoning': "The jury's verdict is supported by adequate evidence presented at trial, and it should not be disturbed.", 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...
- Querying llm...
- Response: {'reasoning': 'The Joint Final Pretrial Order contains several sections indicating that Plaintiffs are seeking certain relief, including summary judgment for no on-sale bar (Docket No. 97), which suggests that Plaintiffs are attempting to prove infringement by Defendant without authority or license.', 'party': 'Plaintiff', 'case_number': '6:14-cv-00068-KNM'}
Categorizing case 5
Extracting from metadata...
- Getting relevant chunks...
- Querying llm...
- Response: {'reasoning': "According to the documents, on March 2, 2022, the Court issued an order sua sponte, severing Chamberlains affirmative claims of infringement and Defendants' counterclaims from this action and consolidating them with Case No. 2:21-CV-00084-JRG to be 

Number of requested results 8 is greater than number of elements in index 3, updating n_results = 3


- Querying llm...
- Response: {'reasoning': "According to the documents, the jury returned a verdict finding that Defendants infringed Claims 4 and 20 of the '404 patent. This shows that the jury ruled in favor of The Chamberlain Group LLC because they found infringement of their patent.", 'category': 'plaintiff'}
Categorizing case 6
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 2, updating n_results = 2


- Querying llm...
- Response: {'reasoning': 'The documents describe scheduling events such as a Pretrial Conference, Jury Selection, and Markman Hearing, but do not identify the result of the jury trial.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 5, updating n_results = 5


- Querying llm...
- Response: {'reasoning': 'The documents describe court orders and motions that do not identify the result of the jury trial.', 'category': 'undetermined'}
Categorizing case 7
Extracting from metadata...
- Getting relevant chunks...
- Querying llm...
- Response: {'reasoning': 'The documents describe pre-trial conference proceedings and compliance with pre-trial disclosures, which does not identify the result of the jury trial.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant documents')
Categorizing case 8
Extracting from metadata...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant docket_report entries')
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 1, updating n_results = 1


- Querying llm...
- Response: {'reasoning': 'The documents describe assessments regarding antitrust injuries and the process for resolving them through motions, which does not identify the result of the jury trial.', 'category': 'undetermined'}
Categorizing case 9
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 1, updating n_results = 1


- Querying llm...
- Response: {'reasoning': 'According to the documents, a complaint was filed by Jeremy L. Brownfield against Robert Wilkie. This shows that the jury ruled in favor of _ because the plaintiff requested a jury trial.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant documents')
Categorizing case 10
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 7, updating n_results = 7


- Querying llm...
- Response: {'reasoning': 'The documents describe a case with plaintiffs and defendant, but they do not identify the result of the jury trial.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 2, updating n_results = 2


- Querying llm...


KeyboardInterrupt: 

In [109]:
df = pd.read_csv("federal_trial_docs.csv")

In [137]:
# metadata.json is addressed two directory levels above each document file.
df["metadata_path"] = df.path.apply(
    lambda doc_path: f"{Path(doc_path).parents[1]}/metadata.json"
)

In [159]:
# Export only the rows that actually carry a label. Unlabeled rows can be
# either "" or NaN: creating the column through df.loc[row, "category"] = ...
# leaves every other row as NaN, and empty CSV cells are read back as NaN.
# A plain `!= ""` test would let those NaN rows through.
df[df.category.notna() & (df.category != "")].to_csv("fed_test_key.csv", index=False)

In [162]:
list(df[["metadata_path", "category"]].itertuples(index=False, name=None))

[('100_random_fed/Texas_Eastern_District_Court/2--21-cv-00084/metadata.json',
  'plaintiff'),
 ('100_random_fed/Wisconsin_Western_District_Court/3--14-cv-00099/metadata.json',
  'defendant'),
 ('100_random_fed/California_Northern_District_Court/5--16-cv-01370/metadata.json',
  'defendant'),
 ('100_random_fed/New_Jersey_District_Court/1--16-cv-00395/metadata.json',
  'defendant'),
 ('100_random_fed/New_York_Southern_District_Court/1--10-cv-02881/metadata.json',
  ''),
 ('100_random_fed/North_Carolina_Western_District_Court/3--15-cv-00309/metadata.json',
  ''),
 ('100_random_fed/California_Central_District_Court/2--06-cv-03923/metadata.json',
  'defendant'),
 ('100_random_fed/California_Central_District_Court/5--15-cv-01921/metadata.json',
  'defendant'),
 ('100_random_fed/Illinois_Central_District_Court/1--04-cv-01118/metadata.json',
  'plaintiff')]

In [135]:
df.loc[0, "category"] = "plaintiff"

In [150]:
m = CaseMetadata.from_metadata_path(df.loc[5, "metadata_path"])

In [153]:
dr = m.get_docket_report()
dr[dr.document_path != ""].contents.iloc[0]

"CLERK'S JUDGMENT is hereby entered in accordance with the Court's Order dated  October 6, 2017.  Signed by Clerk, Frank G. Johns.  (tmg)"

In [154]:
m.get_document_by_docket_report_title(dr[dr.document_path != ""].contents.iloc[0])

{'100_random_fed/North_Carolina_Western_District_Court/3--15-cv-00309/Poole_v._Gaston_County_et_al/134.txt': 'United States District Court \nWestern District of North Carolina \nCharlotte Division \n \n \n \nGenger Poole, \nPlaintiff(s), \nvs. \n \nGaston County, et al \n \nDefendant(s). \n) \n) \n) \n) \n) \n) \n) \n \n \n \n \n \nJUDGMENT IN CASE \n3:15-cv-00309-DCK \n \n \n DECISION BY COURT. This action having come before the Court by Motion and a decision \nhaving been rendered; \nIT IS ORDERED AND ADJUDGED that Judgment is hereby entered in accordance with the \nCourt’s October 6, 2017 Order. \n \n \n \n \n \nOctober 6, 2017 \n \n \n \n \n \n \n \n \n \nCase 3:15-cv-00309-DCK   Document 134   Filed 10/06/17   Page 1 of 1\n\x0c'}

In [17]:
print(log.document_context[3])

Case 6:14-cv-00068-KNM   Document 182   Filed 03/04/16   Page 1 of 8 PageID #:  14167
UNITED STATES DISTRICT COURT 
FOR THE EASTERN DISTRICT OF TEXAS 
TYLER DIVISION 
  
   
 Civil Action No. 6:14-CV-0068         
 JURY TRIAL DEMANDED 
EQUISTAR CHEMICALS, LP and  
MSI TECHNOLOGY, L.L.C., 
  
 
v. 
 
WESTLAKE CHEMICAL CORP., 
 
Plaintiffs, 
Defendant. 
PLAINTIFFS’ RESPONSE TO DEFENDANT’S MOTION FOR A BENCH TRIAL, OR 
ALTERNATIVELY A BIFURCATED TRIAL, ON WILLFUL INFRINGEMENT 
 
                        
 
PLAINTIFFS’ RESPONSE TO DEFENDANT’S MOTION FOR A BENCH TRIAL,  
OR ALTERNATIVELY A BIFURCATED TRIAL, ON WILLFUL INFRINGEMENT 
PAGE    1||Case 6:14-cv-00068-KNM   Document 145   Filed 01/19/16   Page 1 of 17 PageID #:  13060
 
UNITED STATES DISTRICT COURT 
FOR THE EASTERN DISTRICT OF TEXAS 
TYLER DIVISION 
  
C.A. No. 6:14-cv-68 
EQUISTAR CHEMICALS, LP and 
MSI TECHNOLOGY L.L.C., 
  
                   Plaintiffs, 
 
v. 
 
WESTLAKE CHEMICAL CORP., 
  
 
 
                   Defendant. 
JO

In [14]:
m = CaseMetadata.from_metadata_path("100_random_fed/California_Eastern_District_Court/1--21-cv-01170/metadata.json")

In [15]:
m.get_docket_report_contents()

['~Util - 1 Set/Reset Deadlines and Hearings, Minute Order',
 '~Util - 1 Set/Reset Deadlines and Hearings, Pretrial Order',
 'Pretrial Conference',
 'Minute Order',
 '~Util - 1 Set/Reset Deadlines and Hearings, Minute Order',
 " STIPULATION and ORDER to Continue the Court's Scheduling Order  signed by Magistrate Judge Barbara A. McAuliffe on 11/16/2023. Non-Expert Discovery  Cut-Off: 1/19/2024; Expert Disclosures: 2/2/2024; Supplemental Expert Disclosures: 2/23/2024; Expert Discovery Cut-Off: 3/22/2024; Pretrial  Motion Filing Deadline: 4/19/2024. (Sant Agata, S)",
 " ORDER Granting Joint Stipulation and Request for an Order Continuing the Court's Scheduling Order Deadline  signed by Magistrate Judge Barbara A. McAuliffe on 09/11/2023. Non-Expert Discovery Cutoff: 11/17/23; Initial Expert  Disclosures: 12/8/23; Supplemental Expert Disclosures: 12/29/23; Expert Discovery Cutoff: 02/23/24; Pretrial Motion Filing Deadline: 03/22/24.(Flores, E)",
 'ORDER  signed by Magistrate Judge Barbara

In [10]:
df[df["trial_type"] != "unknown"][["title", "trial_type", "trial_result"]].loc[80, "title"]

'GENERAL MILLS, INC. et al VS. FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF'

In [22]:
log[log["title"] == "GENERAL MILLS, INC. et al VS. FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF"].metadata_context.item()

"THE COURT ORDERED THE FOLLOWING JUDGMENT ENTERED: IT IS ADJUDGED THAT DEFENDANT FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF CALIFORNIA RECOVER FROM PLAINTIFF GENERAL MILLS, INC. & SUBSIDIARIES REASONABLE COSTS OF SUIT PURSUANT TO CCP 1032. SEE SCANNED DOC||REPLY (POST-TRIAL) BRIEF FILED BY PLAINTIFF GENERAL MILLS, INC. & SUBSIDIARIES||PROPOSED STMT OF DECISION (GROSS RECEIPTS ISSUE) FILED BY DEFENDANT FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF CALIFORNIA||COURT TRIAL SET FOR FEB-22-2007 CONTINUED TO FEB-23-2007 AT 10:00 AM IN 220. PLAINTIFF'S CASE. JUDGE: A. JAMES ROBERTSON, CLERK: SHERIFE HUSENY, REPORTERS: GENA EALES \t176 & PATTY HUBBLE #3058 (220)||PROPOSED STMT OF DECISION FILED BY DEFENDANT FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF CALIFORNIA||MASTER MOTION CALENDAR ON MAY-11-2006 IN 206, DEFENDANT FRANCHISE TAX BOARD'S MOTION TO CONTINUE JURY TRIAL WAS GRANTED. THE TRIAL DATE OF 6/12/06 IS CONTINUED TO 2/20/07 IN DEPT. 206 AT 9:30 AM. THE CURRENT MSC OF 5/31/06 IS V

In [23]:
log[log["title"] == "GENERAL MILLS, INC. et al VS. FRANCHISE TAX BOARD, AN AGENCY OF THE STATE OF"].metadata_response_json.item()

"{'reasoning': 'The documents describe the court ordering the defendant, Franchise Tax Board, an Agency of the State of California, to recover reasonable costs of suit from plaintiff General Mills, Inc. & Subsidiaries pursuant to CCP 1032. This indicates that the jury ruled in favor of the defendant.', 'category': 'defendant'}"

In [30]:
# Join every log row with the trial_result stored in df for the same
# metadata_path, drop the stale "Unnamed: 0" column, and save the result.
# index=False matches the other to_csv calls in this notebook: writing the
# index would re-create an "Unnamed: 0" column the next time the file is read.
(
    pd.merge(
        log,
        df[["trial_result", "metadata_path"]],
        on="metadata_path",
        how="inner",
    )
    .drop(columns="Unnamed: 0")
    .to_csv("100_random_sample_results.csv", index=False)
)

### Find Trial Docs

In [2]:
directory = CaseDirectory("100_random_fed")

In [3]:
directory.categorize_cases("fed_categories.csv")

In [4]:
df = pd.read_csv("fed_categories.csv")

In [6]:
trial_docs = {}

In [12]:
trial_paths = df[df.trial == True].metadata_path.tolist()

In [115]:
path = trial_paths[29]
case = CaseMetadata.from_metadata_path(path)
dr = case.get_docket_report()
dr[dr.document_path != ""]

Unnamed: 0,link_viewer,number,exhibits,link,entry_date,date,contents,document_path
2,https://www.docketalarm.com/cases/Illinois_Cen...,481,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2011-04-18,ORDER & OPINION entered by Judge Joe Billy Mc...,100_random_fed/Illinois_Central_District_Court...
3,https://www.docketalarm.com/cases/Illinois_Cen...,481,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2011-04-18,ORDER & OPINION entered by Judge Joe Billy Mc...,100_random_fed/Illinois_Central_District_Court...
6,https://www.docketalarm.com/cases/Illinois_Cen...,478,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2010-07-13,ORDER entered by Magistrate Judge John A. Gorm...,100_random_fed/Illinois_Central_District_Court...
36,https://www.docketalarm.com/cases/Illinois_Cen...,448,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2009-12-15,ORDER entered by Judge Joe Billy McDade on 12...,100_random_fed/Illinois_Central_District_Court...
37,https://www.docketalarm.com/cases/Illinois_Cen...,448,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2009-12-15,ORDER entered by Judge Joe Billy McDade on 12...,100_random_fed/Illinois_Central_District_Court...
99,https://www.docketalarm.com/cases/Illinois_Cen...,386,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2007-09-06,ORDER granting in part and denying in part 38...,100_random_fed/Illinois_Central_District_Court...
289,https://www.docketalarm.com/cases/Illinois_Cen...,276,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2007-07-30,OPINION & ORDER ENTERED: Ordered that 222 M...,100_random_fed/Illinois_Central_District_Court...
327,https://www.docketalarm.com/cases/Illinois_Cen...,276,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2007-07-30,OPINION & ORDER ENTERED: Ordered that 222 M...,100_random_fed/Illinois_Central_District_Court...
365,https://www.docketalarm.com/cases/Illinois_Cen...,276,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2007-07-30,OPINION & ORDER ENTERED: Ordered that 222 MOTI...,100_random_fed/Illinois_Central_District_Court...
403,https://www.docketalarm.com/cases/Illinois_Cen...,276,[],https://www.docketalarm.com/cases/Illinois_Cen...,NaT,2007-07-30,OPINION & ORDER ENTERED: Ordered that 222 M...,100_random_fed/Illinois_Central_District_Court...


In [111]:
title = dr.loc[752, "contents"]
doc = case.get_document_by_docket_report_title(title)
print(list(doc.values())[0])

1:04-cv-01118-JBM-JAG   # 93    Page 1 of 3                                              

E-FILEDE-FILEDE-FILED
     

 Monday, 26 September, 2005  02:49:50 PM  Monday, 26 September, 2005  02:50:25 PM  Monday, 26 September, 2005  02:51:00 PM 

 Clerk, U.S. District Court, ILCD Clerk, U.S. District Court, ILCD Clerk, U.S. District Court, ILCD
UNITED STATES DISTRICT COURT
CENTRAL DISTRICT OF ILLINOIS
TRISH LEE McCLOUD, by
)
and through her legal
)
guardian, Candy L. Hall,
Plaintiff,
))
))
)          Case No. 04-1118
v.
)
GOODYEAR DUNLOP TIRES NORTH )
AMERICA, LTD. and THE         )
GOODYEAR TIRE & RUBBER    
)
COMPANY, 
))
)
Defendants.
O R D E R
In separate lawsuits Plaintiff Trish Lee McCloud, a passenger
on a motorcycle being operated by William Booker (Case No. 04-1118)
and Plaintiff William Booker (Case No. 04-1159) sued Defendants
Goodyear Dunlop Tires North America, Ltd. (Dunlop) and The Goodyear
Tire & Rubber Company (Goodyear) (collectively referred to as
Dunlop/Goodyear) for i

In [112]:
trial_docs.update(doc)

In [113]:
trial_docs.keys()

dict_keys(['100_random_fed/Texas_Eastern_District_Court/2--21-cv-00084/The_Chamberlain_Group_LLC_v._Overhead_Door_Corporation_et_al/600.txt', '100_random_fed/Wisconsin_Western_District_Court/3--14-cv-00099/Haley_Mary_et_al_v._Kolbe_and_Kolbe_Millwork_Co._Inc._et_al/603.txt', '100_random_fed/California_Northern_District_Court/5--16-cv-01370/Sumotext_Corp._-v-_Zoove_Inc._et_al/470.txt', '100_random_fed/New_Jersey_District_Court/1--16-cv-00395/KUHAR_et_al_v._PETZL_COMPANY_et_al/234.txt', '100_random_fed/New_York_Southern_District_Court/1--10-cv-02881/The_Estate_of_Mauricio_Jaquez_v._The_City_of_New_York_et_al/186.txt', '100_random_fed/North_Carolina_Western_District_Court/3--15-cv-00309/Poole_v._Gaston_County_et_al/134.txt', '100_random_fed/California_Central_District_Court/2--06-cv-03923/Aleksandr_L_Yufa_v._Lockheed_Martin_Corporation_et_al/311.txt', '100_random_fed/California_Central_District_Court/5--15-cv-01921/Bennion_and_Deville_Fine_Homes_Inc_et_al_v._Windermere_Real_Estate_Service

In [116]:
# One record per collected trial document: its file path and its text.
d = [dict(path=doc_path, text=doc_text) for doc_path, doc_text in trial_docs.items()]

In [117]:
pd.DataFrame(d).to_csv("federal_trial_docs.csv", index=False)

In [118]:
df2 = pd.read_csv("federal_trial_docs.csv")

In [121]:
df2.path.tolist()

['100_random_fed/Texas_Eastern_District_Court/2--21-cv-00084/The_Chamberlain_Group_LLC_v._Overhead_Door_Corporation_et_al/600.txt',
 '100_random_fed/Wisconsin_Western_District_Court/3--14-cv-00099/Haley_Mary_et_al_v._Kolbe_and_Kolbe_Millwork_Co._Inc._et_al/603.txt',
 '100_random_fed/California_Northern_District_Court/5--16-cv-01370/Sumotext_Corp._-v-_Zoove_Inc._et_al/470.txt',
 '100_random_fed/New_Jersey_District_Court/1--16-cv-00395/KUHAR_et_al_v._PETZL_COMPANY_et_al/234.txt',
 '100_random_fed/New_York_Southern_District_Court/1--10-cv-02881/The_Estate_of_Mauricio_Jaquez_v._The_City_of_New_York_et_al/186.txt',
 '100_random_fed/North_Carolina_Western_District_Court/3--15-cv-00309/Poole_v._Gaston_County_et_al/134.txt',
 '100_random_fed/California_Central_District_Court/2--06-cv-03923/Aleksandr_L_Yufa_v._Lockheed_Martin_Corporation_et_al/311.txt',
 '100_random_fed/California_Central_District_Court/5--15-cv-01921/Bennion_and_Deville_Fine_Homes_Inc_et_al_v._Windermere_Real_Estate_Services_C

### Classification

In [124]:
path = "100_random_sample/New_York_State_Suffolk_County_Supreme_Court/602235---2016/metadata.json"
classifier = JuryRulingClassifier(path, language_model="llama3.1", llm_document_count=7)

In [10]:
classifier.extract()

Extracting from metadata...
- Getting relevant chunks...
- Querying llm...
- Response: {'reasoning': "The documents provided are all related to the plaintiffs' proposed verdict sheets and jury instructions, which suggests that the case is focused on the plaintiff's claims. However, there is no indication of a jury verdict or a ruling in favor of either party. The lack of reference to a verdict or a specific outcome indicates that the decision of the jury trial is not identified in these documents.", 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...
- Querying llm...
- Response: {'reasoning': "The jury verdict forms do not explicitly identify a final decision regarding the plaintiff's claim against the defendant. However, Verdict 4 asks for damages, which implies that the plaintiffs have been found liable by the jury. This shows that the jury ruled in favor of the plaintiffs because they were awarded damages.", 'category': 'plaintiff'}


{'reasoning': "The jury verdict forms do not explicitly identify a final decision regarding the plaintiff's claim against the defendant. However, Verdict 4 asks for damages, which implies that the plaintiffs have been found liable by the jury. This shows that the jury ruled in favor of the plaintiffs because they were awarded damages.",
 'category': 'plaintiff'}

In [133]:
prompt = """
        You are an expert legal analyst. You will be given a sequence of indices and legal summaries from legal documents relating to a case in the United States. Documents are separated by ||. All documents correspond to the same case. 
        Based on the summaries, determine which documents may contain the outcome of the case.
        Relevant documents may include decision, verdict, opinion, sentencing, and trial documents. 
        Respond with a JSON object in the form {"reasoning": "...", "indices": ...}
        reasoning should be in the format "The summary of document _ contains _, so document _ is likely to contain the outcome of the case because _"
        indices should be a list of the indices of relevant documents. Do not include anything other than the relevant indices in indices. 
        """

In [134]:
docs = classifier.metadata.get_docket_report_contents()

In [135]:
# Prefix each docket entry with its position and terminate it with "||",
# the document separator described in the system prompt.
s = "".join(f"{idx}: {entry}||" for idx, entry in enumerate(docs))

In [136]:
s

"0: DECISION + ORDER ON MOTION (Motion #004)||1: AFFIDAVIT OR AFFIRMATION IN OPPOSITION TO CROSS-MOTION AND IN FURTHER SUPPORT OF MOTION (Motion #001)||2: EXHIBIT(S) - 1 (Motion #004) Exhibit 1 - First Midwest South Willowbrook Statements||3: AFFIDAVIT OR AFFIRMATION IN OPPOSITION TO MOTION (Motion #004) Affirmation in Opposition||4: EXTRACT Trial Extract||5: OTHER COURT FILED DOCUMENT Exhibit Sheet||6: VERDICT SHEET||7: JUROR NOTE(S)||8: JUROR NOTE(S)||9: EXHIBIT(S) Court Exhibit II / Statement of Claim||10: MARKED PLEADINGS Court Exhibit I||11: ADMISSION OF SERVICE (Motion #004)||12: AFFIDAVIT OR AFFIRMATION IN SUPPORT OF MOTION (Motion #004)||13: NOTICE OF MOTION (Motion #004)||14: TRIAL DOCUMENTS Plaintiffs' Proposed Verdict Sheets||15: TRIAL DOCUMENTS Plaintiffs' Proposed Jury Instructions||16: TRIAL DOCUMENTS Plaintiffs' Proposed Verdict Sheets||17: TRIAL DOCUMENTS Plaintiffs' Proposed Verdict Sheets||18: TRIAL DOCUMENTS Plaintiffs' Proposed Verdict Sheets||19: TRIAL DOCUMENTS Pl

In [138]:
import ollama

In [139]:
# Send the index-labelled docket entries (`s`) to the llama3.1 model, with the
# document-selection instructions (`prompt`) supplied as the system prompt.
# NOTE(review): format="json" presumably constrains the reply to JSON, matching
# the {"reasoning", "indices"} object the system prompt asks for -- confirm
# against the ollama client documentation.
response = ollama.generate(
            model="llama3.1",
            prompt=s,
            system=prompt,
            format="json",
        )

KeyboardInterrupt: 

In [5]:
df = pd.read_csv("federal_trial_docs.csv")

In [11]:
from pathlib import Path

In [14]:
Path(df.path[0]).parents[2]

PosixPath('100_random_fed/Texas_Eastern_District_Court')

In [22]:
# metadata.json is addressed two directory levels above each document file.
paths = [f"{Path(doc_path).parents[1]}/metadata.json" for doc_path in df.path]

In [32]:
log = pd.DataFrame([classifier.log])

In [41]:
# Classifier to run for each trial type; cases of any other type are skipped.
ruling_classifiers = {
    "jury": JuryRulingClassifier,
    "bench": BenchRulingClassifier,
}

# NOTE(review): the [5:] slice presumably resumes after cases that were
# already logged in an earlier run -- confirm before re-running from scratch.
for path in paths[5:]:
    m = CaseMetadata.from_metadata_path(path)
    # Categorize once per case rather than once per branch.
    classifier_cls = ruling_classifiers.get(m.categorize_trial_type())
    if classifier_cls is None:
        continue
    classifier = classifier_cls(path)
    classifier.extract()
    # Append this run's log record as the next row of the log frame.
    log.loc[len(log)] = classifier.log

Extracting from metadata...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant docket_report entries')
Extracting from documents...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant documents')
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 7, updating n_results = 7


- Querying llm...
- Response: {'reasoning': "According to the documents, Defendant Lockheed Martin Corporation's Motion for Summary Judgment on Plaintiff's Claims of Infringement was granted by the court on December 23, 2013. The only remaining claims are the Counter Claims asserted by Lockheed Martin Corporation. It appears that Counter Claimant intends to file a Motion to Dismiss the Counter Claims, without prejudice. No further information is provided regarding the outcome of this trial.", 'category': 'defendant'}
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 7, updating n_results = 7


- Querying llm...
- Response: {'reasoning': 'The documents describe various filings and orders related to the trial, including a Notice of Lodging for a Proposed Pretrial Conference Order and a Joint Stipulation to Continue Trial. However, none of these documents identify the result of the jury trial.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...
- Response: ({'category': 'undetermined'}, 'No relevant documents')
Extracting from metadata...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 6, updating n_results = 6


- Querying llm...
- Response: {'reasoning': 'According to the documents, summary judgment was awarded in favor of Plaintiff Booker and against Defendants Dunlop and Goodyear in the counterclaim in 04-1159. This case was referred to Mag Judge Gorman for further pretrial administration. The documents do not explicitly state the outcome of the trial involving Trish Lee McCloud and Goodyear Dunlop Tires North America Ltd, but they imply that the trial took place and exhibits were filed.', 'category': 'undetermined'}
Extracting from documents...
- Getting relevant chunks...


Number of requested results 8 is greater than number of elements in index 4, updating n_results = 4


- Querying llm...
- Response: {'reasoning': "The case was settled for a confidential amount, which was approved by the court. The approval included attorney's fees of 40% of the settlement amount plus expenses. This indicates that the jury verdict in favor of one party was reached and a monetary award was made.", 'category': 'undetermined'}


In [55]:
log.to_csv("fed_test_log.csv", index=False)

In [45]:
print(df.text[6])

Case 2:06-cv-03923-BRO-FFM   Document 311   Filed 01/23/14   Page 1 of 3   Page ID #:5547
 
 
UNITED STATES DISTRICT COURT 
CENTRAL DISTRICT OF CALIFORNIA 
 
Case No.  CV-06-3923 BRO (FFMx) 
 
JUDGMENT 
 
Judge:  Hon. Beverly Reid O’Connell 
 
Defendant. 
ALEKSANDR L. YUFA, 
 
 
 
Plaintiff, 
 
v.   
 
LOCKHEED MARTIN 
CORPORATION, 
  
LOCKHEED MARTIN 
CORPORATION, 
 
 
Counterclaim-Plaintiff, 
 
v. 
ALEKSANDR L. YUFA, 
 
      Counterclaim-Defendant. 
 
 
 
 
   
 
   
Case No. CV-06-3923 BRO (FFMx) 
 
  
1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 
21 
22 
23 
24 
25 
26 
27 
28 

Case 2:06-cv-03923-BRO-FFM   Document 311   Filed 01/23/14   Page 2 of 3   Page ID #:5548
 
Before the Court is Defendant Lockheed Martin Corporation’s (“Lockheed 
 
Martin”) Request for Entry of Judgment.  On December 23, 2013, this Court 
granted Lockheed Martin’s motion for summary judgment of non-infringement 
against all of Plaintiff Aleksandr L. Yufa’s claims of infringement

In [58]:
df = pd.read_csv("fed_test_log.csv")

In [69]:
df

Unnamed: 0,system_prompt,metadata_path,title,metadata_response,metadata_response_json,metadata_context,document_response,document_response_json,document_context,category
0,\n You are an expert legal analyst. You...,100_random_fed/Texas_Eastern_District_Court/2-...,The Chamberlain Group LLC v. Overhead Door Cor...,"{'model': 'mistral', 'created_at': '2024-07-24...",{'reasoning': 'The documents describe various ...,"REDACTION to 583 Sealed Response to Motion, De...","{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': 'According to the documents, the...","was awarded only as to the ’404 patent. (See, ...",plaintiff
1,\n You are an expert legal analyst. You...,100_random_fed/Wisconsin_Western_District_Cour...,"Haley, Mary et al v. Kolbe and Kolbe Millwork ...","{'model': 'mistral', 'created_at': '2024-07-24...",{'reasoning': 'The documents describe various ...,Final Pretrial Conference Report and Rule 26(a...,"{'model': 'mistral', 'created_at': '2024-07-24...",{'reasoning': 'The documents describe a motion...,not been entered yet in this case. Although I...,undetermined
2,\n You are an expert legal analyst. You...,100_random_fed/California_Northern_District_Co...,"Sumotext Corp. -v- Zoove, Inc., et al","{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': 'According to the documents, a J...",MOTION Relief from Nondispositive Pretrial Ord...,"{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': ""According to the documents, the...",IV. MITIGATION TO DAMAGES - APPLECABLE TO CLAI...,undetermined
3,\n You are an expert legal analyst. You...,100_random_fed/New_Jersey_District_Court/1--16...,KUHAR et al v. PETZL COMPANY et al,,,,,,,undetermined
4,\n You are an expert legal analyst. You...,100_random_fed/New_York_Southern_District_Cour...,The Estate of Mauricio Jaquez v. The City of N...,"{'model': 'mistral', 'created_at': '2024-07-24...",{'reasoning': 'The documents describe the sche...,. Plaintiffs' opposition shall be filed not la...,,,,undetermined
5,\n You are an expert legal analyst. You...,100_random_fed/North_Carolina_Western_District...,Poole v. Gaston County et al,,,,,,,undetermined
6,\n You are an expert legal analyst. You...,100_random_fed/California_Central_District_Cou...,Aleksandr L Yufa v. Lockheed Martin Corporatio...,"{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': ""According to the documents, Def...",MINUTE ORDER IN CHAMBERS by Judge Beverly Reid...,,,,defendant
7,\n You are an expert legal analyst. You...,100_random_fed/California_Central_District_Cou...,Bennion and Deville Fine Homes Inc et al v. Wi...,"{'model': 'mistral', 'created_at': '2024-07-24...",{'reasoning': 'The documents describe various ...,JOINT REPORT Rule 26(f) Discovery Plan ; esti...,,,,undetermined
8,\n You are an expert legal analyst. You...,100_random_fed/Illinois_Central_District_Court...,"McCloud v. Goodyear Dunlop Tire, et al","{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': 'According to the documents, sum...",Proposed Jury Instructions by Goodyear Tire & ...,"{'model': 'mistral', 'created_at': '2024-07-24...","{'reasoning': ""The case was settled for a conf...",the meaning of Section 2(c) of the Illinois Jo...,undetermined


In [77]:
df.loc[8, "metadata_response_json"]

"{'reasoning': 'According to the documents, summary judgment was awarded in favor of Plaintiff Booker and against Defendants Dunlop and Goodyear in the counterclaim in 04-1159. This case was referred to Mag Judge Gorman for further pretrial administration. The documents do not explicitly state the outcome of the trial involving Trish Lee McCloud and Goodyear Dunlop Tires North America Ltd, but they imply that the trial took place and exhibits were filed.', 'category': 'undetermined'}"

In [73]:
m = CaseMetadata.from_metadata_path(df.loc[8, "metadata_path"])

In [75]:
m.get_docket_report_contents()

['CERTIFICATE OF SERVICE by Trish Lee McCloud re 482 Receipt and Lien Release (James, Randy) (Entered: 07/08/2011)',
 'Receipt for Lien Payment & Release. (James, Randy) (Entered: 07/07/2011)',
 " ORDER & OPINION entered by Judge Joe Billy McDade on 4/18/11. Respondent's Objections to and Motion for Reconsideration of the Magistrates Order  479  is GRANTED in part and DENIED in part.  It is granted to the extent that the Court has here re considered portions of Magistrate Judge Gorman's Order, however all of its objections are DENIED.  The Order of Magistrate Judge Gorman is AFFIRMED, Petitioner's Petition to Adjudicate Liens  445  is GRANTED, and its Lien of $261,902.33 is to be paid in full.  IT IS SO ORDERED. SEE FULL ORDER. (FDT, ilcd)",
 " ORDER & OPINION entered by Judge Joe Billy McDade on 4/18/11. Respondent's Objections to and Motion for Reconsideration of the Magistrates Order  479  is GRANTED in part and DENIED in part.  It is granted to the extent that the Court has here re

In [79]:
doc = m.get_document_by_docket_report_title(" ORDER & OPINION entered by Judge Joe Billy McDade on 4/18/11. Respondent's Objections to and Motion for Reconsideration of the Magistrates Order  479  is GRANTED in part and DENIED in part.  It is granted to the extent that the Court has here re considered portions of Magistrate Judge Gorman's Order, however all of its objections are DENIED.  The Order of Magistrate Judge Gorman is AFFIRMED, Petitioner's Petition to Adjudicate Liens  445  is GRANTED, and its Lien of $261,902.33 is to be paid in full.  IT IS SO ORDERED. SEE FULL ORDER. (FDT, ilcd)")

In [85]:
text = list(doc.values())[0]

In [88]:
# `regex` is the third-party regex package, bound here to the name `re`, so the
# later `re.split` call uses `regex` rather than the standard-library module.
import regex as re

In [89]:
# Split the stripped document text into paragraphs wherever a blank line
# occurs (newline, optional whitespace, newline).
paragraphs = re.split(r'\n\s*\n', text.strip())

In [106]:
# Display the paragraph at index 35 of the split document.
paragraphs[35]

'In addition, Respondent argues that she raised three other claims before \nMagistrate Judge Gorman, which he rejected without articulating his reasons for \ndoing so.  These claims include:  1) Petitioner’s claim should be reduced because its \ncharges for Ms. McCloud’s treatment were substantially higher than Petitioner \nwould have received for the same services provided to an insured patient, or if it \nhad timely sought reimbursement for all of its charges from Michigan Medicaid, \nbecause otherwise Petitioner would be unjustly enriched to the detriment of \nRespondent; 2) Petitioner’s lien claim should be denied because Petitioner has not '

In [68]:
# Print the `document_context` value of the row at position 8 of `df`.
# In the printed text, "||" appears between excerpts (presumably a separator
# between retrieved chunks — TODO confirm).
# NOTE(review): another cell selects this case with df.loc[8, ...] (label);
# iloc (position) agrees only if `df` has the default integer index.
print(df.iloc[8].document_context)

the meaning of Section 2(c) of the Illinois Joint Tortfeasor
Contribution Act.  740 ILCS 100/2(c).
It is Ordered that summary judgment is awarded in favor of
Plaintiff/Counterdefendant 
Booker 
and 
against
Defendants/Counterplaintiffs Dunlop and Goodyear on the latters’
counterclaim in Case No. 04-1159.  This case is referred back to
Magistrate Judge Gorman for further pretrial administration. 
ENTERED this   23rd    day of September, 2005.
          s/ Joe B. McDade         
JOE BILLY McDADE
 
United States District Judge 
3||that Michigan law governs this case.  Plaintiff, Trish McCloud, 
filed a Brief in Response [Doc. 224] on June 25, 2007.  At the 
final pretrial conference this Court ordered expedited briefing 
on this issue and Granted Defendants leave to file a Reply 
Brief.  Defendants filed their Reply Brief [Doc. 229] on July 2, 
2007.  For the following reasons, this Court holds that Illinois 
Law governs this case and Defendants Motion [Doc. 217] is 
DENIED. 
   
 
  
I. 

In [72]:
# Print the `document_context` value for index label 2 of `df`.
print(df["document_context"][2])

IV. MITIGATION TO DAMAGES - APPLECABLE TO CLAIM 1 AND CLAIM 2 (Hold
depending on evidence at trial)
If you answered Question 15, answer Question 16 below.
16
Did Defendants prove by a preponderance of evidence that Sumotext acted
unreasonably in failing to take specific steps to minimize or limit its losses?
Yes
No
(“Yes” is a finding for Defendants)
(“No” is a finding for Sumotext)
If you answered “Yes” to Question 16, then answer Question 17.
If you answered “No” to Question 16, skip Question 17.
 
17. What is the amount by which Sumotext’s loss should be reduced if sumotext had taken
those steps?
Please have the‘
esidingj
. or sign, date, and return this form.
 
Date:||United States District Court 


Case 5:16-cv-01370-BLF   Document 175   Filed 06/26/17   Page 15 of 20
 
1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 
21 
22 
23 
24 
25 
26 
27 
28 
Rule 12(b)(6) subject to factual testing by summary judgment or trial.”  Newcal Indus., Inc. v. Ikon 
Office 

In [51]:
# Load state_metadata.csv into `df2`.
# NOTE(review): the frame displayed below has "Unnamed: 0.1" and "Unnamed: 0"
# columns, which suggests the CSV was written with its index included more
# than once; consider index_col=0 here or index=False when writing.
df2 = pd.read_csv("state_metadata.csv")

In [53]:
# Display only the rows whose `trial_type` is something other than "unknown";
# the filtered frame is shown, not assigned.
df2[df2.trial_type != "unknown"]

Unnamed: 0.1,Unnamed: 0,court,title,docket,judges,judge,type,link,status,flags,nature_of_suit,cause,magistrate,metadata_path,trial,trial_type,trial_result
25,25,"New York State, Suffolk County, Supreme Court",MIKE THOMPSON et al v. MICHAEL DEFELICE,602235/2016,,,Torts - Other (Fraud),https://www.docketalarm.com/cases/New_York_Sta...,Active,,,,,100_random_sample/New_York_State_Suffolk_Count...,True,jury,plaintiff
37,37,California Northern District Court,In re Google RTB Consumer Privacy Litigation,4:21-cv-02155,['Judge Yvonne Gonzalez Rogers'],Judge Yvonne Gonzalez Rogers,,https://www.docketalarm.com/cases/California_N...,,"['ADRMOP', 'CONSOL', 'RELATE']",890 Statutory Actions - Other,28:1331 Fed. Question: Breach of Contract,Magistrate Judge Virginia K. DeMarchi,100_random_sample/California_Northern_District...,True,jury,undetermined
38,38,Delaware District Court,"Rockwell Automation, Inc. v. Parcop S.R.L.",1:21-cv-01238,['Judge Gregory B. Williams'],Judge Gregory B. Williams,,https://www.docketalarm.com/cases/Delaware_Dis...,,"['DISCOVERY-JLH', 'Multi-Media Docs']",840 Trademark,15:1114 Trademark Infringement,Judge Jennifer L. Hall,100_random_sample/Delaware_District_Court/1--2...,True,jury,undetermined
39,39,"Massachusetts State, Superior Court, Essex County","Mathieson, Cory vs. Essex County Sheriff's Dep...",1777CV00789,,,Administrative Civil Actions,https://www.docketalarm.com/cases/Massachusett...,Open,,,,,100_random_sample/Massachusetts_State_Superior...,True,jury,undetermined
45,45,Massachusetts District Court,"Students for Fair Admissions, Inc. v. Presiden...",1:14-cv-14176,['Judge Allison D. Burroughs'],Judge Allison D. Burroughs,,https://www.docketalarm.com/cases/Massachusett...,,,440 Civil Rights - Other,28:1331 Federal Question: Other Civil Rights,,100_random_sample/Massachusetts_District_Court...,True,jury,plaintiff
47,47,"Texas State, Cameron County, 103rd District Court","Javier Martinez Aguilar,Mirna Esthel Martinez ...",2019-DCL-03295,"['Janet Leal', 'Leonel Alejandro']",,Civil-Other Civil,https://www.docketalarm.com/cases/Texas_State_...,Disposed,,,,,100_random_sample/Texas_State_Cameron_County_1...,True,jury,undetermined
49,49,"Florida State, Broward County, County Court","State of Florida Vs Hornsby, Leonard L",16005544MM10A,"['Davis ZD, Michael']","Davis ZD, Michael",Traffic and Misdemeanor - Misdemeanor,https://www.docketalarm.com/cases/Florida_Stat...,Disposed,,,,,100_random_sample/Florida_State_Broward_County...,True,jury,undetermined
53,53,Washington Western District Court,"Microsoft Corporation v. Motorola Inc, et al",2:10-cv-01823,['Judge James L. Robart'],Judge James L. Robart,,https://www.docketalarm.com/cases/Washington_W...,,,190 Contract - Other,28:1332 Diversity,,100_random_sample/Washington_Western_District_...,True,jury,undetermined
68,68,Florida Southern District Court,"Singh v. Royal Caribbean Cruises Ltd., et al",1:20-cv-24987,['Judge Jose E. Martinez'],Judge Jose E. Martinez,,https://www.docketalarm.com/cases/Florida_Sout...,,"['CLOSED', 'JB', 'MEDIATION', 'REF_DISCOV']",340 Marine,28:1332 Diversity,Magistrate Judge Jacqueline Becerra,100_random_sample/Florida_Southern_District_Co...,True,jury,undetermined
74,74,"Texas State, Dallas County, 134th District Court",TUNA TWO THUMBS LP vs. GREENWICH CAPITAL FINAN...,DC-12-12312,['DALE TILLERY'],DALE TILLERY,CNTR CNSMR COM DEBT,https://www.docketalarm.com/cases/Texas_State_...,CLOSED,,,,,100_random_sample/Texas_State_Dallas_County_13...,True,jury,undetermined


### To do
1. Make it easier to visualize jury trials/extract jury trial results and do the same for other variables
2. Add reranking algorithm
3. Add helper function to manually label
4. Sometimes number of parties is way too long
5. `response_json = json.loads(response["response"])` -- check that the response really is valid JSON before parsing it (e.g. catch `json.JSONDecodeError`)

Reranking
- https://adasci.org/a-hands-on-guide-to-enhance-rag-with-re-ranking/
- https://techcommunity.microsoft.com/t5/microsoft-developer-community/doing-rag-vector-search-is-not-enough/ba-p/4161073
- https://community.openai.com/t/bad-formats-for-semantic-search-of-rag-implementing-internal-chatbot-for-troubleshooting-an-sdk/848715
- https://learn.microsoft.com/en-us/azure/search/index-similarity-and-scoring
- https://cohere.com/blog/rerank-3
- https://www.reddit.com/r/LocalLLaMA/comments/1d9h2pg/doing_rag_vector_search_is_not_enough/
- https://www.datacamp.com/tutorial/boost-llm-accuracy-retrieval-augmented-generation-rag-reranking
- https://python.langchain.com/v0.2/docs/integrations/retrievers/flashrank-reranker/

RAG
- https://ollama.com/blog/embedding-models
- https://huggingface.co/learn/nlp-course/chapter5/6
- https://docs.mistral.ai/guides/rag/
- https://docs.trychroma.com/guides

In [1]:
class A:
    """Minimal class used to show how a mutable default argument behaves.

    The ``[]`` default for ``l`` is created once, when ``__init__`` is
    defined, so every instance constructed without an explicit ``l`` stores
    that same list object in ``self.l``.
    """
    def __init__(self, l=[]):
        # No copy is made: self.l aliases the list passed in (or the single
        # shared default list).
        self.l = l

In [2]:
# Both instances are built without arguments, so both hold the same default
# list object.
a = A()
b = A()
# Appending through `b` mutates that shared list...
b.l.append(4)
# ...so the change is visible through `a` as well.
a.l

[4]

In [13]:
from dataclasses import dataclass, field, asdict

# Module-level list; not referenced by anything else in this cell.
sl = [8]


@dataclass
class B:
    """Dataclass whose list field is built fresh for every instance.

    ``default_factory`` is invoked on each construction, so ``l`` starts out
    as a new ``[8]`` per instance instead of one list shared by all of them.
    """

    l: list = field(default_factory=lambda: [8])


# NOTE: this rebinds `d`, which earlier cells used for a CaseDirectory.
c = B()
d = B()
# Only c's own list is mutated here.
c.l.append(4)
# d's list is unaffected by the append above.
d.l

[8]

In [14]:
# Convert the dataclass instance `c` to a plain dict; its `l` field includes
# the 4 appended earlier.
asdict(c)

{'l': [8, 4]}