# CourtListener

`o`: Case law opinion clusters with nested Opinion documents.

`r`: List of Federal cases (dockets) with up to three nested documents. If there are more than three matching documents, the more_docs field for the docket result will be true.

`rd`: Federal filing documents from PACER

`d`: Federal cases (dockets) from PACER

`p`: Judges

`r` is more useful when we are looking for like cases in general. `rd` includes all the documents individually like complaints and filing, meaning multiple documents possible for each case.

In [25]:
import pandas as pd
import json
import re

## Type O

In [27]:
# Read the JSONL file
data_o_path = 'courtlistener/search/o/2025-06-13_courtlistener_search_type-o_artificial-intelligence.jsonl'

with open(data_o_path, 'r') as file:
    data_o = [json.loads(line) for line in file]

In [28]:
# Transform the data into a DataFrame
dataset_o = pd.DataFrame(data_o)

dataset_o.head()

Unnamed: 0,absolute_url,attorney,caseName,caseNameFull,citation,citeCount,cluster_id,court,court_citation_string,court_id,...,panel_ids,panel_names,posture,procedural_history,scdb_id,sibling_ids,source,status,suitNature,syllabus
0,/opinion/6520938/noerr-v-lewistown-smelting-re...,"Albert Houck, for plaintiff., Houck & Barron, ...","Noerr v. Lewistown Smelting & Refining, Inc.","Noerr v. Lewistown Smelting & Refining, Inc.","[60 Pa. D. & C.2d 406, 1973 Pa. Dist. & Cnty. ...",0,6520938,"Pennsylvania Court of Common Pleas, Mifflin Co...",,pactcomplmiffli,...,[],[],,,,[6394599],U,Published,,
1,/opinion/7372834/sharp-v-stalker/,"Mr. Alan TL. Strong, for the complainants., Mr...",Sharp v. Stalker,Willoughby W. Sharp v. John Stalker et ux.,"[63 N.J. Eq. 596, 18 Dickinson 596, 52 A. 1120...",0,7372834,New York Court of Chancery,,nychanct,...,[],[],,,,[7291653],U,Published,,
2,/opinion/3438130/shutes-v-weeks/,"Stipp, Perry, Bannister & Starzinger and Putna...",Shutes v. Weeks,"Marguerite Shutes Et Al., Administrators, Appe...","[262 N.W. 518, 220 Iowa 616]",25,3438130,Supreme Court of Iowa,Iowa,iowa,...,[],[],Appeal from Polk District Court. &#8212; JOSEP...,,,[3436026],ZU,Published,,
3,/opinion/7983134/osten-v-jerome/,"Eldredge & Spier, for appellant., James G. Tuc...",Osten v. Jerome,Charles Osten v. Edwin Jerome,"[93 Mich. 196, 1892 Mich. LEXIS 964, 53 N.W. 7]",0,7983134,Michigan Supreme Court,Mich.,mich,...,[],[],,,,[7936034],U,Published,,
4,/opinion/6757835/alabama-v-acacia-mut-life-assn/,"Chilton & McCoy, of Montgomery, Ala., for the ...",Alabama v. Acacia Mut. Life Ass'n,STATE OF ALABAMA v. ACACIA MUT. LIFE ASS'N,[3 F.2d 697],0,6757835,"District Court, M.D. Alabama",M.D. Ala.,almd,...,[],[],,,,[6640454],U,Published,,


In [29]:
print(f'Fields: {list(dataset_o.columns)}')
dataset_o.info()

Fields: ['absolute_url', 'attorney', 'caseName', 'caseNameFull', 'citation', 'citeCount', 'cluster_id', 'court', 'court_citation_string', 'court_id', 'dateArgued', 'dateFiled', 'dateReargued', 'dateReargumentDenied', 'docketNumber', 'docket_id', 'judge', 'lexisCite', 'meta', 'neutralCite', 'non_participating_judge_ids', 'opinions', 'panel_ids', 'panel_names', 'posture', 'procedural_history', 'scdb_id', 'sibling_ids', 'source', 'status', 'suitNature', 'syllabus']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6913 entries, 0 to 6912
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   absolute_url                 6913 non-null   object
 1   attorney                     6913 non-null   object
 2   caseName                     6913 non-null   object
 3   caseNameFull                 6913 non-null   object
 4   citation                     6913 non-null   object
 5   citeCount             

In [None]:
# Check for missing values in the dataset, especially 'caseNameFull'
print("Missing values in each column:")

dataset_o.isna().sum()

Missing values in each column:


absolute_url                      0
attorney                          0
caseName                          0
caseNameFull                      0
citation                          0
citeCount                         0
cluster_id                        0
court                             0
court_citation_string             0
court_id                          0
dateArgued                     6799
dateFiled                         0
dateReargued                   6909
dateReargumentDenied           6828
docketNumber                     99
docket_id                         0
judge                             0
lexisCite                         0
meta                              0
neutralCite                       0
non_participating_judge_ids       0
opinions                          0
panel_ids                         0
panel_names                       0
posture                           0
procedural_history                0
scdb_id                           0
sibling_ids                 

In [30]:
# pattern catches ai or artificial intelligence or arificial-intelligence
pattern = r'\b(ai|artificial[\s\-]?intelligence)\b'

ai_cases_o = dataset_o[dataset_o['caseName'].str.contains(pattern, case=False, na=False, regex=True)]


  ai_cases_o = dataset_o[dataset_o['caseName'].str.contains(pattern, case=False, na=False, regex=True)]


In [31]:
ai_cases_o

Unnamed: 0,absolute_url,attorney,caseName,caseNameFull,citation,citeCount,cluster_id,court,court_citation_string,court_id,...,panel_ids,panel_names,posture,procedural_history,scdb_id,sibling_ids,source,status,suitNature,syllabus
1455,/opinion/9390132/percipientai-inc-v-united-sta...,,"percipient.ai, Inc. v. United States",,[],0,9390132,United States Court of Federal Claims,Fed. Cl.,uscfc,...,[],[],,,,[9385608],C,Published,,REPORTED OPINION. Signed by Senior Judge Eric ...
1503,/opinion/10369169/michael-b-garner-v-authentic...,,Michael B. Garner v. Authenticity.AI Investors...,,[],0,10369169,Court of Chancery of Delaware,Del. Ch.,delch,...,[],[],,,,[10835757],C,Published,,
1504,/opinion/10370561/michael-b-garner-v-authentic...,,Michael B. Garner v. Authenticity.AI Investors...,,[],0,10370561,Court of Chancery of Delaware,Del. Ch.,delch,...,[],[],,,,[10837149],C,Published,,
1920,/opinion/10601317/in-re-creation-of-arkansas-s...,,In Re Creation of Arkansas Supreme Court Admin...,,[2025 Ark. 117],0,10601317,Supreme Court of Arkansas,Ark.,ark,...,[],[],,,,[11067905],C,Published,,
1921,/opinion/4757780/electronic-privacy-informatio...,,Electronic Privacy Information Center v. Natio...,,[],0,4757780,"District Court, District of Columbia",D.D.C.,dcd,...,[],[],,,,[4538127],C,Published,Civil,
1922,/opinion/4683687/electronic-privacy-informatio...,,Electronic Privacy Information Center v. Natio...,,[],0,4683687,"District Court, District of Columbia",D.D.C.,dcd,...,[],[],,,,[4460940],C,Published,Civil,
1923,/opinion/1933074/artificial-intelligence-corp-...,"John W. Cutchin and Thomas R. Darton, Duckor, ...",Artificial Intelligence Corp. v. Casey (In Re ...,"In Re Thomas F. CASEY, Debtor. ARTIFICIAL INTE...","[193 B.R. 942, 1996 WL 137293]",3,1933074,"United States Bankruptcy Court, S.D. California",Bankr. S.D. Cal.,casb,...,[],[],,,,[1933074],LU,Published,,
1924,/opinion/1925615/artificial-intelligence-corp-...,"Roy L. Carlson, Jr., Milberg & De Phillips, P....",Artificial Intelligence Corp. v. Casey (In Re ...,"In Re Thomas F. CASEY, Debtor. ARTIFICIAL INTE...","[198 B.R. 918, 1996 Bankr. LEXIS 919, 1996 WL ...",6,1925615,"United States Bankruptcy Court, S.D. California",Bankr. S.D. Cal.,casb,...,[],[],,,,[1925615],LU,Published,,
3352,/opinion/7321124/loop-ai-labs-inc-v-gatti/,"Bryan Jacob Wolin, Diana Wong, Valeria Calafio...",Loop AI Labs Inc. v. Gatti,LOOP AI LABS INC. v. Anna GATTI,"[195 F. Supp. 3d 1107, 2016 U.S. Dist. LEXIS 8...",6,7321124,"District Court, N.D. California",N.D. Cal.,cand,...,[],[],,,,[7239034],U,Published,,
4859,/opinion/9513271/percipientai-inc-v-united-sta...,,"percipient.ai, Inc. v. United States",,[],0,9513271,Court of Appeals for the Federal Circuit,Fed. Cir.,cafc,...,[],[],,,,[9979884],C,Published,,


## Type R

### Thoughts on Filtering

1. 'Artificial Intelligence' or 'AI' in `caseName`.
    - `case_name_full` is often empty.
2. Set threshold for `meta.score.bm25` score.

Example

```
{
            "assignedTo": "Richard Mark Gergel",
            "assigned_to_id": 1175,
            "attorney": [
                "Robert Samuel Pimentel"
            ],
            "attorney_id": [
                10099419
            ],
            "caseName": "Pimentel v. Artificial Intelligence",
            "case_name_full": "",
            "cause": "42:1983 Civil Rights Act",
            "chapter": null,
            "court": "District Court, D. South Carolina",
            "court_citation_string": "D.S.C.",
            "court_id": "scd",
            "dateArgued": null,
            "dateFiled": "2025-02-03",
            "dateTerminated": "2025-05-06",
            "docketNumber": "9:25-cv-00642",
            "docket_absolute_url": "/docket/69610810/pimentel-v-artificial-intelligence/",
            "docket_id": 69610810,
            "firm": [
                "Sacdc"
            ],
            "firm_id": [
                1025391
            ],
            "jurisdictionType": "Federal Question",
            "juryDemand": "None",
            "meta": {
                "timestamp": "2025-06-07T11:07:43.820822Z",
                "date_created": "2025-02-04T21:54:52.399894Z",
                "score": {
                    "bm25": 418.066
                },
                "more_docs": true
            },
            "pacer_case_id": "299683",
            "party": [
                "Robert  Samuel Pimentel",
                "Artificial Intelligence"
            ],
            "party_id": [
                15046987,
                15046988
            ],
            "recap_documents": [
                {
                    "absolute_url": "/docket/69610810/1/pimentel-v-artificial-intelligence/",
                    "attachment_number": null,
                    "cites": [],
                    "description": "COMPLAINT against Artificial Intelligence, filed by Robert Samuel Pimentel. (Attachments: # 1 Envelope) (agaz, ) (Entered: 02/04/2025)",
                    "docket_entry_id": 415568151,
                    "document_number": 1,
                    "document_type": "PACER Document",
                    "entry_date_filed": "2025-02-04",
                    "entry_number": 1,
                    "filepath_local": null,
                    "id": 429272892,
                    "is_available": false,
                    "meta": {
                        "timestamp": "2025-06-07T11:07:43.817751Z",
                        "date_created": "2025-02-04T21:54:52.646527Z"
                    },
                    "pacer_doc_id": "163014517443",
                    "page_count": null,
                    "short_description": "Complaint",
                    "snippet": ""
                },
                {
                    "absolute_url": "/docket/69610810/1/1/pimentel-v-artificial-intelligence/",
                    "attachment_number": 1,
                    "cites": [],
                    "description": "COMPLAINT against Artificial Intelligence, filed by Robert Samuel Pimentel. (Attachments: # 1 Envelope) (agaz, ) (Entered: 02/04/2025)",
                    "docket_entry_id": 415568151,
                    "document_number": 1,
                    "document_type": "Attachment",
                    "entry_date_filed": "2025-02-04",
                    "entry_number": 1,
                    "filepath_local": null,
                    "id": 434752140,
                    "is_available": false,
                    "meta": {
                        "timestamp": "2025-06-07T11:07:43.817751Z",
                        "date_created": "2025-03-28T22:13:03.030584Z"
                    },
                    "pacer_doc_id": "163014517444",
                    "page_count": 2,
                    "short_description": "Envelope",
                    "snippet": ""
                },
                {
                    "absolute_url": "/docket/69610810/12/pimentel-v-artificial-intelligence/",
                    "attachment_number": null,
                    "cites": [
                        109343,
                        791149
                    ],
                    "description": " ORDER RULING ON REPORT AND RECOMMENDATION: The Court adopts the R & R of theMagistrate Judge (Dkt. No. 9) as the order of the Court and dismisses this action without prejudice.  Signed by Honorable Richard M Gergel on 5/6/2025.  (agaz, )",
                    "docket_entry_id": 424542793,
                    "document_number": 12,
                    "document_type": "PACER Document",
                    "entry_date_filed": "2025-05-06",
                    "entry_number": 12,
                    "filepath_local": "recap/gov.uscourts.scd.299683/gov.uscourts.scd.299683.12.0.pdf",
                    "id": 438650779,
                    "is_available": true,
                    "meta": {
                        "timestamp": "2025-06-07T11:07:43.817751Z",
                        "date_created": "2025-05-06T17:25:33.182669Z"
                    },
                    "pacer_doc_id": "163014718645",
                    "page_count": 3,
                    "short_description": "Order Ruling on Report and Recommendation",
                    "snippet": "        9:25-cv-00642-RMG            Date Filed 05/06/25       Entry Number 12          Page 1 of 3\n\n\n\n\n                             IN THE UNITED STATES DISTRICT COURT\n                                 DISTRICT OF SOUTH CAROLINA\n                                     CHARLESTON DIVISION\n\n       Robert Samuel Pimentel,                              Case No. 9:25-cv-00642-RMG-MHC\n\n                       Plaintiff,\n               v.\n                                                            ORDER\n   "
                }
            ],
            "referredTo": "Molly H. Cherry",
            "referred_to_id": 14478,
            "suitNature": "555 Prison:Prison Condition",
            "trustee_str": null
        },
```

In [19]:
data_r_path = 'courtlistener/search/r/2025-06-17_courtlistener_search_type-r_artificial-intelligence.jsonl'

with open(data_r_path, 'r') as file:
    data_r = [json.loads(line) for line in file]

In [20]:
dataset_r = pd.DataFrame(data_r)
dataset_r.head()

Unnamed: 0,assignedTo,assigned_to_id,attorney,attorney_id,caseName,case_name_full,cause,chapter,court,court_citation_string,...,juryDemand,meta,pacer_case_id,party,party_id,recap_documents,referredTo,referred_to_id,suitNature,trustee_str
0,Elizabeth Anne Kovachevich,1816.0,[],[],Mackinder v. Commissioner of Social Security,,42:405 Review of HHS Decision (SSID),,"District Court, M.D. Florida",M.D. Fla.,...,,"{'timestamp': '2025-03-03T13:49:27.559618Z', '...",266226,"[Mackinder, Commissioner of Social Security]",[],[{'absolute_url': '/docket/5159680/17/mackinde...,Elizabeth A. Jenkins,9146.0,Social Security: SSID Title XVI,
1,William W. Caldwell,510.0,"[Timothy J. Nieman, Ian M. Comisky, Stephen Mo...","[1033089, 1116487, 1425626, 415673, 415674]",Amos v. Franklin Financial Services Corporation,,18:1961 Racketeering (RICO) Act,,"District Court, M.D. Pennsylvania",M.D. Penn.,...,Plaintiff,"{'timestamp': '2025-03-03T07:37:15.814148Z', '...",81245,"[Carole A. Fowler, Ronald L. Prough, Robert A....","[903102, 903103, 903104, 903105, 903106, 90310...",[{'absolute_url': '/docket/4373292/53/amos-v-f...,,,470 Racketeer/Corrupt Organization,
2,Vernon Speede Broderick,401.0,"[Wojciech Jackowski, Samuel Fawkner Abernethy,...","[9727619, 504590, 1399063, 1399064, 1399065, 1...",U.S Commodity Futures Trading Commission v. By...,,15:78m(a) Securities Exchange Act,,"District Court, S.D. New York",S.D.N.Y.,...,Both,"{'timestamp': '2025-03-03T07:23:55.060282Z', '...",408103,"[Christopher Curtin, The New York Mercantile E...","[775680, 775681, 775682, 657146, 775679]",[{'absolute_url': '/docket/4351827/142/22/us-c...,Sarah Netburn,9448.0,Securities/Commodities,
3,Thomas James McAvoy,2098.0,"[Daniel J. Hurteau, April J. Tabor, Robert H. ...","[334720, 334721, 334722, 334723, 334724, 33472...",Unger v. Albany Medical Center,,15:1 Antitrust Litigation,,"District Court, N.D. New York",N.D.N.Y.,...,Defendant,"{'timestamp': '2025-03-03T07:07:00.485760Z', '...",64134,"[Ellis Hospital, Catholic Health East, Ascensi...","[514313, 683150, 683151, 683152, 683153, 68315...",[{'absolute_url': '/docket/4326250/114/unger-v...,David R Homer,9934.0,410 Anti-Trust,
4,Aleta Arthur Trauger,3253.0,"[Heather Marie Gwinn, Amorette Rinkleib, Pamel...","[9027330, 6706563, 6472399, 6793298, 6959961]","Stewart v. Healthcare Revenue Recovery Group, LLC",,15:1692 Fair Debt Collection Act,,"District Court, M.D. Tennessee",M.D. Tenn.,...,Both,"{'timestamp': '2025-03-25T04:14:08.289914Z', '...",83582,"[Healthcare Revenue Recovery Group, LLC, Angel...","[9819067, 9819068]",[{'absolute_url': '/docket/17429136/55/1/stewa...,,,480 Consumer Credit,


In [21]:
dataset_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14442 entries, 0 to 14441
Data columns (total 30 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   assignedTo             12344 non-null  object 
 1   assigned_to_id         10861 non-null  float64
 2   attorney               14442 non-null  object 
 3   attorney_id            14442 non-null  object 
 4   caseName               14442 non-null  object 
 5   case_name_full         14442 non-null  object 
 6   cause                  14442 non-null  object 
 7   chapter                275 non-null    object 
 8   court                  14442 non-null  object 
 9   court_citation_string  14442 non-null  object 
 10  court_id               14442 non-null  object 
 11  dateArgued             248 non-null    object 
 12  dateFiled              14339 non-null  object 
 13  dateTerminated         9591 non-null   object 
 14  docketNumber           14442 non-null  object 
 15  do

In [22]:
print("Missing values in each column:")

dataset_r.isna().sum()

Missing values in each column:


assignedTo                2098
assigned_to_id            3581
attorney                     0
attorney_id                  0
caseName                     0
case_name_full               0
cause                        0
chapter                  14167
court                        0
court_citation_string        0
court_id                     0
dateArgued               14194
dateFiled                  103
dateTerminated            4851
docketNumber                 0
docket_absolute_url          0
docket_id                    0
firm                         0
firm_id                      0
jurisdictionType             0
juryDemand                   0
meta                         0
pacer_case_id                0
party                        0
party_id                     0
recap_documents              0
referredTo                9430
referred_to_id           10581
suitNature                   0
trustee_str              14167
dtype: int64

In [32]:
# pattern catches ai or artificial intelligence or arificial-intelligence
pattern = r'\b(ai|artificial[\s\-]?intelligence)\b'

ai_cases_r = dataset_r[dataset_r['caseName'].str.contains(pattern, case=False, na=False, regex=True)]

  ai_cases_r = dataset_r[dataset_r['caseName'].str.contains(pattern, case=False, na=False, regex=True)]


In [33]:
ai_cases_r

Unnamed: 0,assignedTo,assigned_to_id,attorney,attorney_id,caseName,case_name_full,cause,chapter,court,court_citation_string,...,juryDemand,meta,pacer_case_id,party,party_id,recap_documents,referredTo,referred_to_id,suitNature,trustee_str
261,Gregory B. Williams,,[],[],"Guardant Health, Inc. v. Tempus AI, Inc.",,,,"District Court, D. Delaware",D. Del.,...,,"{'timestamp': '2025-05-16T04:33:26.359954Z', '...",88002,"[Guardant Health, Inc., Tempus AI, Inc.]",[],[{'absolute_url': '/docket/69560770/11/guardan...,,,,
383,Sharon Johnson Coleman,676.0,"[Amy Senia, Rachel S Morse, Blair Ann Harringt...","[8109589, 8109590, 8054327, 7654051, 9842851, ...","In Re: Clearview AI, Inc., Consumer Privacy Li...",,28:1331 Federal Question,,"District Court, N.D. Illinois",N.D. Ill.,...,Defendant,"{'timestamp': '2025-05-17T04:40:35.790286Z', '...",395030,"[Clearview AI, Inc., Professor Jane Bambauer,...","[11483904, 11483905, 11483906, 11483907, 11483...",[{'absolute_url': '/docket/29102457/347/in-re-...,,,890 Other Statutory Actions,
387,Katherine Polk Failla,1025.0,"[Paul T Cappuccio, Eugene Y. Mar, Cameron J Gi...","[9933696, 9956192, 9956193, 10319511, 9894026,...","Dow Jones & Company, Inc. v. Perplexity AI, Inc.",,17:101 Copyright Infringement,,"District Court, S.D. New York",S.D.N.Y.,...,Plaintiff,"{'timestamp': '2025-03-03T00:49:14.519852Z', '...",630270,"[NYP Holdings, Inc., Perplexity AI, Inc., Dow ...","[14703404, 14703405, 14703406]",[{'absolute_url': '/docket/69280523/49/dow-jon...,,,820 Copyright,
719,Jinsook Ohta,,"[Jordan R. Jaffe, Andrew Jonathan Bramhall, We...","[10186346, 10244139, 10186347, 10186348, 10186...","Tempus AI, Inc. v. Guardant Health, Inc.",,35:0271 Patent Infringement,,"District Court, S.D. California",S.D. Cal.,...,Plaintiff,"{'timestamp': '2025-03-17T20:01:19.585995Z', '...",807966,"[Tempus AI, Inc., Guardant Health, Inc.]","[15190001, 15190002]",[{'absolute_url': '/docket/69746130/1/tempus-a...,Michelle M. Pettit,,830 Patent,
1003,Haywood Stirling Gilliam Jr.,1206.0,"[James Francis Regan, Thomas Edward Wallerstei...","[95893, 95894, 95895, 95896, 95897, 95898, 958...","Loop AI Labs, Inc. v. Gatti",,18:1964 Racketeering (RICO) Act,,"District Court, N.D. California",N.D. Cal.,...,Both,"{'timestamp': '2025-03-03T05:40:01.102899Z', '...",284971,"[IQSystem, Inc., Loop AI Labs Inc, Almawave US...","[4250464, 4250461, 4250462, 166262, 166263, 16...",[{'absolute_url': '/docket/4181608/466/loop-ai...,Donna M. Ryu,9093.0,470 Racketeer/Corrupt Organization,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13350,Margaret M. Garnett,,"[Leslie A. Demers, Bijal V Vakil, William K. W...","[10210664, 10210665, 10416176, 10171014]","Palantir Technologies Inc. v. Guardian AI, Inc.",,18:1836(a) Injunction against Misappropriation...,,"District Court, S.D. New York",S.D.N.Y.,...,,"{'timestamp': '2025-06-17T02:39:28.759996Z', '...",638340,"[Pranav Pillai, Does 1-10, Palantir Technologi...","[15166440, 15166441, 15166442, 15166443, 15166...",[{'absolute_url': '/docket/69721890/1/palantir...,,,880 Defend Trade Secrets Act (of 2016),
13473,Colleen McMahon,2174.0,"[David Berger, Scott Drury, Joel Laurence Kurt...","[6506386, 6477415, 6370730, 6309266, 6309267, ...","John v. Clearview AI, Inc.",,28:1332pd Diversity-Property Damage,,"District Court, S.D. New York",S.D.N.Y.,...,Plaintiff,"{'timestamp': '2025-03-08T05:20:35.545848Z', '...",536637,"[Rosemary Arias, Clearview AI, Inc., Ryan Balf...","[9534645, 9534646, 9534647, 9534648, 9534649, ...",[{'absolute_url': '/docket/17126996/1/john-v-c...,,,380 Personal Property: Other,
13481,Margaret M. Garnett,,"[Leslie A. Demers, Bijal V Vakil, William K. W...","[10210664, 10210665, 10416176, 10171014]","Palantir Technologies Inc. v. Guardian AI, Inc.",,18:1836(a) Injunction against Misappropriation...,,"District Court, S.D. New York",S.D.N.Y.,...,,"{'timestamp': '2025-06-17T02:39:28.759996Z', '...",638340,"[Pranav Pillai, Does 1-10, Palantir Technologi...","[15166440, 15166441, 15166442, 15166443, 15166...",[{'absolute_url': '/docket/69721890/1/palantir...,,,880 Defend Trade Secrets Act (of 2016),
13580,Sharon Johnson Coleman,676.0,"[David P. Saunders, Kevin Michael Forde, Zacha...","[6412562, 6412563, 6412564, 6412565, 6412566, ...","Thornley v. Clearview AI, Inc.",,28:1332 Diversity-Petition for Removal,,"District Court, N.D. Illinois",N.D. Ill.,...,Both,"{'timestamp': '2025-03-18T08:18:59.383965Z', '...",377338,"[Deborah Benjamin-Koller, Josue Herrera, Melis...","[9736032, 9736029, 9736030, 9736031]",[{'absolute_url': '/docket/17335257/37/thornle...,,,360 P.I.: Other,


In [40]:
def get_bm25_score(row):
    """
    Extracts the BM25 score from the metadata of a row.
    Returns 0 if the score is not available.
    """
    return row.get('meta', {}).get('score', {}).get('bm25', 0)


all_bm25_scores = ai_cases_r.apply(get_bm25_score, axis=1)


In [43]:
print(f'Minimum BM25 score: {all_bm25_scores.min()}')
print(f'Maximum BM25 score: {all_bm25_scores.max()}')
print(f'Mean BM25 score: {all_bm25_scores.mean()}')

Minimum BM25 score: 3.8937874
Maximum BM25 score: 417.8391
Mean BM25 score: 59.49202940769231
