In [None]:
import os
from urllib.parse import urlencode

import pandas as pd
import requests

In [None]:
# Load the court table from nycourts.csv and the previously saved opinion IDs
# from opinion_ids.txt.

# Create court DataFrame from CSV
# NOTE(review): `columns` is not passed to read_csv; presumably it lists the
# columns present in nycourts.csv -- confirm against the file.
columns = ['court_id', 'resource_uri', 'pacer_court_id', 'pacer_has_rss_feed',
       'pacer_rss_entry_types', 'date_last_pacer_contact', 'fjc_court_id',
       'date_modified', 'in_use', 'has_opinion_scraper',
       'has_oral_argument_scraper', 'position', 'citation_string',
       'short_name', 'full_name', 'url', 'start_date', 'end_date',
       'jurisdiction', 'parent_court', 'appeals_to']
nycourts_df = pd.read_csv("nycourts.csv")

# One filtered DataFrame per court id. Iterating a DataFrame directly yields
# its column labels, so the court ids must be taken from the "court_id" column.
ny_court_list = [
    nycourts_df[nycourts_df["court_id"] == court]
    for court in nycourts_df["court_id"]
]

# Index the table by court id; reassigning (instead of inplace=True) keeps the
# statement safe to re-run on its own.
nycourts_df = nycourts_df.set_index("court_id")

# Create opinion list from the saved text file (one ID per line).
# Blank lines are skipped rather than blindly dropping the last element, so the
# final ID is kept even when the file does not end with a newline.
with open("opinion_ids.txt", "r") as opinion_file:
    unique_opinion_ids = [line.strip() for line in opinion_file if line.strip()]


In [None]:
# Search CourtListener for opinions matching the keyword query in the courts
# listed in nycourts_df, collect up to max_ids unique opinion IDs, and save
# them to opinion_ids.txt (one ID per line).

# Define the base URL
base_url = "https://www.courtlistener.com/api/rest/v4/search/"

# Define query from keywords
keywords = ["car accident", "truck accident", "motorcycle accident", "vehicle accident", "automobile accident", "bus accident",
            "car crash", "truck crash", "motorcycle crash", "vehicle crash", "automobile crash", "bus crash", "rear end", "rear-end"]

# NOTE(review): multi-word terms are sent unquoted, exactly as before; confirm
# against the CourtListener search syntax whether they should be quoted phrases.
query = f"(personal injury OR personal injuries OR negligence) AND ({' OR '.join(keywords)})"

# Define the court filter from the court DataFrame (index holds the court ids)
court_filter = " OR ".join(str(court) for court in nycourts_df.index)

# Define url from base URL and query; urlencode escapes spaces, parentheses and
# any reserved characters so the query string is always well-formed.
url = f"{base_url}?{urlencode({'q': query, 'court': court_filter})}"

# Define the headers with the authorization token.
# The token is read from the environment so it is never stored in the notebook.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable before running this cell."
    )
headers = {
    "Authorization": f"Token {api_token}"
}

# Initialize variables for storing results
unique_opinion_ids = set()  # Use a set to ensure uniqueness
max_ids = 2000  # Set max_ids to desired number
next_url = url
page_count = 0

# Loop to fetch paginated results
while next_url and (len(unique_opinion_ids) < max_ids):
    # requests has no default timeout; without one a stalled connection hangs the cell.
    response = requests.get(next_url, headers=headers, timeout=30)

    if response.status_code != 200:
        print(f"Status Code: {response.status_code}")
        print(response.text)
        break

    data = response.json()
    results = data.get("results", [])
    page_count += 1

    # Extract opinion IDs and add to the set
    for result in results:
        for opinion in result.get("opinions") or []:
            opinion_id = opinion.get("id")
            if opinion_id is not None:  # skip entries without an id
                unique_opinion_ids.add(opinion_id)
            if len(unique_opinion_ids) >= max_ids:
                break
        # Leave the outer loop as well, otherwise later results keep adding
        # IDs and the set grows past max_ids.
        if len(unique_opinion_ids) >= max_ids:
            break

    # One progress line per page instead of dumping the whole JSON payload.
    print(f"Page {page_count}: {len(unique_opinion_ids)} unique opinion IDs collected")

    # Get the next page URL from the API response
    next_url = data.get("next")

# Save the unique opinion IDs to a file
with open("opinion_ids.txt", "w") as file:
    for opinion_id in unique_opinion_ids:
        file.write(f"{opinion_id}\n")

print(f"Saved {len(unique_opinion_ids)} unique opinion IDs to 'opinion_ids.txt'")


{'count': 12097, 'next': 'https://www.courtlistener.com/api/rest/v4/search/?court=nyd+OR+nyed+OR+nynd+OR+nysd+OR+nywd+OR+nysupct+OR+nysupctalbany+OR+nysupctalgny+OR+nysupctbrnx+OR+nysupctbrm+OR+nysupctctrgs+OR+nysupctcayuga+OR+nysupctchtq+OR+nysupctchmng+OR+nysupctchenango+OR+nysupctclinton+OR+nysupctclmb+OR+nysupctcrtlnd+OR+nysupctdlwr+OR+nysupctdtchss+OR+nysupcterie+OR+nysupctessex+OR+nysupctfrnkln+OR+nysupctfltn+OR+nysupctgnss+OR+nysupctgrn+OR+nysupcthrkmr+OR+nysupctjffrsn+OR+nysupctkings+OR+nysupctlewis+OR+nysupctlvngstn+OR+nysupctmdsn+OR+nysupctmonroe+OR+nysupctmntgmry+OR+nysupctnss+OR+nysupctnewyork+OR+nysupctniagra+OR+nysupctoneida+OR+nysupctnndg+OR+nysupctntr+OR+nysupctorange+OR+nysupctrlns+OR+nysupctswg+OR+nysupctostego+OR+nysupctptnm+OR+nysupctqueens+OR+nysupctren+OR+nysupctrichmond+OR+nysupctrcklnd+OR+nysupctsrtg+OR+nysupctschnec+OR+nysupctscho+OR+nysupctschuy+OR+nysupctsnc+OR+nysupctsntlw+OR+nysupctstbn+OR+nysuprctfflk+OR+nysupctsllvn+OR+nysupcttioga+OR+nysupcttmpkns+OR+nys

In [None]:
# Download each opinion in unique_opinion_ids from the CourtListener opinions
# endpoint, assemble them into opinions_df (indexed by opinion_id), and save
# the result to opinions.csv.

# Columns kept in the opinions DataFrame; response keys not listed here are
# dropped and listed keys missing from a response are left empty.
opinion_columns = ['resource_uri', 'id', 'absolute_url', 'cluster_id', 'cluster', 'author_id',
                   'author', 'joined_by', 'date_created', 'date_modified', 'author_str',
                   'per_curiam', 'joined_by_str', 'type', 'sha1', 'page_count',
                   'download_url', 'local_path', 'plain_text', 'html', 'html_lawbox',
                   'html_columbia', 'html_anon_2020', 'xml_harvard', 'html_with_citations',
                   'extracted_by_ocr', 'ordering_key', 'opinions_cited']

# Define the base URL
base_url = "https://www.courtlistener.com/api/rest/v4/opinions/"

# Define the headers with the authorization token.
# The token is read from the environment so it is never stored in the notebook.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable before running this cell."
    )
headers = {
    "Authorization": f"Token {api_token}"
}

# Loop to fetch results. Records are collected in a list and turned into a
# DataFrame once at the end; appending rows to a DataFrame one at a time
# re-allocates on every insert.
opinion_records = []
for position, opinion_id in enumerate(unique_opinion_ids, start=1):
    url = f"{base_url}{opinion_id}/"
    # requests has no default timeout; without one a stalled connection hangs the cell.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        # Stop at the first failure; opinions fetched so far are still saved below.
        print(f"Request failed for {url}")
        print(f"Status Code: {response.status_code}")
        print(response.text)
        break

    data = response.json()
    opinion_records.append(data)

    # Periodic progress line instead of two printed lines per opinion.
    if position % 100 == 0:
        print(f"{position} opinions downloaded")

# dtype=object keeps the values as returned by the API (no int -> float
# coercion in columns that contain missing values).
opinions_df = (
    pd.DataFrame(opinion_records, columns=opinion_columns, dtype=object)
    .rename(columns={"id": "opinion_id"})
    .set_index("opinion_id")
)

opinions_df.to_csv("opinions.csv", header=True, index=True)
print(f'\n{len(opinions_df)} opinions successfully downloaded\n')

opinions_df

6201351
https://www.courtlistener.com/api/rest/v4/opinions/6201351/
5505032
https://www.courtlistener.com/api/rest/v4/opinions/5505032/
1638410
https://www.courtlistener.com/api/rest/v4/opinions/1638410/
6184972
https://www.courtlistener.com/api/rest/v4/opinions/6184972/
6135822
https://www.courtlistener.com/api/rest/v4/opinions/6135822/
6184975
https://www.courtlistener.com/api/rest/v4/opinions/6184975/
1605649
https://www.courtlistener.com/api/rest/v4/opinions/1605649/
6160406
https://www.courtlistener.com/api/rest/v4/opinions/6160406/
6152219
https://www.courtlistener.com/api/rest/v4/opinions/6152219/
8331291
https://www.courtlistener.com/api/rest/v4/opinions/8331291/
5496864
https://www.courtlistener.com/api/rest/v4/opinions/5496864/
6184993
https://www.courtlistener.com/api/rest/v4/opinions/6184993/
6168610
https://www.courtlistener.com/api/rest/v4/opinions/6168610/
8724515
https://www.courtlistener.com/api/rest/v4/opinions/8724515/
6160420
https://www.courtlistener.com/api/rest/v

Unnamed: 0_level_0,resource_uri,absolute_url,cluster_id,cluster,author_id,author,joined_by,date_created,date_modified,author_str,...,plain_text,html,html_lawbox,html_columbia,html_anon_2020,xml_harvard,html_with_citations,extracted_by_ocr,ordering_key,opinions_cited
opinion_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6201351,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6332788/simon-v-lumbermens-mutual-cas...,6332788,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-05T12:36:21.646675-08:00,2022-02-05T12:36:21.646698-08:00,Niehoff,...,,,,,,"<opinion type=""majority"">\n<p id=""b834-9"">OPIN...",,True,,[]
5505032,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/5658355/byrnes-v-new-york-lake-erie-w...,5658355,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-01-09T19:08:48.810319-08:00,2022-01-09T19:08:48.810342-08:00,Pratt,...,,,,,,"<opinion type=""majority"">\n<author id=""b533-17...",,True,,[]
1638410,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/1638410/essig-v-united-states/,1638410,https://www.courtlistener.com/api/rest/v4/clus...,3435,https://www.courtlistener.com/api/rest/v4/peop...,[],2013-10-29T23:59:37.538705-07:00,2023-08-23T16:18:38.672736-07:00,Wexler,...,,,<div>\n<center><b>675 F.Supp. 84 (1987)</b></c...,,,"<?xml version=""1.0"" encoding=""utf-8""?>\n<opini...","<div>\n<center><b><span class=""citation no-lin...",False,,[https://www.courtlistener.com/api/rest/v4/opi...
6184972,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6316535/in-re-the-arbitration-between...,6316535,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-05T11:54:09.369027-08:00,2022-02-05T11:54:09.369049-08:00,Davidson,...,,,,,,"<opinion type=""majority"">\n<author id=""b806-13...",,True,,[]
6135822,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6268082/new-york-car-oil-co-v-richmond/,6268082,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-04T13:41:19.068674-08:00,2022-02-04T13:41:19.068697-08:00,Woodruff,...,,,,,,"<opinion type=""majority"">\n<author id=""b202-5""...",,True,,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6184943,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6316506/de-forte-v-liggett-myers-toba...,6316506,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-05T11:54:06.276904-08:00,2022-02-05T11:54:06.276927-08:00,Feiden,...,,,,,,"<opinion type=""majority"">\n<author id=""b741-12...",,True,,[]
8331249,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/8362338/schurr-v-houston/,8362338,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-10-17T14:02:42.909317-07:00,2022-10-17T14:02:42.909335-07:00,Beckwith,...,,,,,,"<opinion type=""majority"">\n<author id=""ANd"">Be...",,True,,[]
6168564,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6300344/wrubel-v-state/,6300344,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-05T11:29:41.159428-08:00,2022-02-05T11:29:41.159450-08:00,Yotjítg,...,,,,,,"<opinion type=""majority"">\n<author id=""b899-7""...",,True,,[]
6184952,https://www.courtlistener.com/api/rest/v4/opin...,/opinion/6316515/callanan-v-state/,6316515,https://www.courtlistener.com/api/rest/v4/clus...,,,[],2022-02-05T11:54:07.171521-08:00,2022-02-05T11:54:07.171544-08:00,Giorno,...,,,,,,"<opinion type=""majority"">\n<author id=""b760-13...",,True,,[]
