In [1]:
import sqlite3
import os
import pandas as pd

# Path to the SQLite database, relative to the notebook's working directory
db_path = "../amici/database/supreme_court_docs.db"

# Bind both handles up front so that later cells (including the cleanup
# `if conn:` check) see None instead of raising NameError when the database
# file is missing, and so that re-running this cell never leaves stale handles.
conn = None
cursor = None

# sqlite3.connect() would silently create a new, empty database at a path that
# does not exist, so check for the file before connecting.
if not os.path.exists(db_path):
    print(f"Database file not found at: {db_path}")
    print(f"Current working directory: {os.getcwd()}")
else:
    # Open the connection and a cursor that the following cells reuse
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    print(f"Successfully connected to SQLite database at {db_path}")

Successfully connected to SQLite database at ../amici/database/supreme_court_docs.db


## Testing the Connection

Once connected, you can test the connection by executing a simple query:

In [2]:
# Sanity-check the open connection by listing the tables in the database.
# Requires `cursor` from the connection cell.
try:
    # Ask sqlite_master for every entry whose type is 'table'
    tables = cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table';"
    ).fetchall()

    # Each result row is a 1-tuple holding the table name
    print("Tables in the database:")
    for table in tables:
        print("-", table[0])
except Exception as exc:
    print(f"Error executing query: {exc}")

Tables in the database:
- documents
- sqlite_sequence
- dockets
- amici
- lawyers


## Make Excel sheets

In [7]:
# Inspect the amici rows recorded for one document (nothing is written to
# Excel in this cell; it only prints the rows). Requires `cursor` from the
# connection cell.
DOCUMENT_ID = 524  # document whose amici are listed; change to inspect another

try:
    # Bind the id as a query parameter instead of embedding it in the SQL text
    cursor.execute("SELECT * FROM amici WHERE document_id = ?;", (DOCUMENT_ID,))
    rows = cursor.fetchall()

    # The first element of each cursor.description entry is the column name
    column_names = [description[0] for description in cursor.description]
    print(column_names)

    # Print every row returned for this document
    print("Sample data from the table:")
    for row in rows:
        print(row)
except Exception as e:
    print(f"Error fetching data: {e}")

['amicus_id', 'document_id', 'name', 'category']
Sample data from the table:
(1998, 524, 'Center for Constitutional Rights', 'organization')
(1999, 524, 'Transgender Law Center', 'organization')
(2000, 524, 'National Center for Lesbian Rights', 'organization')
(2001, 524, 'Make the Road New York', 'organization')
(2002, 524, 'Bay Area Lawyers for Individual Freedom', 'organization')
(2003, 524, 'Black & Pink National', 'organization')
(2004, 524, 'Black Alliance for Just Immigration', 'organization')
(2005, 524, 'Black Trans Nation', 'organization')
(2006, 524, 'Center for Community Alternatives', 'organization')
(2007, 524, 'DC LGBTQ+ Community Center', 'organization')
(2008, 524, 'Desiree Alliance', 'organization')
(2009, 524, 'Drug Policy Alliance', 'organization')
(2010, 524, 'Equality Federation', 'organization')
(2011, 524, 'Equality New York', 'organization')
(2012, 524, 'Fountain House', 'organization')
(2013, 524, 'Free to Be Youth Project', 'organization')
(2014, 524, 'GLBTQ 

In [33]:
try:
    # Pull every document row flagged complete_amici_list=0
    documents_with_appendix = cursor.execute(
        "SELECT * FROM documents WHERE complete_amici_list=0"
    ).fetchall()

    # The first element of each cursor.description entry is the column name
    column_names = [col[0] for col in cursor.description]

    # Report how many documents matched
    print(f"Found {len(documents_with_appendix)} documents with amici list incomplete.")

    # Show the schema and a short preview only when there is something to show
    if len(documents_with_appendix) > 0:
        print("\nColumn names:", column_names)
        print("\nFirst 5 documents with incomplete amici:")
        print(*documents_with_appendix[:5], sep="\n")
except Exception as exc:
    print(f"Error executing query: {exc}")

# Write the `blob` value of every document with complete_amici_list=0 to a text
# file, one value per line, then close the database connection.
try:
    # Select the column by name rather than relying on its position in SELECT *
    cursor.execute("SELECT blob FROM documents WHERE complete_amici_list=0")
    blob_rows = cursor.fetchall()

    skipped = 0
    # Explicit encoding so the output does not depend on the platform default
    with open("../amici/data/incomplete_amici_blobs.txt", "w", encoding="utf-8") as f:
        for (blob,) in blob_rows:
            # A NULL blob would raise TypeError on concatenation and leave a
            # truncated file, so skip it and report the count afterwards.
            if blob is None:
                skipped += 1
                continue
            f.write(blob + '\n')

    print("Blobs of incomplete amici documents have been written to incomplete_amici_blobs.txt")
    if skipped:
        print(f"Skipped {skipped} documents with no blob value.")
except Exception as e:
    print(f"Error writing blobs to file: {e}")
# Close the database connection. Once closed, `cursor` can no longer run
# queries; re-run the connection cell before executing any query cell again.
finally:
    if conn:
        conn.close()
        print("Database connection closed.")

Found 1179 documents with amici list incomplete.

Column names: ['document_id', 'url', 'docket_url', 'date', 'date_formatted', 'label', 'doc_title', 'blob', 'transcribed', 'neededOCR', 'complete_amici_list', 'counsel_of_record']

First 5 documents with incomplete amici:
(4, 'http://www.supremecourt.gov/DocketPDF/22/22-535/252021/20230111151208528_22-506and22-535tsacLawyersCommitteeForCivilRightsUnderLaw.pdf', 'www.supremecourt.gov/search.aspx?filename=/docket/docketfiles/html/public/22-535.html', 'Jan 11 2023', '2023-01-11', 'Brief amici curiae of Lawyers’ Committee For Civil Rights Under Law and 21 Other Organizations filed (also in 22-506).  VIDED.  (Distributed)', 'Main Document', 'SUPREMECOURT/www.supremecourt.gov/DocketPDF/22/22-535/252021/20230111151208528_22-506and22-535tsacLawyersCommitteeForCivilRightsUnderLaw.pdf', 1, 0, 0, 'Damon Hewitt')
(29, 'http://www.supremecourt.gov/DocketPDF/19/19-1392/185243/20210729123007530_41063%20pdf%20Pierce.pdf', 'www.supremecourt.gov/search.as

In [17]:
# Load the pairwise feature table
df = pd.read_csv("../amici/data/features.csv")

# Build an order-independent key for each (left_norm, right_norm) pair so that
# (a, b) and (b, a) map to the same tab-joined string. Walking the two columns
# with zip replaces a row-wise DataFrame.apply(axis=1), which is slow on large
# frames and yields a DataFrame rather than a Series when the frame has no rows
# (breaking the column assignment).
df['leftrightsorted'] = [
    '\t'.join(sorted(pair)) for pair in zip(df['left_norm'], df['right_norm'])
]

# Keep only the first occurrence of each unordered pair
df = df.drop_duplicates(subset=['leftrightsorted'])

In [18]:
# Show the de-duplicated pair table; as the cell's last expression it is
# rendered by the notebook rather than printed.
df

Unnamed: 0.1,Unnamed: 0,left_norm,right_norm,left_raw,right_raw,left_doc,right_doc,charsim,wordsim,ratio,...,tokensort,tokenset,levenstein,jaro_winkler,first_letter_jaccard,combined_len,len_ratio,sentence_cross_encoding,hbsbm_prob,leftrightsorted
0,0,(new) orleans public defenders,orleans public defenders,(New) Orleans Public Defenders,The Orleans Public Defenders,360,2544,0.772334,0.926875,0.896552,...,0.827586,0.923077,0.866667,0.931746,0.600000,4.060443,0.933333,0.959381,0.003,(new) orleans public defenders\torleans public...
9,9,orleans public defenders,california public defenders assn.,The Orleans Public Defenders,The California Public Defenders Association,2544,360,0.546373,0.602410,0.647887,...,0.563380,0.833333,0.534884,0.852798,0.500000,4.262680,0.651163,0.637640,0.454,california public defenders assn.\torleans pub...
36,36,orleans public defenders,public defender assn.,The Orleans Public Defenders,Public Defender Association,2544,8037,0.632730,0.263915,0.581818,...,0.545455,0.545455,0.178571,0.636197,0.400000,4.007333,0.964286,0.610998,0.017,orleans public defenders\tpublic defender assn.
42,42,10 members of the united st. senate,members of the texas senate,10 Members of the United States Senate,Members of the Texas Senate,1164,1372,0.569091,0.675358,0.769231,...,0.738462,0.875000,0.657895,0.701274,0.571429,4.174387,0.710526,0.690187,0.101,10 members of the united st. senate\tmembers o...
46,46,138 women hurt by planned parenthood abortions,women hurt by abortion,138 Women Hurt by Planned Parenthood Abortions,Women Hurt by Abortion,8222,7805,0.622193,0.526788,0.647059,...,0.647059,0.742857,0.478261,0.618542,0.666667,4.219508,0.478261,0.761664,0.038,138 women hurt by planned parenthood abortions...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368430,368430,wv free,"wv free, advocates for reproductive health, ri...",WV FREE,"WV FREE, Advocates for Reproductive Health, Ri...",2459,3063,0.377745,0.634917,0.202899,...,0.202899,0.444444,0.112903,0.822581,0.250000,4.234107,0.112903,0.619752,0.793,"wv free\twv free, advocates for reproductive h..."
368434,368434,wyoming mining assn.,"wyoming trucking assn.,.",Wyoming Mining Association,"Wyoming Trucking Association, Inc.",2646,4445,0.605186,0.579337,0.766667,...,0.800000,0.800000,0.676471,0.867459,0.400000,4.094345,0.764706,0.734375,0.000,"wyoming mining assn.\twyoming trucking assn.,."
368446,368446,ylsn distribution d/b/a happy distro,"ylsn distribution, d/b/a happy distro",YLSN Distribution LLC d/b/a Happy Distro,"YLSN Distribution LLC, d/b/a Happy Distro",1991,6306,0.942314,1.000000,0.987654,...,0.987654,0.987654,0.975610,0.995122,1.000000,4.394449,0.975610,0.995164,0.001,ylsn distribution d/b/a happy distro\tylsn dis...
368454,368454,young america's fdn.,young america’s fdn.,Young America's Foundation,Young America’s Foundation,4472,376,0.678084,1.000000,0.961538,...,0.961538,0.961538,0.961538,0.984615,1.000000,3.951244,1.000000,0.996216,0.983,young america's fdn.\tyoung america’s fdn.
