In [4]:
import math
import pandas as pd

# Load the ACLU bill tracker export.
# Data from https://www.aclu.org/legislative-attacks-on-lgbtq-rights?state
aclu_pandas = pd.read_json('aclu.json')

# This bill was incorrectly attributed to NC instead of SC.
# Assign through a single .at[] call: the chained form `.loc[12]['state'] = ...`
# may write into a temporary row copy and leave the frame itself unchanged.
aclu_pandas.at[12, 'state'] = {'value': 'SC', 'label': 'South Carolina'}


def expand(short_prefix: str):
    """Translate a bill prefix into a chamber name.

    Returns 'house' when the prefix starts with 'h' and 'senate' when it
    starts with 's' (case-insensitive). Any other prefix, including an
    empty string, yields None.
    """
    chamber_by_initial = {'h': 'house', 's': 'senate'}
    # Slicing (rather than indexing) keeps the empty-string case safe.
    return chamber_by_initial.get(short_prefix.lower()[:1])

def format_ak(frame):
    """Build the Alaska bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space; any
    other shape raises ValueError when unpacking. Returns ``('html', url)``
    with the number left-padded with zeros to at least four characters.
    """
    chamber_code, bill_no = frame['name'].split(' ')
    url = f'https://www.akleg.gov/basis/Bill/Text/33?Hsid={chamber_code}{bill_no.zfill(4)}A'
    return ('html', url)

def format_ar(frame):
    """Build the Arkansas bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('pdf', url)``.
    """
    chamber_code, bill_no = frame['name'].split(' ')
    base = 'https://www.arkleg.state.ar.us/Bills/FTPDocument?path=%2FBills%2F2023R%2FPublic%2F'
    return ('pdf', base + chamber_code + bill_no + '.pdf')

def format_az(frame):
    """Build the Arizona bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('html', url)`` with the number
    zero-padded to at least four characters.
    """
    chamber_code, bill_no = frame['name'].split(' ')
    padded_no = bill_no.zfill(4)
    return ('html', f'https://www.azleg.gov/legtext/56leg/1R/bills/{chamber_code}{padded_no}P.htm')

def format_co(frame):
    """Build the Colorado bill PDF URL for the bill in ``frame['name']``.

    The name is split on a single space into two parts, and the second
    part is split on '-' into two parts; only the piece after the dash is
    used in the URL. Either split raises ValueError if it does not yield
    exactly two parts. Returns ``('pdf', url)``.
    """
    # Hacky: the prefix and the part before the dash are discarded.
    _chamber_code, dashed_no = frame['name'].split(' ')
    _session_part, bill_no = dashed_no.split('-')
    url = 'https://leg.colorado.gov/sites/default/files/documents/2023A/bills/2023a_' + bill_no + '_01.pdf'
    return ('pdf', url)

def format_ct(frame):
    """Build the Connecticut bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The URL uses the first character of the prefix
    as a directory, the full prefix twice, and the number zero-padded to
    at least five characters. Returns ``('pdf', url)``.
    """
    chamber_code, bill_no = frame['name'].split(' ')
    url = 'https://www.cga.ct.gov/2023/TOB/{initial}/PDF/2023{code}-{number}-R00-{code}.pdf'.format(
        initial=chamber_code[0],
        code=chamber_code,
        number=bill_no.zfill(5),
    )
    return ('pdf', url)

def format_fl(frame):
    """Build the Florida filed-bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Only the number appears in the URL; the prefix
    is ignored. Returns ``('pdf', url)``.
    """
    _chamber_code, bill_no = frame['name'].split(' ')
    return ('pdf', f'https://www.flsenate.gov/Session/Bill/2023/{bill_no}/BillText/Filed/PDF')

def format_hi(frame):
    """Build the Hawaii bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is used as given (no padding).
    Returns ``('html', url)``.
    """
    # Example URLs noted while working out the pattern:
    # https://www.capitol.hawaii.gov/sessions/session2023/bills/SB1429_.HTM
    # https://www.capitol.hawaii.gov/sessions/session2023/bills/HB0891_.htm
    chamber_code, bill_no = frame['name'].split(' ')
    base = 'https://www.capitol.hawaii.gov/sessions/session2023/bills/'
    return ('html', base + chamber_code + bill_no + '_.HTM')

def format_ia(frame):
    """Build the Iowa bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('pdf', url)``.
    """
    # Example URLs noted while working out the pattern:
    # https://www.legis.iowa.gov/docs/publications/LGI/90/attachments/SF538.rtf
    # https://www.legis.iowa.gov/docs/publications/LGI/90/SF335.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    return ('pdf', f'https://www.legis.iowa.gov/docs/publications/LGI/90/{chamber_code}{bill_no}.pdf')

def format_id(frame):
    """Build the Idaho bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Only the first character of the prefix is
    used, followed by the number zero-padded to at least four characters.
    Returns ``('pdf', url)``.
    """
    # Example URLs noted while working out the pattern:
    # https://legislature.idaho.gov/wp-content/uploads/sessioninfo/2023/legislation/S1071.pdf
    # https://legislature.idaho.gov/wp-content/uploads/sessioninfo/2023/legislation/SB1071.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    file_stem = chamber_code[0] + bill_no.zfill(4)
    return ('pdf', f'https://legislature.idaho.gov/wp-content/uploads/sessioninfo/2023/legislation/{file_stem}.pdf')

def format_ks(frame):
    """Build the Kansas bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The prefix is lower-cased in the URL.
    Returns ``('pdf', url)``.
    """
    # Example URLs noted while working out the pattern:
    # http://kslegislature.org/li/b2023_24/measures/documents/sb228_00_0000.pdf
    # http://kslegislature.org/li/b2023_24/measures/documents/SB228_00_0000.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    file_stem = chamber_code.lower() + bill_no
    return ('pdf', f'http://kslegislature.org/li/b2023_24/measures/documents/{file_stem}_00_0000.pdf')

def format_ky(frame):
    """Build the Kentucky original-bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The prefix is lower-cased in the URL.
    Returns ``('pdf', url)``.
    """
    # Example: https://apps.legislature.ky.gov/recorddocuments/bill/23RS/hb585/orig_bill.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    base = 'https://apps.legislature.ky.gov/recorddocuments/bill/23RS/'
    return ('pdf', base + chamber_code.lower() + bill_no + '/orig_bill.pdf')

def format_md(frame):
    """Build the Maryland bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The lower-cased prefix is used both as a
    directory and as the start of the file name; the number is zero-padded
    to at least four characters. Returns ``('pdf', url)``.
    """
    # Example: https://mgaleg.maryland.gov/2023RS/bills/hb/hb0359F.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    folder = chamber_code.lower()
    return ('pdf', f'https://mgaleg.maryland.gov/2023RS/bills/{folder}/{folder}{bill_no.zfill(4)}F.pdf')

def format_mi(frame):
    """Build the Michigan introduced-bill URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('html', url)``.

    Previously the House directory and the ``HIB`` document code were
    hard-coded, so a Senate bill silently produced a House URL. The chamber
    is now chosen from the prefix; anything not starting with 'S' keeps the
    original House behaviour unchanged.
    """
    # House example:
    # http://www.legislature.mi.gov/documents/2023-2024/billintroduced/House/htm/2023-HIB-4195.htm
    prefix, num = frame['name'].split(' ')

    if prefix.upper().startswith('S'):
        # NOTE(review): Senate path/code and 4-digit zero padding follow the
        # site's apparent naming scheme (e.g. 2023-SIB-0004) -- confirm.
        chamber_dir, doc_code, bill_no = 'Senate', 'SIB', num.zfill(4)
    else:
        chamber_dir, doc_code, bill_no = 'House', 'HIB', num

    return ('html', (
        'http://www.legislature.mi.gov/documents/2023-2024/billintroduced/'
        f'{chamber_dir}/htm/2023-{doc_code}-{bill_no}.htm'
    ))

def format_mn(frame):
    """Build the Minnesota revisor bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('html', url)``.
    """
    # Example:
    # https://www.revisor.mn.gov/bills/text.php?number=HF1903&type=bill&version=0&session=ls93&session_year=2023&session_number=0
    chamber_code, bill_no = frame['name'].split(' ')
    query_tail = '&type=bill&version=0&session=ls93&session_year=2023&session_number=0'
    return ('html', f'https://www.revisor.mn.gov/bills/text.php?number={chamber_code}{bill_no}{query_tail}')

def format_mo(frame):
    """Build the Missouri bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise).

    Returns ``('pdf', url)`` for ``SB`` bills. For every other prefix the
    result is ``('pdf', None)``, meaning "no URL can be derived". The
    function used to fall off the end and return a bare ``None`` for
    prefixes other than ``HB``/``SB``, which breaks callers that unpack
    the result as a 2-tuple.
    """
    # Example URLs noted while working out the pattern:
    # https://www.house.mo.gov/billtracking/bills231/hlrbillspdf/2598H.01I.pdf
    # https://senate.mo.gov/23info/pdf-bill/intro/SB39.pdf
    prefix, num = frame['name'].split(' ')

    if prefix == 'SB':
        return ('pdf', f'https://senate.mo.gov/23info/pdf-bill/intro/{prefix}{num}.pdf')

    # The House example above does not appear to be built from the bill
    # number, so House URLs will need scraping (e.g. BeautifulSoup) later.
    # Other prefixes are treated the same way instead of returning None.
    return ('pdf', None)


def format_ms(frame):
    """Build the Mississippi introduced-bill URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise), and the number must parse as an integer.
    Bills live in directories named after their hundred-block, e.g.
    ``2700-2799``. Returns ``('html', url)``.

    The block bounds are computed with integer arithmetic. The earlier
    floor/ceil approach produced an inverted range such as ``2700-2699``
    whenever the bill number was an exact multiple of 100.
    """
    # Example URLs noted while working out the pattern:
    # http://billstatus.ls.state.ms.us/documents/2023/html/SB/2700-2799/SB2773IN.htm
    # http://billstatus.ls.state.ms.us/documents/2023/html/SB/2700-2900/SB2773IN.htm
    #
    # http://billstatus.ls.state.ms.us/documents/2023/html/SB/2001-2099/SB2058IN.htm
    # http://billstatus.ls.state.ms.us/documents/2023/html/SB/2000-2099/SB2058IN.htm
    #
    # http://billstatus.ls.state.ms.us/documents/2023/html/HB/500-599/HB0509IN.htm
    prefix, num = frame['name'].split(' ')

    low_index = (int(num) // 100) * 100
    # Derive the upper bound from the block start so it is always low + 99.
    high_index = low_index + 99

    # The 2000 block's directory starts at 2001 rather than 2000.
    if low_index == 2000:
        low_index = 2001

    block_dir = f'{str(low_index).zfill(4)}-{str(high_index).zfill(4)}'
    return ('html', (
        'http://billstatus.ls.state.ms.us/documents/2023/html/'
        f'{prefix}/{block_dir}/{prefix}{num.zfill(4)}IN.htm'
    ))

def format_mt(frame):
    """Build the Montana bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is zero-padded to at least four
    characters. Returns ``('html', url)``.
    """
    # Example: https://leg.mt.gov/bills/2023/billhtml/SB0099.htm
    chamber_code, bill_no = frame['name'].split(' ')
    page_name = chamber_code + bill_no.zfill(4) + '.htm'
    return ('html', 'https://leg.mt.gov/bills/2023/billhtml/' + page_name)

def format_nc(frame):
    """Build the North Carolina bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The chamber directory comes from ``expand``
    and the file name uses only the first character of the prefix.
    Returns ``('pdf', url)``.
    """
    # Example URLs noted while working out the pattern:
    # https://www.ncleg.gov/Sessions/2023/Bills/Senate/PDF/S49v0.pdf
    # https://www.ncleg.gov/Sessions/2023/Bills/senate/PDF/S.49v0.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    chamber_dir = expand(chamber_code)
    initial = chamber_code[0]
    url_parts = (
        'https://www.ncleg.gov/Sessions/2023/Bills/',
        chamber_dir,
        '/PDF/',
        initial,
        bill_no,
        'v0.pdf',
    )
    return ('pdf', ''.join(url_parts))

def format_ne(frame):
    """Build the Nebraska introduced-bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('pdf', url)``.
    """
    # Example: https://nebraskalegislature.gov/FloorDocs/108/PDF/Intro/LB371.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    return ('pdf', f'https://nebraskalegislature.gov/FloorDocs/108/PDF/Intro/{chamber_code}{bill_no}.pdf')

def format_nm(frame):
    """Build the New Mexico bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is zero-padded to at least four
    characters. Returns ``('html', url)``.

    The chamber directory used to be hard-coded to ``house``, which sent
    Senate bills to a House path. It is now ``senate`` for prefixes that
    start with 'S' and ``house`` for everything else (unchanged).
    """
    # House example:
    # https://www.nmlegis.gov/Sessions/23%20Regular/bills/house/HB0492.html
    prefix, num = frame['name'].split(' ')
    # NOTE(review): the senate/ directory mirrors the house/ layout -- confirm.
    chamber_dir = 'senate' if prefix.upper().startswith('S') else 'house'
    return ('html', (
        'https://www.nmlegis.gov/Sessions/23%20Regular/bills/'
        f'{chamber_dir}/{prefix}{num.zfill(4)}.html'
    ))

def format_ok(frame):
    """Build the Oklahoma introduced-bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The prefix is used both as a directory and as
    the start of the file name. Returns ``('pdf', url)``.
    """
    # Example: http://webserver1.lsb.state.ok.us/cf_pdf/2023-24%20INT/SB/SB789%20INT.PDF
    chamber_code, bill_no = frame['name'].split(' ')
    base = 'http://webserver1.lsb.state.ok.us/cf_pdf/2023-24%20INT/'
    file_name = chamber_code + bill_no + '%20INT.pdf'
    return ('pdf', base + chamber_code + '/' + file_name)

def format_or(frame):
    """Build the Oregon introduced-measure URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('pdf', url)``.
    """
    # Example: https://olis.oregonlegislature.gov/liz/2023R1/Downloads/MeasureDocument/SB897/Introduced
    chamber_code, bill_no = frame['name'].split(' ')
    measure = chamber_code + bill_no
    return ('pdf', f'https://olis.oregonlegislature.gov/liz/2023R1/Downloads/MeasureDocument/{measure}/Introduced')

def format_ri(frame):
    """Build the Rhode Island bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The directory is the capitalized chamber name
    from ``expand`` followed by ``Text23``; the file name is the first
    character of the prefix plus the number zero-padded to at least four
    characters. Returns ``('pdf', url)``.
    """
    # Example URLs noted while working out the pattern:
    # http://webserver.rilegislature.gov/BillText/BillText23/HouseText23/H5688.pdf
    # http://webserver.rilegislature.gov/BillText/BillText23/HouseText23/HB5688.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    chamber_dir = expand(chamber_code).capitalize()
    file_stem = chamber_code[0] + bill_no.zfill(4)
    return ('pdf', f'http://webserver.rilegislature.gov/BillText/BillText23/{chamber_dir}Text23/{file_stem}.pdf')

def format_sc(frame):
    """Build the South Carolina bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Only the number appears in the URL; the prefix
    is ignored. Returns ``('html', url)``.
    """
    # Example: https://www.scstatehouse.gov/sess125_2023-2024/bills/3827.htm
    _chamber_code, bill_no = frame['name'].split(' ')
    return ('html', f'https://www.scstatehouse.gov/sess125_2023-2024/bills/{bill_no}.htm')

def format_tn(frame):
    """Build the Tennessee bill-info URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is zero-padded to at least four
    characters. Returns ``('html', url)``.

    The URL now carries ``&GA=113``, as in the example below, so the link
    names its General Assembly explicitly like the other formatters pin
    their session. Previously the parameter was omitted.
    """
    # Example: https://wapp.capitol.tn.gov/apps/BillInfo/Default.aspx?BillNumber=HB1414&GA=113
    prefix, num = frame['name'].split(' ')
    return ('html', (
        'https://wapp.capitol.tn.gov/apps/BillInfo/Default.aspx'
        f'?BillNumber={prefix}{num.zfill(4)}&GA=113'
    ))

def format_tx(frame):
    """Build the Texas introduced-bill URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is zero-padded to at least five
    characters. Returns ``('html', url)``.
    """
    chamber_code, bill_no = frame['name'].split(' ')
    page_name = f'{chamber_code}{bill_no.zfill(5)}I.htm'
    return ('html', 'https://capitol.texas.gov/tlodocs/88R/billtext/html/' + page_name)

def format_ut(frame):
    """Build the Utah bill-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The directory is the first character of the
    lower-cased prefix followed by ``billint``; the number is zero-padded
    to at least four characters and the ``S01`` suffix is fixed.
    Returns ``('html', url)``.
    """
    # Example URLs noted while working out the pattern:
    # https://le.utah.gov/~2023/bills/sbillint/SB0039S01.htm
    # https://le.utah.gov/~2023/bills/hbillint/HB0464S01.htm?r=196
    # TODO: the suffix is hard-coded to S01, but some bills use another one:
    # https://le.utah.gov/~2023/bills/sbillint/SB0093S02.htm
    chamber_code, bill_no = frame['name'].split(' ')
    folder = chamber_code.lower()[0] + 'billint'
    page_name = chamber_code + bill_no.zfill(4) + 'S01.htm'
    return ('html', f'https://le.utah.gov/~2023/bills/{folder}/{page_name}')

def format_va(frame):
    """Build the Virginia full-text URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). Returns ``('html', url)``.
    """
    # Example: https://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+HB1399
    chamber_code, bill_no = frame['name'].split(' ')
    return ('html', f'https://lis.virginia.gov/cgi-bin/legp604.exe?231+ful+{chamber_code}{bill_no}')

def format_wa(frame):
    """Build the Washington bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The directory is the capitalized chamber name
    from ``expand`` followed by ``%20Bills``; the file name is the bare
    number. Returns ``('pdf', url)``.
    """
    # Example:
    # https://lawfilesext.leg.wa.gov/biennium/2023-24/Pdf/Bills/Senate%20Bills/5653.pdf?q=20230309165949
    chamber_code, bill_no = frame['name'].split(' ')
    chamber_dir = expand(chamber_code).capitalize()
    return ('pdf', f'https://lawfilesext.leg.wa.gov/biennium/2023-24/Pdf/Bills/{chamber_dir}%20Bills/{bill_no}.pdf')

def format_wv(frame):
    """Build the West Virginia introduced-bill URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The prefix is lower-cased and the number
    appears twice: in the document name and in the ``i`` query parameter.
    Returns ``('html', url)``.
    """
    # Example:
    # http://www.wvlegislature.gov/Bill_Status/bills_text.cfm?billdoc=hb3001%20intr.htm&yr=2023&sesstype=RS&i=3001
    chamber_code, bill_no = frame['name'].split(' ')
    doc_name = chamber_code.lower() + bill_no + '%20intr.htm'
    base = 'http://www.wvlegislature.gov/Bill_Status/bills_text.cfm?billdoc='
    return ('html', base + doc_name + '&yr=2023&sesstype=RS&i=' + bill_no)

def format_wy(frame):
    """Build the Wyoming introduced-bill PDF URL for the bill in ``frame['name']``.

    The name must be ``'<prefix> <number>'`` separated by one space
    (ValueError otherwise). The number is zero-padded to at least four
    characters. Returns ``('pdf', url)``.
    """
    # Example: https://wyoleg.gov/2023/Introduced/SF0144.pdf
    chamber_code, bill_no = frame['name'].split(' ')
    file_name = f'{chamber_code}{bill_no.zfill(4)}.pdf'
    return ('pdf', 'https://wyoleg.gov/2023/Introduced/' + file_name)

# Registry of URL builders keyed by two-letter state code. Each builder
# takes a row with a 'name' entry and returns an (extension, url) tuple.
bill_formatters = dict(
    AK=format_ak,
    AR=format_ar,
    AZ=format_az,
    CO=format_co,
    CT=format_ct,
    FL=format_fl,
    HI=format_hi,
    IA=format_ia,
    ID=format_id,
    KS=format_ks,
    KY=format_ky,
    MD=format_md,
    MI=format_mi,
    MN=format_mn,
    MO=format_mo,
    MS=format_ms,
    MT=format_mt,
    NC=format_nc,
    NE=format_ne,
    NM=format_nm,
    OK=format_ok,
    OR=format_or,
    RI=format_ri,
    SC=format_sc,
    TN=format_tn,
    TX=format_tx,
    UT=format_ut,
    VA=format_va,
    WA=format_wa,
    WV=format_wv,
    WY=format_wy,
)

In [5]:
# Do not truncate wide columns when frames are displayed.
pd.set_option('display.max_colwidth', None)

# Work on a copy of the three columns of interest, and flatten each state
# dict down to its 'value' entry (the key used by bill_formatters).
subset = aclu_pandas[['name', 'state', 'link']].copy()
subset['state'] = subset['state'].transform(lambda state: state['value'])

# Print one generated URL per bill, skipping states with no formatter and
# bills whose formatter could not derive a URL.
for idx, item in subset.iterrows():
    formatter = bill_formatters.get(item['state'])
    if formatter is None:
        continue

    target_extension, URL = formatter(item)
    if URL is not None:
        print(URL)


https://capitol.texas.gov/tlodocs/88R/billtext/html/HB03902I.htm
https://capitol.texas.gov/tlodocs/88R/billtext/html/HB03883I.htm
http://www.legislature.mi.gov/documents/2023-2024/billintroduced/House/htm/2023-HIB-4195.htm
https://www.legis.iowa.gov/docs/publications/LGI/90/SF538.pdf
https://le.utah.gov/~2023/bills/sbillint/SB0039S01.htm
https://www.flsenate.gov/Session/Bill/2023/1521/BillText/Filed/PDF
https://www.legis.iowa.gov/docs/publications/LGI/90/HF616.pdf
https://www.flsenate.gov/Session/Bill/2023/1674/BillText/Filed/PDF
https://www.flsenate.gov/Session/Bill/2023/1421/BillText/Filed/PDF
https://www.flsenate.gov/Session/Bill/2023/254/BillText/Filed/PDF
https://capitol.texas.gov/tlodocs/88R/billtext/html/HB03213I.htm
https://www.scstatehouse.gov/sess125_2023-2024/bills/585.htm
https://www.legis.iowa.gov/docs/publications/LGI/90/HF623.pdf
https://www.legis.iowa.gov/docs/publications/LGI/90/SF482.pdf
https://www.legis.iowa.gov/docs/publications/LGI/90/SF496.pdf
https://www.legis.i