In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

In [None]:
# Start a headless, incognito Chrome session (certificate errors ignored)
# and open the inmate search form.
chrome_options = webdriver.ChromeOptions()
for flag in ('--ignore-certificate-errors', '--incognito', '--headless'):
    chrome_options.add_argument(flag)

# NOTE(review): the chromedriver location is machine-specific.
service = Service("/Users/tumendemberelshalkhaan/Downloads/chromedriver")
driver = webdriver.Chrome(options=chrome_options, service=service)
driver.get('https://apps.ark.org/inmate_info/index.php')

# Accumulator for the inmate IDs collected from the search-result pages.
ids = list()

In [None]:
# Parse the currently loaded search form and read the choices offered by
# its <select name="RACE"> dropdown.
soup = BeautifulSoup(driver.page_source, 'lxml')
select = soup.find('select', attrs={'name': 'RACE'})
option_text = [choice.text for choice in select.find_all('option')]

# The first <option> is dropped (presumably a blank/placeholder entry --
# TODO confirm against the live form).
races = option_text[1:]

In [None]:
# Collect the inmate ID of every search result, one race filter at a time.
# IDs are appended to the existing `ids` list.
for race in races:

    # Submit the search form for this race: tick the disclaimer, pick the
    # race in the dropdown, then press "Search".
    driver.find_element("name", "disclaimer").click()
    select = Select(driver.find_element("name", 'RACE'))
    select.select_by_visible_text(race)
    driver.find_element("xpath", ".//*[@value='Search']").click()

    while True:
        # Harvest the current page BEFORE looking for the "next" link;
        # otherwise the final (or only) results page would be skipped.
        # find_elements returns [] when the page has no table at all, so a
        # search with no results does not raise.
        rows = driver.find_elements("xpath", "(//table)[1]/tbody//tr")

        for row in rows:
            cells = row.find_elements("tag name", "td")
            # The ID is read from the third cell; rows without one
            # (e.g. header rows built from <th>) are ignored.
            if len(cells) > 2:
                ids.append(cells[2].text)

        # An empty list (rather than an exception) signals that there is no
        # "→" pagination link left, i.e. this was the last page.
        next_links = driver.find_elements("xpath", "//a[contains(text(), '→')]")
        if not next_links:
            break
        next_links[0].click()

    # Return to the blank search form for the next race.
    driver.get('https://apps.ark.org/inmate_info/index.php')

In [None]:
# Empty result frame: one row per sentence-history entry of an inmate.
# 'County' is listed (last, so the resulting column order is unchanged)
# because the per-inmate rows built by the scraping loop carry that key;
# without it an empty run would produce a CSV that lacks the column.
ar = pd.DataFrame(columns=[
    'ID', 'Name', 'Race', 'Date Of Birth', 'Facility', 'Gender',
    'Offense Description', 'Sentence Length', 'Sentencing Date',
    'County',
])

In [None]:
# Fetch the detail page of every collected inmate ID and append one row per
# entry of the "Current Prison Sentence History" table to `ar`.
# IDs whose page cannot be fetched or parsed are recorded in `failed_ids`.
failed_ids = []
scraped_rows = []          # row dicts for all inmates; turned into a frame once
REQUEST_TIMEOUT = 30       # seconds; keeps one stalled request from hanging the run


def _field_value(page, label):
    """Return the text of the <div> that follows the <div> whose text is `label`.

    Raises AttributeError when either <div> is missing from `page`.
    """
    return page.find('div', string=label).find_next_sibling('div').get_text()


# One session (and one retrying adapter) is shared by all requests so that
# connections are reused; the `with` block closes it when the loop is done.
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)

with requests.Session() as session:
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    for inmate_id in ids:
        try:
            response = session.get(
                'https://apps.ark.org/inmate_info/search.php',
                params={'dcnum': inmate_id},
                timeout=REQUEST_TIMEOUT,
            )
            # Surface HTTP errors directly instead of failing later on a
            # missing element of an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Note: only the name label carries a trailing colon.
            name = _field_value(soup, 'Name:')
            race = _field_value(soup, 'Race')
            dob = _field_value(soup, 'Birth Date')
            fac = _field_value(soup, 'Facility')
            gender = _field_value(soup, 'Sex')

            table = (
                soup.find('h3', string='Current Prison Sentence History')
                .find_next_sibling('table')
                .find('tbody')
            )

            inmate_rows = []
            for tr in table.find_all('tr'):
                data = [cell.get_text(strip=True) for cell in tr.find_all('td')]
                # Columns 0, 1, 2 and 4 are read below; rows that are too
                # short to contain them (e.g. header/spacer rows) are skipped.
                if len(data) < 5:
                    continue
                inmate_rows.append({
                    'ID': inmate_id,
                    'Name': name,
                    'Date Of Birth': dob,
                    'Gender': gender,
                    'Race': race,
                    'Facility': fac,
                    'Offense Description': data[0],
                    'Sentence Length': data[4],
                    'Sentencing Date': data[1],
                    'County': data[2],
                })

            # Commit only after the whole page parsed, so a failing inmate
            # never contributes partial rows.
            scraped_rows.extend(inmate_rows)

        except Exception as e:
            # Best effort: report, remember the ID, and move on.
            print(f"Failed to scrape {inmate_id}. Error: {e}")
            failed_ids.append(inmate_id)

# Build the frame once instead of concatenating inside the loop (which copies
# the accumulated frame on every iteration).
if scraped_rows:
    ar = pd.concat([ar, pd.DataFrame(scraped_rows)], ignore_index=True)

In [None]:
# Write the scraped table to disk as CSV, leaving out the DataFrame index.
# NOTE(review): the destination is a machine-specific absolute path.
output_path = '/Users/tumendemberelshalkhaan/Desktop/Arkansas_Raw.csv'
ar.to_csv(output_path, index=False)