# Approximating the number of Hispanic inmates

Loading the libraries to be used:

In [None]:
# Debugging leftover: presumably the IPython `%qtconsole` magic called through automagic
# to attach a Qt console to this kernel — TODO confirm. The cell has no execution count,
# so it was not run when the notebook was saved.
qtconsole

In [1]:
import time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys

## Getting a list of common Hispanic names in the US

We are going to use the same list as in the case of Polk County (see the other notebook in this repo). This list is a reduced version of the one found on the website Mongabay.com, and the data is taken from the 2010 US Census. The list will be compared with the names of the inmates to guess whether or not they are Hispanic.

In [2]:
# Read the surname list from disk as an array of strings and report how many were loaded.
names_file = 'common_names_list.txt'
common_names_list = np.loadtxt(names_file, unpack=True, dtype=str)
name_count = len(common_names_list)
print('Total number of names =', name_count)

Total number of names = 899


# Trying with only 2 names!

In [3]:
# Number of names to actually search for. Kept small for a quick trial run;
# set to None to use the whole list (slicing with None keeps every element).
MAX_NAMES = 2

# NOTE: this overwrites the full list loaded earlier. To search more names again,
# re-run the cell that loads the list before changing MAX_NAMES.
common_names_list = common_names_list[:MAX_NAMES]
print('Total number of names =', len(common_names_list))

Total number of names = 2


The next function makes it easier to access the search page with parameters such as 'Search Aliases = No'.

In [4]:
def make_url(last_name):
    """Build the offender-search URL (dc.state.fl.us) filtered by one last name.

    The query string is fixed except for ``LastName``: aliases are not searched
    (``SearchAliases=0``) and ``matches=50`` is requested per page.

    Parameters
    ----------
    last_name : str
        Last name to search for. It is percent-encoded before being inserted,
        so names containing spaces, apostrophes, ``&`` or ``=`` cannot break or
        alter the query string. Plain alphabetic names are inserted unchanged.

    Returns
    -------
    str
        The complete search URL.
    """
    # safe='' also encodes '/', which must not appear raw inside a query value.
    encoded_name = quote(str(last_name), safe='')
    return ('http://www.dc.state.fl.us/OffenderSearch/list.aspx'
            '?TypeSearch=AI&Page=List&DataAction=Filter&dcnumber='
            '&LastName={}&FirstName=&SearchAliases=0&OffenseCategory='
            '&CurrentLocation=&CountyOfCommitment=&photosonly=0&nophotos=1'
            '&matches=50').format(encoded_name)

Setting up the webdriver which will simulate accessing the website in a Firefox Browser.
We set it to headless so that no actual browser window is opened.

In [5]:
# Firefox options object; headless mode runs the browser without a visible window.
ffoptions = webdriver.firefox.options.Options()
ffoptions.headless = True
# Start the Firefox session that every later cell drives through `driver`.
driver = webdriver.Firefox(options=ffoptions)
# Implicit wait of 1 (seconds, per Selenium's API) applied when locating elements.
driver.implicitly_wait(1)

Setting up the list of tables which will be collected and then running the collection loop.

In [6]:
# One DataFrame per results page, accumulated across all searched names.
list_of_tables = []

# Upper bound on consecutive reads (about one second apart) that fail to show the
# expected page. Without it, a page that never loads would keep the loop running forever.
MAX_STALE_READS = 30

# NOTE: find_element_by_id / find_element_by_name are the Selenium 3 locator helpers
# this notebook is written against.
for i, name in enumerate(common_names_list):
    print('i:',i,'; name=',name)
    number_of_results = last_elem_page = 50 #number of results per page and a variable to check if it is finished loading
    try:
        driver.get(make_url(name))
        last_elem_total = driver.find_element_by_id('ctl00_ContentPlaceHolder1_lblgrdListPage')
        # The number after "of" in the label text; presumably the total number of
        # matching records for this name — TODO confirm against the live page.
        last_elem_total = int(last_elem_total.text[last_elem_total.text.find('of') + 3:])
    except (WebDriverException, ValueError):
        #no results and no table found (or the label could not be parsed),
        #therefore continue on to the next name.
        #Only Selenium/parsing errors are skipped, so Ctrl-C still interrupts the run.
        continue

    stale_reads = 0
    while True:
        try:
            temp_table = pd.read_html(driver.page_source, attrs={'id':'ctl00_ContentPlaceHolder1_grdList'})[0]
            # str() guards against a non-string (e.g. NaN) cell in the first column.
            last_cell = str(temp_table.iloc[-1,0])
        except (ValueError, IndexError):
            #the results table is missing or empty in the current page source
            #(e.g. the page is still loading); treat it like a stale read.
            last_cell = ''
        time.sleep(1)
        if (str(last_elem_page) in last_cell) or (str(last_elem_total) in last_cell):
            #sometimes the next page won't load quickly enough and the same table is reloaded
            #this checks where we are in the collection and avoids duplicate tables
            list_of_tables.append(temp_table)
            last_elem_page += number_of_results
            stale_reads = 0
            try:
                #moving on to the next page of results.
                elem = driver.find_element_by_name('ctl00$ContentPlaceHolder1$btnListNext')
                elem.send_keys(Keys.RETURN)
            except WebDriverException:
                #no more results. move on to next name.
                break
        else:
            stale_reads += 1
            if stale_reads >= MAX_STALE_READS:
                #the expected page never showed up; give up on this name instead of hanging.
                print('Stopped collecting', name, '- expected page never loaded')
                break

i: 0 ; name= GARCIA
i: 1 ; name= RODRIGUEZ


We close the webdriver object and concatenate all DataFrames collected.

In [7]:
# quit() ends the whole WebDriver session (browser and driver process);
# close() would only close the current window and can leave the driver process running.
driver.quit()

# Stack all collected pages into one table. reset_index() gives a fresh 0..n-1 index and
# keeps each row's old per-page position as an `index` column.
hispanic_inmates = pd.concat(list_of_tables).reset_index()

Table inspection

In [8]:
# Show the first rows of the combined table.
hispanic_inmates.head()

Unnamed: 0,index,Click Number for Details,Name,DC Number,Race,Sex,Release Date,Current Facility,Birth Date
0,0,*1,"GARCIA, ADOLFO I",185800,WHITE,MALE,04/14/2021,EVERGLADES C.I.,10/10/1946
1,1,*2,"GARCIA, ALEXANDER X",158442,WHITE,MALE,05/19/2029,OKALOOSA C.I.,05/31/1988
2,2,*3,"GARCIA, ALFREDO",K04686,BLACK,MALE,02/23/2041,BLACKWATER C.F.,06/27/1982
3,3,*4,"GARCIA, ALFREDO",U56632,WHITE,MALE,03/03/2026,HOLMES C.I.,09/07/1993
4,4,*5,"GARCIA, AMADO JR",C08727,HISPANIC,MALE,05/10/2020,CFRC-EAST,12/29/1989


In [9]:
# Show the last rows of the combined table.
hispanic_inmates.tail()

Unnamed: 0,index,Click Number for Details,Name,DC Number,Race,Sex,Release Date,Current Facility,Birth Date
732,15,*466,"RODRIGUEZOCHOA, SEBASTIAN",S41740,HISPANIC,MALE,04/06/2021,AVON PARK WORK CAMP,09/01/1991
733,16,*467,"RODRIGUEZPANEQUE, YOEL",M53373,WHITE,MALE,01/28/2024,CHARLOTTE C.I.,12/21/1986
734,17,*468,"RODRIGUEZPEREZ, NEFTALI",B11604,HISPANIC,MALE,07/07/2022,DESOTO ANNEX,03/11/1990
735,18,*469,"RODRIGUEZVASQUEZ, MIGUEL",H36963,WHITE,MALE,06/01/2034,EVERGLADES C.I.,10/05/1991
736,19,*470,"RODRIGUEZVAZQUEZ, GERARDO",X92953,WHITE,MALE,09/25/2034,APALACHEE EAST UNIT,05/22/1979


In [29]:
# Percentage of collected rows per value of the `Race` column.
# Rows per race, counted through the non-null `Name` entries.
race_counts = hispanic_inmates.groupby(['Race']).count()[['Name']]
# Non-null count of the first column, used as the total. `.iloc[0]` selects it by
# position explicitly; plain `[0]` on a label-indexed Series is deprecated in recent pandas.
total_rows = hispanic_inmates.count().iloc[0]
race_counts / total_rows * 100

Unnamed: 0_level_0,Name
Race,Unnamed: 1_level_1
ALL OTHERS/UNKNOWN,1.221167
BLACK,4.613297
HISPANIC,37.042062
WHITE,57.123474


## Distribution of race assignments