In [1]:
# This notebook repeats the scraping process from 'TPWD Web Scrape' for each of several category URLs.

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


def get_section_title(soup_obj):
        title_str = soup_obj.find('span',class_='title').get_text()
        return title_str

def get_data_wrapper(section):
    data_wrappers = section.select('div.data-wrapper')
    return data_wrappers

def get_data_header_text(wrapper):
    data_header_text = wrapper.select('div.data-header')[0].get_text()
    return data_header_text

def get_data_value_text(wrapper):
    data_value_text = wrapper.select('div.data-cell')[0].get_text(strip=True).replace("\r\n","")
    return data_value_text

def get_data_row(soup_obj):
    data_row = soup_obj.find_all('div',class_='data-cell')
    return data_row


urls = {'Gun Deer - Antlerless/Spike':'https://tpwd.texas.gov/huntwild/hunt/public/public_hunt_drawing/hunt-category-details.phtml?OCat=GDA',
        'Gun Deer - Either Sex':'https://tpwd.texas.gov/huntwild/hunt/public/public_hunt_drawing/hunt-category-details.phtml?OCat=GDE',
        'Feral Hog':'https://tpwd.texas.gov/huntwild/hunt/public/public_hunt_drawing/hunt-category-details.phtml?OCat=GFH'}


items = urls.items()



In [3]:
df4 = pd.DataFrame()

for category, url in items:
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    sections = soup.find_all('section')

    #Generate dataframe for fields with one data-cell per data-row
    list_of_dicts = []
    for section in sections:
        data_wrappers = get_data_wrapper(section)
        d = {}
        d['Title'] = get_section_title(section)
        for wrapper in data_wrappers:
            for cell in data_wrappers:
                d[get_data_header_text(wrapper)] = get_data_value_text(wrapper)

        list_of_dicts.append(d)

    
    df1 = pd.DataFrame.from_records(list_of_dicts)
    
    
    # generate dataframe for Permits, Age Requirements, and Last Year, which have multiple data-cells per row
    def split_data_cell(data_cell):
        pair = data_cell.split(':')
        return pair

    keywords = ['Available', 'Fee', 'Youth ages', 'Adult minimum', 'Supervising adult', 'Applicants', 'Permits/Groups', 'Success Rate']


    list_of_dicts1 = []
    for section in sections:
        data_wrappers = get_data_wrapper(section)
        d = {}
        for wrapper in data_wrappers:
                data_row = get_data_row(wrapper)
                for cell in data_row:
                    for i in keywords:
                        if i in cell.get_text():
                            split_values = cell.get_text(strip=True).split(':')
                            key = split_values[0].replace("\r\n","")
                            value = split_values[1].replace("\r\n","")
                            d[key] = value
                        else:
                            continue
        list_of_dicts1.append(d)

    df2 = pd.DataFrame.from_records(list_of_dicts1)

        #join the two dataframes
    df3 = pd.concat([df1.reset_index(drop=True),df2.reset_index(drop=True)], axis=1)
    df3


    #clean fields
    df3['Hunt Dates'] = df3['Hunt Dates'].str.replace(' ','')
    df3['Hunt Dates'] = df3['Hunt Dates'].str.replace('–','; ')
    df3['Youth ages'] = df3['Youth ages'].str.replace(' ','')
    df3['Youth ages'] = df3['Youth ages'].str.replace('-',' to ')

    #drop fields
    try:
        df3 = df3.drop(columns=['Groups'])
    except:
        pass
    try:
        df3 = df3.drop(columns=['Last Year'])
    except:
        pass
    try:
        df3 = df3.drop(columns=['Age Requirements'])
    except:
        pass
    try:
        df3 = df3.drop(columns=['Permits'])
    except:
        pass

    #add fields
    df3['Available'] = df3['Available'].astype(float)
    df3['Applicants'] = df3['Applicants'].astype(float)
    df3["Draw Probability"] = (df3["Available"] / df3["Applicants"])
    
    #append to dataframe
    df4 = df4.append(df3)

df4.head()

Unnamed: 0,Title,Hunt Dates,Bag Limit,ONLY MEANS ALLOWED,Hunt Method,Baiting,Hunt Restrictions,MEANS NOT ALLOWED,Available,Fee per adult,Fee per youth,Youth ages,Adult minimum age,Supervising adult minimum age,Applicants,Permits/Groups,Success Rate,Draw Probability
0,Bastrop SP,"Jan05,2022; Jan07,2022",Three White-tailed Deer; Antlerless or Spike B...,Centerfire Rifles,Compartment; Hunters are encouraged to bring p...,Allowed,,,15.0,$80.00,$0.00,8 to 16,17,18,1343.0,15,24%,0.011169
1,Colorado Bend SP,"Dec08,2021; Dec10,2021",Three White-tailed Deer; Antlerless or Spike B...,Centerfire Rifles,Compartment; Hunters are encouraged to bring p...,Allowed,Baiting is allowed; Corn must be labeled for w...,,50.0,$80.00,$0.00,8 to 16,17,18,2263.0,50,52%,0.022095
2,Cooper WMA,"Nov27,2021; Nov29,2021",Three White-tailed Deer; Antlerless or Spike B...,Centerfire Rifles,Compartment; Hunters may bring portable blinds...,Allowed,,,20.0,$80.00,$0.00,8 to 16,17,18,461.0,12,16%,0.043384
3,Copper Breaks SP,"Dec04,2021; Dec06,2021",One White-tailed Deer; Antlerless or Spike Buc...,Centerfire Rifles,Compartment; Hunters are encouraged to arrive ...,Not allowed,"NO ATV's, UTV's or Equines are allowed, NO bai...",,1.0,$80.00,$0.00,8 to 16,17,18,276.0,1,,0.003623
4,Enchanted Rock SNA,"Nov09,2021; Nov12,2021",Five White-tailed Deer; Antlerless or Spike Bu...,Centerfire Rifles,Assigned Blind; Hunters will be taken to and f...,Allowed,Baiting allowed; Corn must be labeled for wild...,,30.0,$130.00,$0.00,8 to 16,17,18,1622.0,30,56%,0.018496


In [4]:
#generate csv
df4.to_csv('hunt_data.csv')