In [1]:
# Populate the data in resources for all counties
# Dependencies
import requests
from dotenv import load_dotenv
import os
import pandas as pd
from bs4 import BeautifulSoup
from requests.exceptions import ConnectTimeout, ReadTimeout, RequestException
import subprocess

In [2]:
# this section of code make a request and permits up to max_retries 
def get_page(url, max_retries=3):
    retries = 0
    while retries < max_retries:
        try:
            # Attempt to make the HTTP GET request with a specified timeout
            response = requests.get(url, timeout=2)  # adding a sensible timeout
            # If the request is successful, break out of the loop
            if response.status_code == 200:
                return response.text
            if response.status_code == 404:
                return None , None
        except (ConnectTimeout, ReadTimeout) as e:
            # Print an error message showing the retry attempt
            print(f"Attempt {retries + 1} of {max_retries} failed with timeout. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded. Failing...")
                raise  # Re-raise the last exception after final attempt
        except RequestException as e:
            print(f"Attempt {retries + 1} failed with a request exception: {e}. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded with request exceptions. Failing...")
                raise
        except: 
            return None, None

    # If all retries are exhausted without a return, it indicates a failure
    print("Request failed after maximum retries.")
    return None, None


In [3]:
# this section of code make a request and permits up to max_retries 
def make_request_with_retries(url, max_retries=3):
    retries = 0
    while retries < max_retries:
        try:
            # Attempt to make the HTTP GET request with a specified timeout
            response = requests.get(url, timeout=2)  # adding a sensible timeout
            # If the request is successful, break out of the loop
            if response.status_code == 200:
                if response:
                    print(f"Request succeeded for {url}")
                    # Parse the HTML content
                    soup = BeautifulSoup(response.text, 'html.parser')
                    # Find the <title> tag
                    title_tag = soup.find('title')
                    # Extract the text from the <title> tag
                    title_text = title_tag.text if title_tag else None
                    # Add the title text to the list if it exists
                    if title_text:
                        # Split the string to isolate the part before the first '|'
                        main_part = title_text.split('|')[0].strip()
                        # Extract the county name; it's always followed by ", TX"
                        county = main_part.split(", TX")[0].split('for ')[-1].strip()
                        # Extract the series ID; it's always within parentheses
                        series_id = main_part.split('(')[-1].split(')')[0].strip()
                return county,series_id
            if response.status_code == 404:
                return None , None
        except (ConnectTimeout, ReadTimeout) as e:
            # Print an error message showing the retry attempt
            print(f"Attempt {retries + 1} of {max_retries} failed with timeout. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded. Failing...")
                raise  # Re-raise the last exception after final attempt
        except RequestException as e:
            print(f"Attempt {retries + 1} failed with a request exception: {e}. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded with request exceptions. Failing...")
                raise
        except: 
            return None, None

    # If all retries are exhausted without a return, it indicates a failure
    print("Request failed after maximum retries.")
    return None, None


In [4]:
'''# get  a list of all counties in dallas. this will be used to build the list of request URL's for resident population.
# https://fred.stlouisfed.org/categories/29898 this page has all counties in Dallas
def extract_readable_text(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    # Extract all paragraph texts and combine them
    text = '\n'.join(p.get_text() for p in soup.find_all('p'))
    return text
try:
    url = f'https://fred.stlouisfed.org/categories/29898'
    counties_page = get_page(url)
    print (f'this is it \n {extract_readable_text(counties_page)}')
except Exception as e:  # Catching a general exception for any other errors
    print(f"An error occurred while processing {url}: {e}")
    '''

'# get  a list of all counties in dallas. this will be used to build the list of request URL\'s for resident population.\n# https://fred.stlouisfed.org/categories/29898 this page has all counties in Dallas\ndef extract_readable_text(html_content):\n    soup = BeautifulSoup(html_content, \'html.parser\')\n    # Extract all paragraph texts and combine them\n    text = \'\n\'.join(p.get_text() for p in soup.find_all(\'p\'))\n    return text\ntry:\n    url = f\'https://fred.stlouisfed.org/categories/29898\'\n    counties_page = get_page(url)\n    print (f\'this is it \n {extract_readable_text(counties_page)}\')\nexcept Exception as e:  # Catching a general exception for any other errors\n    print(f"An error occurred while processing {url}: {e}")\n    '

In [5]:
counties = []
hpi_ids = []
for index in range(1, 520):  # Adjusted range to Python syntax
    try:
        url = f'https://fred.stlouisfed.org/series/MHITX48{index:03}A052NCEN' # MHITX48085A052NCEN
        county,hpi_id = make_request_with_retries(url)
        if county:
                counties.append(county)
                hpi_ids.append(hpi_id)
        else:
            print(f"Failed to retrieve the webpage for {url}.")
    except Exception as e:  # Catching a general exception for any other errors
        print(f"An error occurred while processing {url}: {e}")
# Print the list to see the result
print(f"Collected {len(counties)} HPI Counties.")
county_series_ids_df = pd.DataFrame({
    'County': counties,
    'HHI ID': hpi_ids
})
county_series_ids_df.head(10)

Request succeeded for https://fred.stlouisfed.org/series/MHITX48001A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48002A052NCEN.
Request succeeded for https://fred.stlouisfed.org/series/MHITX48003A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48004A052NCEN.
Request succeeded for https://fred.stlouisfed.org/series/MHITX48005A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48006A052NCEN.
Request succeeded for https://fred.stlouisfed.org/series/MHITX48007A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48008A052NCEN.
Request succeeded for https://fred.stlouisfed.org/series/MHITX48009A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48010A052NCEN.
Request succeeded for https://fred.stlouisfed.org/series/MHITX48011A052NCEN
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/MHITX48012A052NCEN.
Requ

Unnamed: 0,County,HHI ID
0,Anderson County,MHITX48001A052NCEN
1,Andrews County,MHITX48003A052NCEN
2,Angelina County,MHITX48005A052NCEN
3,Aransas County,MHITX48007A052NCEN
4,Archer County,MHITX48009A052NCEN
5,Armstrong County,MHITX48011A052NCEN
6,Atascosa County,MHITX48013A052NCEN
7,Austin County,MHITX48015A052NCEN
8,Bailey County,MHITX48017A052NCEN
9,Bandera County,MHITX48019A052NCEN


In [6]:
# write the county series ids to a file. 

file_path = f"../resources/TX_County_HHI_ID.csv"  # Construct file path with .csv extension
county_series_ids_df.to_csv(file_path, index=False)