In [1]:
# Populate the data in resources for all counties
# Dependencies
import requests
from dotenv import load_dotenv
import os
import pandas as pd
from bs4 import BeautifulSoup
from requests.exceptions import ConnectTimeout, ReadTimeout, RequestException

In [2]:
# This section of code makes a request and permits up to max_retries attempts.
def get_page(url, max_retries=3, timeout=2):
    """Fetch ``url`` with an HTTP GET, retrying failed attempts.

    Args:
        url: Address to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        The response body (``response.text``) when the server answers 200.
        The tuple ``(None, None)`` when the server answers 404, when an
        unexpected non-request error occurs, or when every attempt ended
        with some other status code.
        NOTE(review): the failure value is a tuple while the success value
        is a string; this is kept for backward compatibility, so callers
        should check the type of the result before parsing it.

    Raises:
        ConnectTimeout, ReadTimeout, RequestException: re-raised when the
            last permitted attempt fails with that exception.
    """
    retries = 0
    while retries < max_retries:
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                return response.text
            if response.status_code == 404:
                # A missing page will not appear on retry; give up immediately.
                return None, None
            # Any other status (e.g. 429/500) must consume an attempt,
            # otherwise this loop would never terminate.
            print(f"Attempt {retries + 1} of {max_retries} returned HTTP {response.status_code}. Retrying...")
            retries += 1
        except (ConnectTimeout, ReadTimeout):
            # Print an error message showing the retry attempt
            print(f"Attempt {retries + 1} of {max_retries} failed with timeout. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded. Failing...")
                raise  # Re-raise the last exception after final attempt
        except RequestException as e:
            print(f"Attempt {retries + 1} failed with a request exception: {e}. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded with request exceptions. Failing...")
                raise
        except Exception:
            # Best-effort: unexpected errors are reported as a failed fetch.
            # `Exception` (not a bare except) lets Ctrl-C interrupt the cell.
            return None, None

    # If all retries are exhausted without a return, it indicates a failure
    print("Request failed after maximum retries.")
    return None, None


In [3]:
# This section of code makes a request and permits up to max_retries attempts.
def _parse_county_and_series(title_text):
    """Split a series page title into ``(county, series_id)``.

    Presumably the title looks like
    ``"... for <county>, TX (<series id>) | ..."`` - verify against the pages
    actually fetched.
    """
    # Keep only the part before the first '|'
    main_part = title_text.split('|')[0].strip()
    # The county name sits between the last 'for ' and ", TX"
    county = main_part.split(", TX")[0].split('for ')[-1].strip()
    # The series ID is the text inside the last pair of parentheses
    series_id = main_part.split('(')[-1].split(')')[0].strip()
    return county, series_id


def make_request_with_retries(url, max_retries=3, timeout=2):
    """Fetch a series page and extract the county name and series ID from its <title>.

    Args:
        url: Address of the series page to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        ``(county, series_id)`` parsed from the page title on a 200 response.
        ``(None, None)`` when the page answers 404, has no usable title,
        an unexpected non-request error occurs, or every attempt ended with
        some other status code.

    Raises:
        ConnectTimeout, ReadTimeout, RequestException: re-raised when the
            last permitted attempt fails with that exception.
    """
    retries = 0
    while retries < max_retries:
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                print(f"Request succeeded for {url}")
                soup = BeautifulSoup(response.text, 'html.parser')
                title_tag = soup.find('title')
                title_text = title_tag.text if title_tag else None
                if not title_text:
                    # No title to parse: report "nothing found" explicitly
                    # instead of relying on an UnboundLocalError.
                    return None, None
                return _parse_county_and_series(title_text)
            if response.status_code == 404:
                # A missing series will not appear on retry; give up immediately.
                return None, None
            # Any other status (e.g. 429/500) must consume an attempt,
            # otherwise this loop would never terminate.
            print(f"Attempt {retries + 1} of {max_retries} returned HTTP {response.status_code}. Retrying...")
            retries += 1
        except (ConnectTimeout, ReadTimeout):
            # Print an error message showing the retry attempt
            print(f"Attempt {retries + 1} of {max_retries} failed with timeout. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded. Failing...")
                raise  # Re-raise the last exception after final attempt
        except RequestException as e:
            print(f"Attempt {retries + 1} failed with a request exception: {e}. Retrying...")
            retries += 1
            if retries == max_retries:
                print("Max retries exceeded with request exceptions. Failing...")
                raise
        except Exception:
            # Best-effort: unexpected errors (e.g. while parsing) count as "no data".
            # `Exception` (not a bare except) lets Ctrl-C interrupt the cell.
            return None, None

    # If all retries are exhausted without a return, it indicates a failure
    print("Request failed after maximum retries.")
    return None, None


In [4]:
# Get a list of all counties in Texas. This will be used to build the list of request URLs for resident population.
# https://fred.stlouisfed.org/categories/29898 - this page lists all counties in Texas.
def extract_readable_text(html_content):
    """Return the text of every <p> element in ``html_content``, one per line.

    Args:
        html_content: HTML markup to parse with the 'html.parser' backend.

    Returns:
        A single string with the paragraphs' texts joined by newlines, in
        document order (empty string when there are no <p> elements).
    """
    parsed = BeautifulSoup(html_content, 'html.parser')
    paragraph_texts = []
    for paragraph in parsed.find_all('p'):
        paragraph_texts.append(paragraph.get_text())
    return '\n'.join(paragraph_texts)
# Fetch the category page and print the text of its paragraphs.
url = 'https://fred.stlouisfed.org/categories/29898'
try:
    counties_page = get_page(url)
    # get_page signals failure with a non-string value; only parse real HTML
    # text so a failed fetch is reported clearly instead of as a parser error.
    if isinstance(counties_page, str):
        print (f'this is it \n {extract_readable_text(counties_page)}')
    else:
        print(f"Failed to retrieve the webpage for {url}.")
except Exception as e:  # Catching a general exception for any other errors
    print(f"An error occurred while processing {url}: {e}")

this is it 
 Explore resources provided by the Research Division at the Federal Reserve Bank of St. Louis.
Your trusted data source
since 1991.

Categories > U.S. Regional Data > States > Texas



In [5]:
# Probe every candidate series page (county codes 001-519) and keep the
# county name / series ID of each page that yields a county.
# NOTE(review): in the captured output the even-numbered codes fail -
# consider probing only the codes that can exist; confirm before changing.
counties = []
hpi_ids = []
for county_code in range(1, 520):
    url = f'https://fred.stlouisfed.org/series/ATNHPIUS48{county_code:03}A'
    try:
        county, hpi_id = make_request_with_retries(url)
        if not county:
            print(f"Failed to retrieve the webpage for {url}.")
            continue
        counties.append(county)
        hpi_ids.append(hpi_id)
    except Exception as e:
        # Errors re-raised after the final retry land here; move on to the next code.
        print(f"An error occurred while processing {url}: {e}")

# Report how many counties were found, then tabulate them
print(f"Collected {len(counties)} HPI Counties.")
county_series_ids_df = pd.DataFrame({'County': counties, 'HPI ID': hpi_ids})
county_series_ids_df.head(10)

Request succeeded for https://fred.stlouisfed.org/series/ATNHPIUS48001A
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48002A.
Request succeeded for https://fred.stlouisfed.org/series/ATNHPIUS48003A
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48004A.
Request succeeded for https://fred.stlouisfed.org/series/ATNHPIUS48005A
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48006A.
Request succeeded for https://fred.stlouisfed.org/series/ATNHPIUS48007A
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48008A.
Request succeeded for https://fred.stlouisfed.org/series/ATNHPIUS48009A
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48010A.
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48011A.
Failed to retrieve the webpage for https://fred.stlouisfed.org/series/ATNHPIUS48012A.
Request succeeded for https://fred.stl

Unnamed: 0,County,HPI ID
0,Anderson County,ATNHPIUS48001A
1,Andrews County,ATNHPIUS48003A
2,Angelina County,ATNHPIUS48005A
3,Aransas County,ATNHPIUS48007A
4,Archer County,ATNHPIUS48009A
5,Atascosa County,ATNHPIUS48013A
6,Austin County,ATNHPIUS48015A
7,Bandera County,ATNHPIUS48019A
8,Bastrop County,ATNHPIUS48021A
9,Bee County,ATNHPIUS48025A


In [6]:
# Persist the county / series-ID table as a CSV file (row index omitted).
file_path = "../resources/TX_County_HPI_ID.csv"
county_series_ids_df.to_csv(path_or_buf=file_path, index=False)