From 0ac1237e1c3393191aa95095074b030002db1a97 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Wed, 8 Apr 2026 16:59:57 +0000 Subject: [PATCH 1/5] code fix --- .../national_1900_1959.py | 20 +++++++++++++++++-- .../national_1960_1979.py | 19 ++++++++++++++++-- .../population_estimates_by_asr/process.py | 14 ++++++++----- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index 4973f2df30..9a1f2589de 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -41,8 +41,24 @@ def national1900(output_folder: str): # 8=Female_NonWhiteAlone cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8'] # reading the csv format input file and converting it to a dataframe - df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\ - skipfooter=15,encoding='ISO-8859-1') + try: + # Check if the URL is accessible and returns a CSV + import requests + response = requests.head(url, allow_redirects=True) + if response.status_code != 200 or 'text/csv' not in response.headers.get( + 'Content-Type', ''): + print(f"Skipping {url} as it is not a CSV or not accessible.") + continue + + df = pd.read_csv(url, + names=cols, + engine='python', + skiprows=9, + skipfooter=15, + encoding='ISO-8859-1') + except Exception as e: + print(f"Error reading {url}: {e}") + continue #Writing raw data to csv df.to_csv(os.path.join( os.path.dirname(os.path.abspath(__file__)), "raw_data", diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 3fe6808b00..77ed3d7998 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -38,8 
+38,23 @@ def national1960(output_folder: str): ] # Reading the csv format input file and converting it to a dataframe. # Skipping unwanted rows from top and bottom. - df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\ - skipfooter=15) + try: + # Check if the URL is accessible and returns a CSV + import requests + response = requests.head(url, allow_redirects=True) + if response.status_code != 200 or 'text/csv' not in response.headers.get( + 'Content-Type', ''): + print(f"Skipping {url} as it is not a CSV or not accessible.") + continue + + df = pd.read_csv(url, + names=cols, + engine='python', + skiprows=8, + skipfooter=15) + except Exception as e: + print(f"Error reading {url}: {e}") + continue #Writing raw data to csv df.to_csv(os.path.join( os.path.dirname(os.path.abspath(__file__)), "raw_data", diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index 1d8f124097..cfbd1bdca7 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -68,13 +68,17 @@ def add_future_year_urls(): for YEAR in range(2030, 2020, -1): url_to_check = url.format(YEAR=YEAR) try: - check_url = requests.head(url_to_check) - if check_url.status_code == 200: + check_url = requests.head(url_to_check, allow_redirects=True) + # Check both the status code AND the content type + content_type = check_url.headers.get('Content-Type', '') + + if check_url.status_code == 200 and 'text/csv' in content_type: _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break - - except: - logging.error(f"URL is not accessable {url_to_check}") + else: + logging.warning(f"URL exists but is not a CSV: {url_to_check}") + except Exception as e: + logging.error(f"URL is not accessible {url_to_check}: {e}") MCF_TEMPLATE = ("Node: dcid:{pv1}\n" From 6870e0cc88ed5d02adb412e11adbfb3a6c222d56 Mon Sep 17 00:00:00 2001 From: Nivedita Singh 
Date: Thu, 9 Apr 2026 06:01:09 +0000 Subject: [PATCH 2/5] code fix --- scripts/us_census/pep/population_estimates_by_asr/process.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index cfbd1bdca7..f00d5eab00 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -76,7 +76,8 @@ def add_future_year_urls(): _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: - logging.warning(f"URL exists but is not a CSV: {url_to_check}") + logging.warning( + f"URL exists but is not a CSV: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}") From d104a483b1dfde5226ec35dfe710d09796ea6d9e Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 9 Apr 2026 08:36:40 +0000 Subject: [PATCH 3/5] code fix --- .../pep/population_estimates_by_asr/national_1900_1959.py | 2 +- .../pep/population_estimates_by_asr/national_1960_1979.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index 9a1f2589de..fb835ecfb0 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -16,6 +16,7 @@ ''' import os import pandas as pd +import requests def national1900(output_folder: str): @@ -43,7 +44,6 @@ def national1900(output_folder: str): # reading the csv format input file and converting it to a dataframe try: # Check if the URL is accessible and returns a CSV - import requests response = requests.head(url, allow_redirects=True) if response.status_code != 200 or 'text/csv' not in response.headers.get( 'Content-Type', ''): diff --git 
a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 77ed3d7998..18a5292d94 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -16,6 +16,7 @@ ''' import os import pandas as pd +import requests def national1960(output_folder: str): @@ -40,7 +41,6 @@ def national1960(output_folder: str): # Skipping unwanted rows from top and bottom. try: - # Check if the URL is accessible and returns a CSV - import requests response = requests.head(url, allow_redirects=True) if response.status_code != 200 or 'text/csv' not in response.headers.get( 'Content-Type', ''): From 87bc7446023fa25e525dadd514b3888aa9c3f793 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Mon, 13 Apr 2026 09:02:24 +0000 Subject: [PATCH 4/5] resolved comments --- .../population_estimates_by_asr/national_1900_1959.py | 10 +++++----- .../population_estimates_by_asr/national_1960_1979.py | 10 +++++----- .../pep/population_estimates_by_asr/process.py | 7 ++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index fb835ecfb0..318d179ee7 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -17,6 +17,7 @@ import os import pandas as pd import requests +from absl import logging def national1900(output_folder: str): @@ -43,11 +44,10 @@ def national1900(output_folder: str): cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8'] # reading the csv format input file and converting it to a dataframe try: - # Check if the URL is accessible and returns a CSV + # Check if the URL is accessible response = requests.head(url,
allow_redirects=True) - if response.status_code != 200 or 'text/csv' not in response.headers.get( - 'Content-Type', ''): - print(f"Skipping {url} as it is not a CSV or not accessible.") + if response.status_code != 200: + logging.warning(f"Skipping {url} as it is not accessible.") continue df = pd.read_csv(url, @@ -57,7 +57,7 @@ def national1900(output_folder: str): skipfooter=15, encoding='ISO-8859-1') except Exception as e: - print(f"Error reading {url}: {e}") + logging.error(f"Error reading {url}: {e}") continue #Writing raw data to csv df.to_csv(os.path.join( diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 18a5292d94..919d2d80b5 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -17,6 +17,7 @@ import os import pandas as pd import requests +from absl import logging def national1960(output_folder: str): @@ -40,11 +41,10 @@ def national1960(output_folder: str): # Reading the csv format input file and converting it to a dataframe. # Skipping unwanted rows from top and bottom. 
try: - # Check if the URL is accessible and returns a CSV + # Check if the URL is accessible response = requests.head(url, allow_redirects=True) - if response.status_code != 200 or 'text/csv' not in response.headers.get( - 'Content-Type', ''): - print(f"Skipping {url} as it is not a CSV or not accessible.") + if response.status_code != 200: + logging.warning(f"Skipping {url} as it is not accessible.") continue df = pd.read_csv(url, @@ -53,7 +53,7 @@ def national1960(output_folder: str): skiprows=8, skipfooter=15) except Exception as e: - print(f"Error reading {url}: {e}") + logging.error(f"Error reading {url}: {e}") continue #Writing raw data to csv df.to_csv(os.path.join( diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index f00d5eab00..054724babf 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -69,15 +69,12 @@ def add_future_year_urls(): url_to_check = url.format(YEAR=YEAR) try: check_url = requests.head(url_to_check, allow_redirects=True) - # Check both the status code AND the content type - content_type = check_url.headers.get('Content-Type', '') - - if check_url.status_code == 200 and 'text/csv' in content_type: + if check_url.status_code == 200: _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: logging.warning( - f"URL exists but is not a CSV: {url_to_check}") + f"URL is not accessible: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}") From 24d4a566c70dedbc34e4fb274f4ceda70369dbde Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Mon, 13 Apr 2026 09:34:39 +0000 Subject: [PATCH 5/5] resolved comments --- scripts/us_census/pep/population_estimates_by_asr/process.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py
b/scripts/us_census/pep/population_estimates_by_asr/process.py index 054724babf..1cd9800020 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -73,8 +73,7 @@ def add_future_year_urls(): _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: - logging.warning( - f"URL is not accessible: {url_to_check}") + logging.warning(f"URL is not accessible: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}")