From 0ac1237e1c3393191aa95095074b030002db1a97 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Wed, 8 Apr 2026 16:59:57 +0000 Subject: [PATCH 1/5] code fix --- .../national_1900_1959.py | 20 +++++++++++++++++-- .../national_1960_1979.py | 19 ++++++++++++++++-- .../population_estimates_by_asr/process.py | 14 ++++++++----- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index 4973f2df30..9a1f2589de 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -41,8 +41,24 @@ def national1900(output_folder: str): # 8=Female_NonWhiteAlone cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8'] # reading the csv format input file and converting it to a dataframe - df = pd.read_csv(url,names=cols,engine='python',skiprows=9,\ - skipfooter=15,encoding='ISO-8859-1') + try: + # Check if the URL is accessible and returns a CSV + import requests + response = requests.head(url, allow_redirects=True) + if response.status_code != 200 or 'text/csv' not in response.headers.get( + 'Content-Type', ''): + print(f"Skipping {url} as it is not a CSV or not accessible.") + continue + + df = pd.read_csv(url, + names=cols, + engine='python', + skiprows=9, + skipfooter=15, + encoding='ISO-8859-1') + except Exception as e: + print(f"Error reading {url}: {e}") + continue #Writing raw data to csv df.to_csv(os.path.join( os.path.dirname(os.path.abspath(__file__)), "raw_data", diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 3fe6808b00..77ed3d7998 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -38,8 
+38,23 @@ def national1960(output_folder: str): ] # Reading the csv format input file and converting it to a dataframe. # Skipping unwanted rows from top and bottom. - df = pd.read_csv(url,names=cols,engine='python',skiprows=8,\ - skipfooter=15) + try: + # Check if the URL is accessible and returns a CSV + import requests + response = requests.head(url, allow_redirects=True) + if response.status_code != 200 or 'text/csv' not in response.headers.get( + 'Content-Type', ''): + print(f"Skipping {url} as it is not a CSV or not accessible.") + continue + + df = pd.read_csv(url, + names=cols, + engine='python', + skiprows=8, + skipfooter=15) + except Exception as e: + print(f"Error reading {url}: {e}") + continue #Writing raw data to csv df.to_csv(os.path.join( os.path.dirname(os.path.abspath(__file__)), "raw_data", diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index 1d8f124097..cfbd1bdca7 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -68,13 +68,17 @@ def add_future_year_urls(): for YEAR in range(2030, 2020, -1): url_to_check = url.format(YEAR=YEAR) try: - check_url = requests.head(url_to_check) - if check_url.status_code == 200: + check_url = requests.head(url_to_check, allow_redirects=True) + # Check both the status code AND the content type + content_type = check_url.headers.get('Content-Type', '') + + if check_url.status_code == 200 and 'text/csv' in content_type: _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break - - except: - logging.error(f"URL is not accessable {url_to_check}") + else: + logging.warning(f"URL exists but is not a CSV: {url_to_check}") + except Exception as e: + logging.error(f"URL is not accessible {url_to_check}: {e}") MCF_TEMPLATE = ("Node: dcid:{pv1}\n" From 6870e0cc88ed5d02adb412e11adbfb3a6c222d56 Mon Sep 17 00:00:00 2001 From: Nivedita Singh 
Date: Thu, 9 Apr 2026 06:01:09 +0000 Subject: [PATCH 2/5] code fix --- scripts/us_census/pep/population_estimates_by_asr/process.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index cfbd1bdca7..f00d5eab00 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -76,7 +76,8 @@ def add_future_year_urls(): _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: - logging.warning(f"URL exists but is not a CSV: {url_to_check}") + logging.warning( + f"URL exists but is not a CSV: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}") From d104a483b1dfde5226ec35dfe710d09796ea6d9e Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 9 Apr 2026 08:36:40 +0000 Subject: [PATCH 3/5] code fix --- .../pep/population_estimates_by_asr/national_1900_1959.py | 2 +- .../pep/population_estimates_by_asr/national_1960_1979.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index 9a1f2589de..fb835ecfb0 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -16,6 +16,7 @@ ''' import os import pandas as pd +import requests def national1900(output_folder: str): @@ -43,7 +44,6 @@ def national1900(output_folder: str): # reading the csv format input file and converting it to a dataframe try: # Check if the URL is accessible and returns a CSV - import requests response = requests.head(url, allow_redirects=True) if response.status_code != 200 or 'text/csv' not in response.headers.get( 'Content-Type', ''): diff --git 
a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 77ed3d7998..18a5292d94 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -16,6 +16,7 @@ ''' import os import pandas as pd +import requests def national1960(output_folder: str): @@ -40,7 +41,6 @@ def national1960(output_folder: str): # Skipping unwanted rows from top and bottom. try: - # Check if the URL is accessible and returns a CSV - import requests response = requests.head(url, allow_redirects=True) if response.status_code != 200 or 'text/csv' not in response.headers.get( 'Content-Type', ''): From 87bc7446023fa25e525dadd514b3888aa9c3f793 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Mon, 13 Apr 2026 09:02:24 +0000 Subject: [PATCH 4/5] resolved comments --- .../population_estimates_by_asr/national_1900_1959.py | 10 +++++----- .../population_estimates_by_asr/national_1960_1979.py | 10 +++++----- .../pep/population_estimates_by_asr/process.py | 7 ++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py index fb835ecfb0..318d179ee7 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1900_1959.py @@ -17,6 +17,7 @@ import os import pandas as pd import requests +from absl import logging def national1900(output_folder: str): @@ -43,11 +44,10 @@ def national1900(output_folder: str): cols = ['Age', '0', '1', '2', '3', '4', '5', '6', '7', '8'] # reading the csv format input file and converting it to a dataframe try: - # Check if the URL is accessible and returns a CSV + # Check if the URL is accessible response = requests.head(url,
allow_redirects=True) - if response.status_code != 200 or 'text/csv' not in response.headers.get( - 'Content-Type', ''): - print(f"Skipping {url} as it is not a CSV or not accessible.") + if response.status_code != 200: + logging.warning(f"Skipping {url} as it is not accessible.") continue df = pd.read_csv(url, @@ -57,7 +57,7 @@ def national1900(output_folder: str): skipfooter=15, encoding='ISO-8859-1') except Exception as e: - print(f"Error reading {url}: {e}") + logging.error(f"Error reading {url}: {e}") continue #Writing raw data to csv df.to_csv(os.path.join( diff --git a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py index 18a5292d94..919d2d80b5 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py +++ b/scripts/us_census/pep/population_estimates_by_asr/national_1960_1979.py @@ -17,6 +17,7 @@ import os import pandas as pd import requests +from absl import logging def national1960(output_folder: str): @@ -40,11 +41,10 @@ def national1960(output_folder: str): # Reading the csv format input file and converting it to a dataframe. # Skipping unwanted rows from top and bottom. 
try: - # Check if the URL is accessible and returns a CSV + # Check if the URL is accessible response = requests.head(url, allow_redirects=True) - if response.status_code != 200 or 'text/csv' not in response.headers.get( - 'Content-Type', ''): - print(f"Skipping {url} as it is not a CSV or not accessible.") + if response.status_code != 200: + logging.warning(f"Skipping {url} as it is not accessible.") continue df = pd.read_csv(url, @@ -53,7 +53,7 @@ def national1960(output_folder: str): skiprows=8, skipfooter=15) except Exception as e: - print(f"Error reading {url}: {e}") + logging.error(f"Error reading {url}: {e}") continue #Writing raw data to csv df.to_csv(os.path.join( diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py b/scripts/us_census/pep/population_estimates_by_asr/process.py index f00d5eab00..054724babf 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -69,15 +69,12 @@ def add_future_year_urls(): url_to_check = url.format(YEAR=YEAR) try: check_url = requests.head(url_to_check, allow_redirects=True) - # Check both the status code AND the content type - content_type = check_url.headers.get('Content-Type', '') - - if check_url.status_code == 200 and 'text/csv' in content_type: + if check_url.status_code == 200: _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: logging.warning( - f"URL exists but is not a CSV: {url_to_check}") + f"URL is not accessible: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}") From 24d4a566c70dedbc34e4fb274f4ceda70369dbde Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Mon, 13 Apr 2026 09:34:39 +0000 Subject: [PATCH 5/5] resolved comments --- scripts/us_census/pep/population_estimates_by_asr/process.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/us_census/pep/population_estimates_by_asr/process.py
b/scripts/us_census/pep/population_estimates_by_asr/process.py index 054724babf..1cd9800020 100644 --- a/scripts/us_census/pep/population_estimates_by_asr/process.py +++ b/scripts/us_census/pep/population_estimates_by_asr/process.py @@ -73,8 +73,7 @@ def add_future_year_urls(): _FILES_TO_DOWNLOAD.append({"download_path": url_to_check}) break else: - logging.warning( - f"URL is not accessible: {url_to_check}") + logging.warning(f"URL is not accessible: {url_to_check}") except Exception as e: logging.error(f"URL is not accessible {url_to_check}: {e}")