# Data Processing Pipeline
This script performs several functions: it loads necessary keys, reads series identifiers, retrieves and processes data, and finally saves this data for further use in visualization.
## Load the Key
- **Description**: Load the necessary authentication key or configuration that may be needed for data access.
## Read Series ID
- **File Location**: `../resources/list_of_series.csv`
## Process Each Series
### Loop Through Each Series Row
For each row in the series list, perform the following steps:
#### 1. Data Retrieval
- **Description**: Reads the series ID from a CSV file to identify which data series to process.
- **API Call**: Use the specified API to fetch the data related to the series ID.
#### 2. Data Processing
- **Transform**: Loop through the retrieved data and organize it by year into a pandas DataFrame.
#### 3. File Naming
- **Generate Filename**: Create a unique filename for each dataset based on attributes from the `list_of_series`.
#### 4. Save Data

- **Location**: `../resources/`
- **Description**: Write the processed files to the specified directory for later use in data visualization tasks.


In [24]:
# Populate the data in resources for all counties
# Dependencies
import requests
from dotenv import load_dotenv
import os
import pandas as pd
from bs4 import BeautifulSoup

https://api.stlouisfed.org/fred/category/series?category_id=125&api_key=abcdefghijklmnopqrstuvwxyz123456
https://fred.stlouisfed.org/docs/api/fred/category_series.html#Description 

In [2]:
# Set environment variables from the .env in the local environment
def key_check(key_path=None):
    try:
        reply=load_dotenv(key_path,verbose=True,override=True)   
        assert reply , 'Dotenv is not found'
        fred_api_key = os.getenv("FRED_API_KEY")
        assert fred_api_key is not None, 'FRED_API_KEY not found in .env file'
        responce=requests.get(f'https://api.stlouisfed.org/fred/category/series?category_id=125&api_key={fred_api_key}')
        assert responce.status_code == 200, f'The key provided failed to authenticate tmdb_api_key {fred_api_key} code {responce.status_code}'
    except Exception as e:
        # Handle potential errors in loading .env or missing API keys
        print(f'An error occurred: {e}')
        return(False)
    else:
        print('All keys loaded correctly')
        return (True)
my_env_path='C:\SRC\AI\ProjectOne_Team2\Frank_playground\keys.env'
if key_check(my_env_path):
    fred_api_key = os.getenv("FRED_API_KEY")
else:
    print ('fix Keys and rerun')

All keys loaded correctly


In [3]:
# read the file into a list
list_of_series_df = pd.read_csv('C:\SRC\AI\ProjectOne_Team2\\resources\\all_tx _counties.csv')

In [4]:
#  Set the base URL
rt_start='&observation_start=1997-01-01'
rt_end='&observation_end=2022-01-01'
series_id=''
url = (f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}{rt_start}{rt_end}&api_key={fred_api_key}&file_type=json")

In [5]:
def create_filename(row):
    # Join the desired fields with underscores
    filename = '_'.join(row[['country', 'state', 'county', 'series_Desc']])
    # Replace spaces with underscores and remove any problematic characters
    filename = filename.replace(' ', '_').replace('/', '_')
    return filename

In [8]:

for index, row in list_of_series_df.iterrows():
    series_id=list_of_series_df.loc[index,'series_id']
    url = (f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}{rt_start}{rt_end}&api_key={fred_api_key}&file_type=json")
    try:
        series=[]
        response = requests.get(url)
        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
        series = response.json()
    except requests.exceptions.HTTPError as http_err:
        # Handle specific HTTP errors e.g., 404, 500 etc.
        print(f'error {response.status_code} series ID {series_id}')  
    except requests.exceptions.RequestException as err:
        # Handle other requests-related errors
        print(f'An error occurred: {err}')
    except Exception as e:
        # Handle other exceptions such as a JSON decoding error
        print(f'An error occurred: {e}')
    # display (series)
    # Create an empty list to store the reviews
    else:
        display (f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}{rt_start}{rt_end}&api_key={fred_api_key}&file_type=json")
        break
        series_list =[]
        for item in series["observations"]:
            if item['value'] != '.':
                series_list.append(item)      
        # Clean Data
        series_df = pd.DataFrame(series_list)
        series_df = (
            series_df
            .drop(series_df.columns[[0, 1]], axis=1)
            .assign(date=lambda df: pd.to_datetime(df['date']))
            .set_index('date')
            .dropna()
        )
        # CREATE A FILENAME AND WRITE THE DATA TO THE FILE.
        filename = create_filename(row)
        list_of_series_df['filename']=list_of_series_df.apply(create_filename, axis=1)
        file_path = f"../resources/{filename}.csv"  # Construct file path with .csv extension
        series_df.to_csv(file_path, index=True)

'https://api.stlouisfed.org/fred/series/observations?series_id=ATNHPIUS48001A&observation_start=1997-01-01&observation_end=2022-01-01&api_key=791aff06728f5b2b7056c7e789b48ad5&file_type=json'

In [7]:
list_of_series_df.to_csv('C:/SRC/AI/ProjectOne_Team2//resources/filename.csv', index=False)

In [None]:


titles = []
for index in range (1,600): 
# Send a GET request to the URL
    url = (f'https://fred.stlouisfed.org/series/ATNHPIUS48{index:03}A')
    display (url)
    response = requests.get(url)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find the <title> tag
        title_tag = soup.find('title')
        # Extract the text from the <title> tag
        title_text = title_tag.text if title_tag else None
        # Create a list and add the title text
        if title_text:
            titles.append(title_text)  # Add the title text to the list
    else:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
# Print the list to see the result
print(titles)

In [45]:
# Initialize lists to store the extracted data
counties = []
series_ids = []
for item in titles:
    # Split the string to isolate the part before the first '|'
    main_part = item.split('|')[0].strip()
    
    # Extract the county name; it's always followed by ", TX"
    county = main_part.split(", TX")[0].split('for ')[-1].strip()
    
    # Extract the series ID; it's always within parentheses
    series_id = main_part.split('(')[-1].split(')')[0].strip()
    
    # Append to lists
    counties.append(county)
    series_ids.append(series_id)

# Create a DataFrame from the lists
county_series_id_df = pd.DataFrame({
    'County': counties,
    'Series ID': series_ids
})
county_series_id_df.head(100)
file_path = f"../resources/tx_county_Series_id.csv"  # Construct file path with .csv extension
county_series_id_df.to_csv(file_path, index=False)