<a href="https://colab.research.google.com/github/babessell1/GWC_Test/blob/main/BLS_API_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas numpy numba



In [14]:
import requests
import json
import prettytable
headers = {'Content-type': 'application/json'}
data = json.dumps({"seriesid": ['CUUR0000SA0','SUUR0000SA0'],"startyear":"2011", "endyear":"2014"})
p = requests.post('https://api.bls.gov/publicAPI/v1/timeseries/data/', data=data, headers=headers)
json_data = json.loads(p.text)
for _series_ in json_data['Results']['series']:
    x=prettytable.PrettyTable(["series id","year","period","value","footnotes"])
    seriesId = _series_['seriesID']
    for item in _series_['data']:
        year = item['year']
        period = item['period']
        value = item['value']
        footnotes=""
        for footnote in item['footnotes']:
            if footnote:
                footnotes = footnotes + footnote['text'] + ','

        if 'M01' <= period <= 'M12':
            x.add_row([seriesId,year,period,value,footnotes[0:-1]])
    output = open(seriesId + '.txt','w')
    output.write (x.get_string())
    output.close()

KeyError: 'series'

In [9]:
# -------------------------------------------------------------------
# 1. Imports
# -------------------------------------------------------------------
import requests                # Used to make HTTP/HTTPS requests to the BLS API
import json                    # Serialize Python dicts ↔ JSON strings
import pandas as pd            # Main data-wrangling library
from datetime import datetime  # To fetch the current year if the caller omits endyear
from io import StringIO
import re
import csv

In [18]:
# -------------------------------------------------------------------
# 1. Imports
# -------------------------------------------------------------------
import requests                # Used to make HTTP/HTTPS requests to the BLS API
import json                    # Serialize Python dicts ↔ JSON strings
import pandas as pd            # Main data-wrangling library
from datetime import datetime  # To fetch the current year if the caller omits endyear


# -------------------------------------------------------------------
# 2. Helper function: query the BLS API and return a tidy DataFrame
# -------------------------------------------------------------------
def bls_query(series_list,            # List of BLS series IDs you want (e.g. ["JTS000000000..."])
              startyear='2019',       # First calendar year of data to request
              endyear=None,           # Last calendar year (defaults to current year)
              api_key=None):          # Optional: your BLS API key if you need >500 calls/day
    """
    Send one POST request to the BLS v2 timeseries endpoint and
    return a pandas DataFrame with datetime index and one column per series.
    """

    # If user didn’t pass an endyear, default to the current year (e.g. 2024)
    if endyear is None:
        endyear = datetime.now().year

    # HTTP header telling the server we are sending JSON in the body
    headers = {'Content-type': 'application/json'}

    # Build the request payload as a native Python dict first
    request_json = {
        "seriesid": series_list,      # Must be a list, even if it contains one element
        "startyear": startyear,       # Must be a string per API spec
        "endyear":  str(endyear)      # Ensure this is also a string
    }

    # Add the registration key only if the caller supplied one
    if api_key:                       # Truthy if not None and not empty string
        request_json["registrationKey"] = api_key

    # ----------------------------------------------------------------
    # Make the POST request
    # ----------------------------------------------------------------
    resp = requests.post(
        'https://api.bls.gov/publicAPI/v2/timeseries/data/',  # v1 still works, v2 is preferred
        headers=headers,
        data=json.dumps(request_json)   # Convert dict → JSON string for the body
    )

    # Raise a Python exception if we hit a 4xx/5xx HTTP error
    resp.raise_for_status()

    # Extract only the list of series objects from the JSON response
    # The response JSON structure is: {"Results":{"series":[...]}}
    data = resp.json()['Results']['series']

    # ----------------------------------------------------------------
    # Convert each series into one pandas DataFrame, then concat
    # ----------------------------------------------------------------
    df_list = []                       # Will hold one DataFrame per series
    for s in data:                     # Loop over all series the API returned
        rows = []                      # Temp list of dicts, one per observation
        for item in s['data']:         # Each "item" is a single month/period
            rows.append({
                'series_id': s['seriesID'],                    # e.g. "JTS420000000000000JOL"
                'date': f"{item['year']}-{item['period'][1:]}",# Construct "YYYY-MM" string
                'value': float(item['value'])                  # Convert numeric string → float
            })
        # Convert list of dicts → DataFrame and add it to df_list
        df_list.append(pd.DataFrame(rows))

    # Concatenate all individual series DataFrames vertically (row-wise)
    big_df = pd.concat(df_list)

    # Cast the date column from text → pandas datetime type for time-series ops
    big_df['date'] = pd.to_datetime(big_df['date'])

    # Pivot from "long" to "wide":
    #   rows  → dates
    #   cols  → series_id
    #   vals  → numeric value
    wide_df = big_df.pivot(index='date',
                           columns='series_id',
                           values='value').sort_index()

    return wide_df  # Ready for downstream analysis


# -------------------------------------------------------------------
# 3. Choose which industries (series IDs) you care about
# -------------------------------------------------------------------
# Keys are friendly names you’ll see in your final DataFrame;
# values are official BLS JOLTS series IDs (seasonally adjusted).
series_ids = {
    'JobOpenings_Total'        : 'JTS000000000000000JOL',  # All industries
    'JobOpenings_Manufacturing': 'JTS300000000000000JOL',  # Manufacturing
    'JobOpenings_Retail'       : 'JTS420000000000000JOL',  # Retail Trade
    'JobOpenings_Information'  : 'JTS500000000000000JOL',  # Information sector
    'JobOpenings_Health'       : 'JTS620000000000000JOL',  # Health Care & Social Assistance
    # Add more <friendly_name>:<series_id> pairs as needed
}


# -------------------------------------------------------------------
# 4. Pull the data from BLS for all chosen series in one shot
# -------------------------------------------------------------------
df = bls_query(                       # Call our helper
        list(series_ids.values()),    # Only the actual series IDs
        startyear='2019'              # Feel free to adjust
     )

# -------------------------------------------------------------------
# 5. Rename machine-generated column labels (series IDs) to readable names
# -------------------------------------------------------------------
df = df.rename(columns={v: k for k, v in series_ids.items()})
# Now you’ll have columns like: JobOpenings_Total, JobOpenings_Retail, etc.


# -------------------------------------------------------------------
# 6. Calculate a simple demand indicator: year-over-year % change
# -------------------------------------------------------------------
# .pct_change(periods=12) computes (current_value ‑ value_12_months_ago)/value_12_months_ago
# Multiply by 100 to express as a percentage.
yoy = df.pct_change(periods=12) * 100


# -------------------------------------------------------------------
# 7. Find the latest month available and rank industries by YoY growth
# -------------------------------------------------------------------
latest = (yoy
          .tail(1)                   # Last row → most recent month
          .T                         # Transpose so industries are rows
          .sort_values(by=yoy.tail(1).index[0])  # Sort ascending (lowest to highest)
         )

print("Year-over-year % change in job openings (latest month):")
print(latest)  # Display table in console / Jupyter cell

KeyError: 'series'