# Imports

In [1]:
import pandas as pd
from datetime import datetime , timedelta
import requests
import json
import numpy as np
# FRED API key. It is used as the default `api_key` argument of the download
# functions below; because default arguments are evaluated when a function is
# defined, set it here before running the function cells (or pass `api_key=`
# explicitly on each call).
fred_key = ""


# Functions

In [None]:
def get_all_data_vintage_fed_0(
    series_name:str,
    output_type:int=4,
    api_key=fred_key
)-> pd.DataFrame :
    """Download the real-time (vintage) history of one FRED series.

    The FRED ``series/observations`` endpoint is queried with
    ``realtime_start=1986-01-01`` and ``file_type=json``; the shape of the
    result depends on ``output_type``.

    Args:
        series_name (str): series id from FRED, e.g. INDPRO for industrial production.
        output_type (int, optional): FRED ``output_type`` code. Defaults to 4
            (first release of each observation). Use 2 for all vintages and 1
            for observations by real-time period.
        api_key (str, optional): personal FRED API key. Defaults to ``fred_key``.

    Returns:
        * ``output_type == 4``: a DataFrame indexed by observation date with a
          float column named ``series_name`` and a ``publication_date`` column.
          To get everything published on one date, use
          ``df.loc[df['publication_date'] == date]``.
        * ``output_type == 1``: a Series (not a DataFrame) named
          ``series_name`` holding the ``value`` field as returned by the API,
          without numeric conversion.
        * any other value: a long DataFrame indexed by observation date with
          columns ``publication_date`` and ``series_name``, one row per
          (observation date, vintage) pair, rows with missing values dropped.

    Raises:
        requests.HTTPError: if the API does not answer with HTTP 200.
    """
    base_url = "https://api.stlouisfed.org/fred/series/observations?"
    # Let requests build and URL-encode the query string.
    query = {
        "series_id": series_name,
        "api_key": api_key,
        "realtime_start": "1986-01-01",
        "file_type": "json",
        "output_type": output_type,
    }

    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(url=base_url, params=query, timeout=60)
    if response.status_code != requests.codes.ok:
        # Fail loudly here instead of printing and then crashing later on an
        # undefined ``df``. The URL is left out of the message on purpose: it
        # contains the API key.
        raise requests.HTTPError(
            f"FRED request for series {series_name!r} failed "
            f"with HTTP status {response.status_code}",
            response=response,
        )

    json_data = json.loads(response.content)["observations"]
    df = pd.DataFrame.from_records(json_data)

    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")

    if output_type == 4:
        df["realtime_start"] = pd.to_datetime(df["realtime_start"])
        # "." placeholders become NaN so the column can be cast to float.
        df["value"] = df["value"].replace(".", np.nan).astype(float)
        return df[["value", "realtime_start"]].rename(
            columns={"value": series_name, "realtime_start": "publication_date"}
        )
    if output_type == 1:
        return df["value"].rename(series_name)

    # Wide layout: one column per vintage. Drop empty rows and cast to float.
    df = (
        df
        .dropna(axis=0, how="all")
        .replace(".", value=np.nan)
        .astype("float")
    )

    # Column labels are "<series_name>_<date>": strip the prefix literally and
    # parse the remainder as the vintage (publication) date.
    df.columns = pd.to_datetime(
        df.columns.str.replace(f"{series_name}_", "", regex=False)
    )
    # Reshape wide -> long, keeping the observation date as the index.
    df = (
        df
        .sort_index(axis=1, ascending=True)
        .melt(ignore_index=False)
        .rename(columns={"variable": "publication_date", "value": series_name})
        .dropna()
    )
    return df


In [None]:
def get_all_data_vintage_fed(
    series_name:str,
    api_key:str = fred_key,
    output_type:int=4,
    base_url:str = "https://api.stlouisfed.org/fred/series/observations?"
)-> pd.DataFrame :
    """Download the real-time (vintage) history of one FRED series.

    The endpoint at ``base_url`` is queried with ``realtime_start=1986-01-01``
    and ``file_type=json``; the shape of the result depends on ``output_type``.

    Args:
        series_name (str): series id from FRED, e.g. INDPRO for industrial production.
        api_key (str, optional): personal FRED API key. Defaults to ``fred_key``.
        output_type (int, optional): FRED ``output_type`` code. Defaults to 4
            (first release of each observation). Use 2 for all vintages and 1
            for observations by real-time period.
        base_url (str, optional): observations endpoint. Defaults to
            https://api.stlouisfed.org/fred/series/observations?

    Returns:
        * ``output_type == 4``: a DataFrame indexed by observation date with a
          float column named ``series_name`` and a ``publication_date`` column.
          To get everything published on one date, use
          ``df.loc[df['publication_date'] == date]``.
        * ``output_type == 1``: a Series (not a DataFrame) named
          ``series_name`` holding the ``value`` field as returned by the API,
          without numeric conversion.
        * any other value: a long DataFrame indexed by observation date with
          columns ``publication_date`` and ``series_name``, one row per
          (observation date, vintage) pair, rows with missing values dropped.

    Raises:
        requests.HTTPError: if the API does not answer with HTTP 200.
    """
    # Let requests build and URL-encode the query string.
    request_params = {
        "series_id": series_name,
        "api_key": api_key,
        "realtime_start": "1986-01-01",
        "file_type": "json",
        "output_type": output_type,
    }

    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(url=base_url, params=request_params, timeout=60)
    if response.status_code != requests.codes.ok:
        # Fail loudly here instead of printing and then crashing later on an
        # undefined ``df``. The URL is left out of the message on purpose: it
        # contains the API key.
        raise requests.HTTPError(
            f"FRED request for series {series_name!r} failed "
            f"with HTTP status {response.status_code}",
            response=response,
        )

    json_data = json.loads(response.content)["observations"]
    df = pd.DataFrame.from_records(json_data)

    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")

    if output_type == 4:
        df["realtime_start"] = pd.to_datetime(df["realtime_start"])
        # "." placeholders become NaN so the column can be cast to float.
        df["value"] = df["value"].replace(".", np.nan).astype(float)
        return df[["value", "realtime_start"]].rename(
            columns={"value": series_name, "realtime_start": "publication_date"}
        )
    if output_type == 1:
        return df["value"].rename(series_name)

    # Wide layout: one column per vintage. Drop empty rows and cast to float.
    df = (
        df
        .dropna(axis=0, how="all")
        .replace(".", value=np.nan)
        .astype("float")
    )

    # Column labels are "<series_name>_<date>": strip the prefix literally and
    # parse the remainder as the vintage (publication) date.
    df.columns = pd.to_datetime(
        df.columns.str.replace(f"{series_name}_", "", regex=False)
    )
    # Reshape wide -> long, keeping the observation date as the index.
    df = (
        df
        .sort_index(axis=1, ascending=True)
        .melt(ignore_index=False)
        .rename(columns={"variable": "publication_date", "value": series_name})
        .dropna()
    )
    return df

In [None]:
# Function to select a vintage from the all vintage series: i_vintage=0 means last, 1 means the one before etc

def select_one_vintage_0(df: pd.DataFrame, series_id: str, i_vintage: int = 0) -> pd.DataFrame:
    """Extract a single vintage from a long all-vintage table.

    Args:
        df: table with a ``publication_date`` column and a ``series_id``
            column, indexed by observation date.
        series_id: name of the value column to keep.
        i_vintage: rank of the vintage counted from the most recent
            publication date (0 = latest, 1 = the one before, ...).

    Returns:
        The rows published on the selected date, restricted to the columns
        ``publication_date`` and ``series_id``, keeping only the first row for
        each index label.
    """
    # Distinct publication dates, newest first; pick the requested rank.
    distinct_dates = df.publication_date.drop_duplicates()
    newest_first = distinct_dates.nlargest(n=i_vintage + 1, keep='last')
    vintage_date = newest_first.iloc[i_vintage]

    # Keep only the rows belonging to that vintage.
    snapshot = df.loc[df.publication_date == vintage_date]

    # Keep the first row per observation date and the two relevant columns.
    is_first_occurrence = ~snapshot.index.duplicated(keep='first')
    return snapshot.loc[is_first_occurrence].loc[:, ['publication_date', series_id]]



In [2]:

# def select_one_vintage(df: pd.DataFrame, series_id:str,i_vintage:int=0)-> pd.DataFrame:
#     """"""
#     vintage_date=df.publication_date.drop_duplicates().nlargest(n=i_vintage+1,keep='last').iloc[i_vintage]

#     df = df.loc[df.publication_date == vintage_date]

#     df = df.loc[~df.index.duplicated(keep='first'), ['publication_date', series_id]]    

#     return df

# Loading claims data

- To get all the vintages : output_type=2 in get_all_data_vintage_fed
- To get only the first vintage : output_type=4 in get_all_data_vintage_fed
- To get only the last vintage: call get_all_data_vintage_fed(..., output_type=2) and then select_one_vintage_0(..., i_vintage=0)

Common series names for claims: <br>
- Initial Claims SA : ICSA
- Initial Claims NSA : ICNSA
- Continuous Claims SA : CCSA
- Continuous Claims NSA : CCNSA <br>
For state claims, add the two letter state code before the series : TXICSA, CACCNSA, NYCCSA etc


/!\ I haven't found a good way to request multiple series at the same time; you have to make one request per series

In [None]:
# FRED series id to download.
# NOTE(review): the variables below are suffixed `_nsa`, but CCSA is listed
# above as the seasonally adjusted series — confirm the naming is intended.
series_name='CCSA'

# First-release values (value column only) and the all-vintage table,
# fetched with the first implementation...
df_first_nsa_0 = get_all_data_vintage_fed_0(series_name,output_type=4)[series_name]
df_all_vintage_0 = get_all_data_vintage_fed_0(series_name,output_type=2)

# ...and the same two requests with the second implementation.
df_first_nsa=get_all_data_vintage_fed(series_name,output_type=4)[series_name]
df_all_vintage=get_all_data_vintage_fed(series_name,output_type=2)

# Sanity check: each line prints True when both implementations return
# identical data.
print(df_first_nsa_0.equals(df_first_nsa))
print(df_all_vintage_0.equals(df_all_vintage))


In [None]:

# Latest vintage of the series (i_vintage=0), value column only.
# NOTE(review): this cell used to call `select_one_vintage` and
# `select_one_vintage_2` and compare their results; neither name is defined in
# this notebook (the former only exists as commented-out code), so the cell
# raised NameError on a fresh kernel. `select_one_vintage_0` is the only live
# implementation and is used here.
df_last_nsa = select_one_vintage_0(df_all_vintage, series_name, 0)[series_name]

In [None]:
# Reproducing the pseudo real time claims : first vintage when available (after 2009), and last vintage before that
# First observation date that has a first-release value.
begin_live=df_first_nsa.first_valid_index()
# Last-vintage history up to begin_live, followed by the first-release series.
# `.loc[:begin_live]` is label-inclusive, so `.iloc[:-1]` removes the final row
# of that slice — presumably the begin_live observation itself, to avoid a
# duplicate (TODO confirm begin_live is present in df_last_nsa's index).
df_real_time=pd.concat([df_last_nsa.loc[:begin_live].iloc[:-1],df_first_nsa],axis=0)

In [None]:
# To get the actual publication dates, request the first-vintage series
# (output_type=4) and keep the whole frame: it has a `publication_date` column
# next to the values.
df_with_publication_date=get_all_data_vintage_fed(series_name,output_type=4)
print(df_with_publication_date)

In [None]:
# Print pandas' summary of the first-release series.
df_first_nsa.info()
# Shape of the first-release series (shown as the cell output).
df_first_nsa.shape

In [None]:
# (rows, columns) of the long all-vintage table loaded above.
df_all_vintage.shape

In [None]:
# Preview the first rows of the all-vintage table.
df_all_vintage.head()