In [1]:
%run _startup.ipynb

In [2]:
# Load variables from a local .env file into the process environment
# (presumably so the API key is available to the client below — confirm).
from dotenv import load_dotenv
load_dotenv()

# Shared OpenAI client used by every chat request in this notebook.
# No credentials are passed explicitly here.
from openai import OpenAI
client = OpenAI()

### 1. Get the list of main FOMC topics

In [3]:
# Load the statements table; its 'txt' column is the text processed below.
# NOTE: unpickling can execute arbitrary code — only load pickle files produced by this project.
df = pd.read_pickle('data/df_statements.pkl')

def text_shorten(x):
    """Return the part of ``x`` that precedes the first 'Voting for'.

    The marker introduces the list of members, which is dropped. When the
    marker is absent the text is returned unchanged.
    """
    head, _marker, _members = x.partition('Voting for')
    return head
# Store the shortened text (everything before 'Voting for') next to the original in a new column.
df['txt2'] = df['txt'].map(text_shorten)

In [4]:
def estimate_tokens(text, tokens_per_word=1.5):
    """Return rough size figures for ``text``.

    Parameters
    ----------
    text : str
        Text to measure.
    tokens_per_word : float
        Multiplier applied to the word count to approximate the token count
        (a rule of thumb, not a real tokenizer).

    Returns
    -------
    tuple
        ``(number of characters, number of words, estimated tokens)``.

    The word pattern matches runs of lowercase letters and digits only, so
    all-uppercase words are not counted.
    """
    n_words = len(re.findall('[0-9a-z]+', text))
    return len(text), n_words, n_words * tokens_per_word


# Size of the full statements.
txt = '\n'.join(df['txt'])
print(*estimate_tokens(txt))

# Size of the shortened statements (text before 'Voting for').
txt = '\n'.join(df['txt2'])
print(*estimate_tokens(txt))

142010 21405 32107.5
123132 18828 28242.0


In [5]:
# Build one prompt from the shortened statements and ask the model for the
# key indicators. The slice drops the first five rows.
# NOTE(review): the reason for skipping five rows is not stated here — confirm.
txt = '\n'.join(df['txt2'][5:])
prompt1 = f"Create a list of key economic indicators from the following text: {txt}"

# temperature 0 is used for this request; the later requests use 1.
completion = client.chat.completions.create(
    model="gpt-4o",
    temperature=0,
    messages=[{"role": "user", "content": prompt1}],
)

In [6]:
# Show the text of the first choice returned by the request above.
ch = completion.choices[0]
print(ch.message.content)

Based on the provided text, here is a list of key economic indicators:

1. **Labor Market Conditions**:
   - Job gains
   - Unemployment rate

2. **Economic Activity**:
   - Overall economic activity
   - Household spending
   - Business fixed investment
   - Exports

3. **Inflation**:
   - Overall inflation rate
   - Inflation for items other than food and energy
   - Market-based measures of inflation compensation
   - Survey-based measures of longer-term inflation expectations

4. **Federal Funds Rate**:
   - Target range for the federal funds rate

5. **Monetary Policy Actions**:
   - Treasury securities and agency mortgage-backed securities purchases
   - Repurchase agreement operations
   - Reduction of holdings of Treasury securities and agency debt

6. **Financial Conditions**:
   - Credit conditions for households and businesses
   - Financial and international developments

7. **Global Developments**:
   - Impact of global events such as the coronavirus pandemic and geopoliti

### 2. Get relevant indicators from FRED

In [84]:
# Indicator outline that is inserted into the FRED prompt below.
# Compared with the model answer printed above, this copy adds 'Production',
# 'Inflation pressures' and 'Inflation expectations' and has no
# 'Federal Funds Rate' group (presumably edited by hand).
txt_ind = '''
1. **Labor Market Conditions**:
   - Job gains
   - Unemployment rate

2. **Economic Activity**:
   - Overall economic activity
   - Household spending
   - Business fixed investment
   - Exports
   - Production

3. **Inflation**:
   - Overall inflation rate
   - Inflation for items other than food and energy
   - Market-based measures of inflation compensation
   - Survey-based measures of longer-term inflation expectations
   - Inflation pressures
   - Inflation expectations

4. **Monetary Policy Actions**:
   - Treasury securities and agency mortgage-backed securities purchases
   - Repurchase agreement operations
   - Reduction of holdings of Treasury securities and agency debt

5. **Financial Conditions**:
   - Credit conditions for households and businesses
   - Financial and international developments

6. **Global Developments**:
   - Impact of global events such as the coronavirus pandemic and geopolitical tensions

'''

In [88]:
# Header of the pipe-separated table the model is asked to produce.
txt_out = 'FRED Index|FRED name|Description|Units|Frequency|Group'

# Ask which FRED series cover the indicator outline in txt_ind.
prompt1 = f"""
    You are acting as an economist.
    What indicators can I take from the FRED website to analyze the following economic indicators: {txt_ind},
    Create an output csv file with the following format: {txt_out},
    Separator is |
"""

completion = client.chat.completions.create(
    model="gpt-4o",
    temperature=1,
    messages=[{"role": "user", "content": prompt1}],
)

In [89]:
# Allow up to 250 characters per cell when frames are displayed, so long text columns are shown more fully.
pd.set_option('display.max_colwidth',250)
def create_csv(x):
    """Parse a pipe-separated table out of a chat reply into a DataFrame.

    Parameters
    ----------
    x : str
        Reply text. If it contains a ``` fenced block, the first fenced block
        is parsed and the line carrying the opening fence (e.g. ```csv) is
        dropped. If there is no fence, every line that contains '|' is
        treated as a table row.

    Returns
    -------
    pandas.DataFrame
        The first parsed line supplies the column names and the remaining
        lines supply the rows. All values are strings with surrounding
        whitespace removed. An empty DataFrame is returned when no table
        line is found.
    """
    parts = x.split('```')
    if len(parts) > 1:
        # parts[1] is the content of the first fenced block; its first line is
        # whatever follows the opening fence on the same line.
        lines = parts[1].split('\n')[1:]
    else:
        # No fence in the reply: keep only lines that look like table rows.
        lines = [line for line in x.split('\n') if '|' in line]

    # Skip blank lines and strip padding around the separator so that
    # 'A | B' and 'A|B' give the same column names and values.
    rows = [
        [cell.strip() for cell in line.split('|')]
        for line in lines
        if line.strip()
    ]
    if not rows:
        return pd.DataFrame()

    df_out = pd.DataFrame(rows[1:], columns=rows[0])
    return df_out

In [90]:
# Parse the first choice of the latest reply into a table and display it.
ch = completion.choices[0]
df_out = create_csv(ch.message.content)
df_out

Unnamed: 0,FRED Index,FRED name,Description,Units,Frequency,Group
0,PAYEMS,Total Nonfarm Payrolls,Total nonfarm payroll employment represents job gains in thousands.,Thousands,Monthly,Labor Market Conditions
1,UNRATE,Civilian Unemployment Rate,The unemployment rate reflects the number of unemployed as a percentage of the labor force.,Percent,Monthly,Labor Market Conditions
2,GDPC1,Real Gross Domestic Product,The real GDP measures the value of economic output adjusted for price changes.,Billions of Chained 2012 Dollars,Quarterly,Economic Activity
3,PCE,Personal Consumption Expenditures,Measures household spending on goods and services.,Billions of Dollars,Monthly,Economic Activity
4,PNFICA,Private Nonresidential Fixed Investment,Private sector investments in structures and equipment.,Billions of Dollars,Quarterly,Economic Activity
5,EXPGS,Exports of Goods and Services,The value of goods and services exported by a country.,Billions of Dollars,Quarterly,Economic Activity
6,INDPRO,Industrial Production Index,"Measures the output of the industrial sector, including manufacturing, mining, and utilities.",Index 2017=100,Monthly,Economic Activity
7,CPIAUCSL,Consumer Price Index for All Urban Consumers,Reflects overall inflation rate by tracking changes in cost of a basket of consumer goods.,Index 1982-1984=100,Monthly,Inflation
8,CPILFESL,Consumer Price Index for All Urban Consumers: All Items Less Food & Energy,Excludes food and energy for core inflation analysis.,Index 1982-1984=100,Monthly,Inflation
9,T10YIE,10-Year Breakeven Inflation Rate,Market-based measure of inflation compensation.,Percent,Daily,Inflation


In [94]:
# Send the same FRED request several times and pool the parsed replies
# (presumably because replies differ from run to run at temperature 1).
txt_out = 'FRED Index|FRED name|Description|Units|Frequency|Group'
prompt1 = f"""
    You are acting as an economist.
    What indicators can I take from the FRED website to analyze the following economic indicators: {txt_ind},
    Create an output csv file with the following format: {txt_out},
    Separator is |
"""

n_tries = 5
frames = []
for tr in range(n_tries):
    completion = client.chat.completions.create(
        model = "gpt-4o",
        temperature = 1,
        messages = [
            {"role": "user", "content": prompt1}
        ]
    )
    ch = completion.choices[0]
    df_out1 = create_csv(ch.message.content)
    frames.append(df_out1)

# Concatenate once at the end instead of growing a frame inside the loop;
# row indexes of the individual replies are kept, so they repeat.
df_out = pd.concat(frames)
print(df_out.shape)

(87, 7)


In [95]:
# Collapse the pooled replies to one row per FRED series.
# NOTE: the "appears at least 2 times" filter is NOT applied — list_ind is
# computed below, but the lines that would use it are commented out.
# NOTE(review): feats comes from the current txt_out; a later cell reassigns
# txt_out without 'Group', so re-running this cell after it would fail on
# sort_values('Group').
feats = txt_out.split('|')
# Number of pooled rows per FRED Index.
df_cnt = df_out.groupby('FRED Index').size()
# Last value per column for each series, sorted by Group.
# NOTE(review): feats contains the grouping key 'FRED Index' itself — confirm
# this selection still works on the installed pandas version.
df_all = df_out.groupby('FRED Index')[feats].last().reset_index(drop = True).sort_values('Group')

print(df_all.shape)
# Series that occur more than once in the pooled replies (currently unused).
list_ind = list(df_cnt[df_cnt > 1].index)
# df_all = df_all[df_all['FRED Index'].isin(list_ind)]
# print(df_all.shape)

(44, 6)


In [122]:
# df_all

In [123]:
# df_all.to_clipboard()

### 3. Describe the selected indicators

In [124]:
# FRED series IDs, one per line, to be described by the model in the next request.
# NOTE(review): how this list was chosen is not recorded in the notebook — presumably hand-picked.
txt_in = '''
GDP
PNFI
PCE
INDPRO
GDPC1
EXPGS
DPCCRV1Q225SBEA
Y033RC1Q027SBEA
CFNAI
DRTSCILM
DTWEXBGS
BUSLOANS
TOTALSL
NFCI
CPILFESL
CPIAUCSL
PCEPI
PCEPILFE
MICH
PAYEMS
UNRATE
EMRATIO
WALCL
EXCSRESNS
TREAST
WSECOUT
GEPUCURRENT
'''

# Header of the requested table; unlike the earlier requests it has no 'Group' field.
txt_out = 'FRED Index|FRED name|Description|Units|Frequency'

# Ask the model to describe each series ID listed in txt_in.
prompt1 = f"""
    You are acting as an economist.
    Take this list of indicators from FRED website: {txt_in},
    Create an output csv file with the following format: {txt_out},
    Separator is |
"""

completion = client.chat.completions.create(
    model="gpt-4o",
    temperature=1,
    messages=[{"role": "user", "content": prompt1}],
)

# Parse the reply into a table and display it.
# NOTE(review): in the saved output some rows contain 'Unspecified ...'
# placeholders, so the model's descriptions should be checked against FRED.
ch = completion.choices[0]
df_out = create_csv(ch.message.content)
df_out

Unnamed: 0,FRED Index,FRED name,Description,Units,Frequency
0,GDP,Gross Domestic Product,Total value of goods produced and services provided,Billions of Dollars,Quarterly
1,PNFI,Private Nonresidential Fixed Investment,Expenditures on private sector fixed assets,Billions of Dollars,Quarterly
2,PCE,Personal Consumption Expenditures,Expenditures by households on goods and services,Billions of Dollars,Monthly
3,INDPRO,Industrial Production Index,Output of the industrial sector of the economy,Index 2017=100,Monthly
4,GDPC1,Real GDP,Inflation-adjusted value of goods produced and services provided,Billions of Chained 2012 Dollars,Quarterly
5,EXPGS,Exports of Goods and Services,Total export of goods and services,Billions of Dollars,Quarterly
6,DPCCRV1Q225SBEA,Durable Goods FRED Code,Unspecified description,Unspecified units,Unspecified frequency
7,Y033RC1Q027SBEA,Unspecified FRED Name,Unspecified description,Unspecified units,Unspecified frequency
8,CFNAI,Chicago Fed National Activity Index,A weighted average of 85 existing monthly indicators of national economic activity,"Index, 0 = historical trend",Monthly
9,DRTSCILM,Delinquency Rate on All Loans,Loan payment defaults as a percentage of total loans,Percentage,Quarterly


In [121]:
# Copy the table to the system clipboard (presumably for editing outside the notebook).
# NOTE(review): this cell's execution count (121) is lower than the cell above it (124),
# so the notebook was run out of order.
df_out.to_clipboard()

### 4. The final indicators table

In [153]:
# The final table of indexes
# NOTE(review): the table is read from the system clipboard, so this cell only
# works when the right content was copied beforehand and cannot be reproduced
# with Restart & Run All. The displayed columns (Group, Name, Postprocessing)
# differ from df_out's, so the table was presumably edited outside the notebook.
# TODO: save the final table to a file and load it from there.
df_all = pd.read_clipboard()
df_all

Unnamed: 0,Group,FRED Index,Name,Description,Units,Frequency,Postprocessing
0,Economic Activity,GDP,Gross Domestic Product,Total market value of all goods and services produced in the U.S.,Billions of Dollars,Quarterly,pct12
1,Economic Activity,GDPC1,Real Gross Domestic Product,GDP adjusted for inflation,Billions of Dollars,Quarterly,pct12
2,Economic Activity,EXPGS,Exports of Goods and Services,Total value of goods and services exported,Billions of Dollars,Quarterly,pct12
3,Economic Activity,PNFI,Private Nonresidential Fixed Investment,Business investment in fixed assets,Billions of Dollars,Quarterly,pct12
4,Economic Activity,INDPRO,Industrial Production Index,Measure of industrial production output,Index 2017=100,Lag monthly,
5,Economic Activity,CFNAI,Chicago Fed National Activity Index,A weighted average of 85 indicators of national economic activity.,Index,Lag monthly,
6,Financial Conditions,TOTBKCR,"Bank Credit, All Commercial Banks","Bank Credit, All Commercial Banks",Billions of Dollars,Weekly,pct12
7,Financial Conditions,DPSACBW027SBOG,"Deposits, All Commercial Banks","Deposits, All Commercial Banks",Billions of Dollars,Weekly,pct12
8,Financial Conditions,STLFSI4,St. Louis Fed Financial Stress Index,The STLFSI4 measures the degree of financial stress in the markets and is constructed from 18 weekly data series,Index,Weekly,
9,Financial Conditions,NFCI,Chicago Fed Financial Conditions Index,"Measures overall financial conditions in money markets, debt and equity markets, and the banking systems.",Index,Weekly,


In [154]:
# Series IDs per Group, space-separated.
# NOTE(review): in the saved output PCE is listed under 'Inflation' — confirm that grouping is intended.
df_all.groupby('Group')['FRED Index'].agg(' '.join).to_dict()

{'Economic Activity': 'GDP GDPC1 EXPGS PNFI INDPRO CFNAI',
 'Financial Conditions': 'TOTBKCR DPSACBW027SBOG STLFSI4 NFCI',
 'Inflation': 'CPILFESL CPIAUCSL PCE PCEPI PCEPILFE MICH',
 'Labor Market Conditions': 'PAYEMS UNRATE EMRATIO',
 'Monetary Policy Actions': 'WSECOUT'}

In [155]:
# Series IDs per Frequency label, space-separated.
# 'Lag monthly' is a label from the pasted table; its meaning is not defined in this notebook.
df_all.groupby('Frequency')['FRED Index'].agg(' '.join).to_dict()

{'Lag monthly': 'INDPRO CFNAI PCE PCEPI PCEPILFE MICH',
 'Monthly': 'CPILFESL CPIAUCSL PAYEMS UNRATE EMRATIO',
 'Quarterly': 'GDP GDPC1 EXPGS PNFI',
 'Weekly': 'TOTBKCR DPSACBW027SBOG STLFSI4 NFCI WSECOUT'}

In [156]:
# Series IDs per Postprocessing label, space-separated.
# Rows whose Postprocessing is empty (e.g. INDPRO in the table above) do not
# appear in the result, because groupby leaves out missing keys by default.
df_all.groupby('Postprocessing')['FRED Index'].agg(' '.join).to_dict()

{'diff12': 'UNRATE EMRATIO',
 'pct12': 'GDP GDPC1 EXPGS PNFI TOTBKCR DPSACBW027SBOG CPILFESL CPIAUCSL PCE PCEPI PCEPILFE PAYEMS WSECOUT'}