# Index Return data

- using yahoo finance for historical stock return data 
- going to need the following
    - Dates of the announcement
    - Index returns 10 days before the announcement
    - Index returns the day of the announcement
    - Index returns 10 days after the announcement 

In [1]:
import pandas as pd
import yfinance as yf

# Load Dates for Statements & Intermeetings
dates_statements = pd.read_csv('dates/dates_updated.csv')
dates_statements['Statement Date'] = pd.to_datetime(dates_statements['Statement Date'], errors='coerce')
dates_statements['document_type'] = 'statement'

dates_intermeetings = pd.read_csv('dates/dates_updated.csv')
dates_intermeetings['Intermeeting Date'] = pd.to_datetime(dates_intermeetings['Intermeeting Date'], errors='coerce')
dates_intermeetings['document_type'] = 'intermeeting'

  dates_statements['Statement Date'] = pd.to_datetime(dates_statements['Statement Date'], errors='coerce')
  dates_intermeetings['Intermeeting Date'] = pd.to_datetime(dates_intermeetings['Intermeeting Date'], errors='coerce')


In [2]:
tickers = [
    '^GSPC', '^IXIC', '^DJI', '^RUT', '^W5000',
    'XLF', 'XLRE', 'XLU', 'XLY', 'XLP',
    'XLE', 'XLV', 'XLI', 'XLB', 'XLK', 'XLC',
    '^IRX', '^TNX'
]

In [3]:
# Load Combined Returns Data
combined_returns = pd.read_csv('raw_data/combined_dates.csv')
print(combined_returns.columns.tolist())

# Define T-10 to T+10 columns
t_columns = [f'T{t}' if t < 0 else f'T+{t}' if t > 0 else 'T0' for t in range(-10, 11)]

# Build dictionary by ticker
all_indices_data = {}

for ticker in combined_returns['ticker'].unique():
    ticker_data = combined_returns[combined_returns['ticker'] == ticker]
    ticker_data = ticker_data.set_index('announcement_date')  # Set date as index
    ticker_data.index = pd.to_datetime(ticker_data.index)  # Ensure datetime index
    all_indices_data[ticker] = ticker_data

print("All index data loaded.")

['meeting_id', 'announcement_date', 'ticker', 'document_type', 'T-15', 'T-14', 'T-13', 'T-12', 'T-11', 'T-10', 'T-9', 'T-8', 'T-7', 'T-6', 'T-5', 'T-4', 'T-3', 'T-2', 'T-1', 'T+0', 'T+1', 'T+2', 'T+3', 'T+4', 'T+5', 'T+6', 'T+7', 'T+8', 'T+9', 'T+10', 'T+11', 'T+12', 'T+13', 'T+14', 'T+15']
All index data loaded.


In [4]:
all_indices_data['^DJI']['T+0']

announcement_date
2000-02-02   -0.003428
2000-03-21    0.021264
2000-03-23    0.023297
2000-05-16    0.011731
2000-05-18    0.000700
                ...   
2025-01-08    0.002512
2025-01-29   -0.003051
2025-02-19    0.001599
2025-03-19    0.009219
2025-04-09    0.078704
Name: T+0, Length: 411, dtype: float64

In [5]:
# Define your get_trading_window function
def get_trading_window(trading_dates, fed_date, returns_series, window=10):
    try:
        fed_idx = trading_dates.get_loc(fed_date)
    except KeyError:
        idx_array = trading_dates.get_indexer([fed_date], method='ffill')
        fed_idx = idx_array[0]
        if fed_idx == -1:
            return None

    if fed_idx < 0 or fed_idx >= len(returns_series):
        return None

    result = {}
    for t in range(-window, 0):
        if 0 <= fed_idx + t < len(returns_series):
            result[f'T{t:+}'] = returns_series.iloc[fed_idx + t]
        else:
            result[f'T{t:+}'] = pd.NA
    if 0 <= fed_idx < len(returns_series):
        result['T0'] = returns_series.iloc[fed_idx]
    else:
        result['T0'] = pd.NA
    for t in range(1, window + 1):
        if 0 <= fed_idx + t < len(returns_series):
            result[f'T{t:+}'] = returns_series.iloc[fed_idx + t]
        else:
            result[f'T{t:+}'] = pd.NA

    return result

In [6]:
# Process Statement Prices
rows = []

for idx, row_fomc in dates_statements.iterrows():
    date = row_fomc['Statement Date']
    document_type = row_fomc['document_type']

    for ticker in all_indices_data.keys():
        ticker_data = all_indices_data[ticker]

        if date not in ticker_data.index:
            continue  # skip if no data

        row = {'announcement_date': date, 'ticker': ticker, 'document_type': document_type}

        available_returns = ticker_data.loc[date]

        # Find valid pre-event returns
        pre_event_returns = []
        for t in range(-15, 0):
            col_name = f'T{t}'
            if col_name in available_returns and pd.notna(available_returns[col_name]):
                pre_event_returns.append((t, available_returns[col_name]))
            if len(pre_event_returns) == 10:
                break

        # Find valid post-event returns
        post_event_returns = []
        for t in range(1, 16):
            col_name = f'T+{t}'
            if col_name in available_returns and pd.notna(available_returns[col_name]):
                post_event_returns.append((t, available_returns[col_name]))
            if len(post_event_returns) == 10:
                break

        # Event day (T0)
        t0_value = available_returns.get('T+0', pd.NA)

        # Always create full structure: T-10 to T-1, T0, T+1 to T+10
        for i in range(-10, 0):
            if len(pre_event_returns) >= abs(i):
                row[f'T{i}'] = pre_event_returns[i + 10 - 1][1]  # -10 is idx 0
            else:
                row[f'T{i}'] = pd.NA

        row['T0'] = t0_value

        for i in range(1, 11):
            if len(post_event_returns) >= i:
                row[f'T+{i}'] = post_event_returns[i - 1][1]
            else:
                row[f'T+{i}'] = pd.NA

        rows.append(row)

statements_df = pd.DataFrame(rows)

column_order = ['announcement_date', 'ticker', 'document_type'] + [f'T{t}' if t < 0 else f'T+{t}' if t > 0 else 'T0' for t in range(-10, 11)]
for col in column_order:
    if col not in statements_df.columns:
        statements_df[col] = pd.NA
statements_df = statements_df[column_order]

print("Statement prices processed.")

Statement prices processed.


In [7]:
statements_df

Unnamed: 0,announcement_date,ticker,document_type,T-10,T-9,T-8,T-7,T-6,T-5,T-4,...,T+1,T+2,T+3,T+4,T+5,T+6,T+7,T+8,T+9,T+10
0,2000-02-02,^GSPC,statement,0.010628,0.000522,-0.007095,-0.002912,-0.027634,0.006065,-0.004213,...,0.011248,-0.000421,-0.000091,0.012273,-0.020815,0.003627,-0.020969,0.002033,0.008713,-0.010256
1,2000-02-02,^TNX,statement,-0.0075,-0.003408,0.00342,0.00489,-0.014305,0.0,-0.000598,...,-0.018642,0.006487,0.017493,-0.004826,0.010608,-0.001949,-0.005258,-0.012385,0.001988,-0.000763
2,2000-02-02,^IRX,statement,0.003617,0.022945,-0.009346,0.0,0.00566,0.013133,0.001852,...,-0.012727,0.01105,-0.003643,0.009141,-0.005435,0.001822,-0.003636,-0.00365,0.021978,-0.005376
3,2000-02-02,XLC,statement,,,,,,,,...,,,,,,,,,,
4,2000-02-02,XLK,statement,0.021014,0.001765,0.005868,0.001167,-0.032634,0.021385,-0.032733,...,0.028846,0.011682,0.011547,0.009132,-0.012443,0.023769,-0.026573,0.000574,0.002872,-0.005154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3703,2025-03-19,^W5000,statement,0.00748,-0.012261,0.011536,-0.018777,0.005135,-0.027938,-0.006469,...,-0.002582,0.000463,0.019161,0.000932,-0.011556,-0.004142,-0.020066,0.004373,0.003932,0.00802
3704,2025-03-19,^RUT,statement,0.011854,-0.010803,0.010204,-0.01628,0.004321,-0.027179,0.002239,...,-0.00646,-0.005632,0.025474,-0.006637,-0.010284,-0.00392,-0.02054,-0.005615,0.000164,0.016459
3705,2025-03-19,^DJI,statement,0.008519,-0.015518,0.01142,-0.009941,0.005229,-0.020794,-0.01141,...,-0.000269,0.000763,0.014242,0.000098,-0.003116,-0.003653,-0.016922,0.010049,-0.000281,0.005605
3706,2025-03-19,^IXIC,statement,0.003074,-0.003544,0.014633,-0.026059,0.007026,-0.040003,-0.001845,...,-0.003333,0.005224,0.022747,0.004578,-0.020405,-0.005307,-0.027019,-0.001368,0.008706,0.008663


In [8]:
# Process Intermeeting Prices
rows = []

for idx, row_fomc in dates_intermeetings.iterrows():
    date = row_fomc['Intermeeting Date']
    document_type = row_fomc['document_type']

    for ticker in all_indices_data.keys():
        ticker_data = all_indices_data[ticker]

        if date not in ticker_data.index:
            continue  # skip if no data

        row = {'announcement_date': date, 'ticker': ticker, 'document_type': document_type}

        available_returns = ticker_data.loc[date]

        # Find valid pre-event returns
        pre_event_returns = []
        for t in range(-15, 0):
            col_name = f'T{t}'
            if col_name in available_returns.index and pd.notna(available_returns[col_name]):
                pre_event_returns.append((t, available_returns[col_name]))
            if len(pre_event_returns) == 10:
                break

        # Find valid post-event returns
        post_event_returns = []
        for t in range(1, 16):
            col_name = f'T+{t}'
            if col_name in available_returns.index and pd.notna(available_returns[col_name]):
                post_event_returns.append((t, available_returns[col_name]))
            if len(post_event_returns) == 10:
                break

        # Event day (T0)
        t0_value = available_returns.get('T+0', pd.NA)

        # Always create full structure: T-10 to T-1
        for i in range(-10, 0):
            idx = abs(i) - 1
            if idx < len(pre_event_returns):
                row[f'T{i}'] = pre_event_returns[idx][1]
            else:
                row[f'T{i}'] = pd.NA
        
        # Event day
        row['T0'] = t0_value
        
        # T+1 to T+10
        for i in range(1, 11):
            idx = i - 1
            if idx < len(post_event_returns):
                row[f'T+{i}'] = post_event_returns[idx][1]
            else:
                row[f'T+{i}'] = pd.NA


        rows.append(row)

intermeeting_df = pd.DataFrame(rows)

column_order = ['announcement_date', 'ticker', 'document_type'] + [f'T{t}' if t < 0 else f'T+{t}' if t > 0 else 'T0' for t in range(-10, 11)]
for col in column_order:
    if col not in intermeeting_df.columns:
        intermeeting_df[col] = pd.NA
intermeeting_df = intermeeting_df[column_order]

print("Intermeeting prices processed.")

Intermeeting prices processed.


In [9]:
intermeeting_df

Unnamed: 0,announcement_date,ticker,document_type,T-10,T-9,T-8,T-7,T-6,T-5,T-4,...,T+1,T+2,T+3,T+4,T+5,T+6,T+7,T+8,T+9,T+10
0,2000-03-23,^GSPC,intermeeting,0.004532,0.025566,-0.005353,0.004114,0.047646,0.024273,-0.017685,...,0.000072,-0.002357,-0.010585,0.000524,-0.013656,0.007164,0.004931,-0.007464,-0.004924,0.009392
1,2000-03-23,^TNX,intermeeting,-0.00277,-0.00551,-0.002747,-0.008015,-0.006055,-0.003493,-0.01006,...,0.017133,0.003401,-0.004036,-0.003404,-0.013661,-0.006925,-0.006143,-0.024223,0.007019,0.00561
2,2000-03-23,^IRX,intermeeting,0.0,0.001745,0.005263,0.0,0.001757,-0.003503,0.0,...,-0.001745,-0.006993,0.005282,0.001751,-0.001748,0.001751,-0.005245,-0.003515,0.005291,0.001754
3,2000-03-23,XLC,intermeeting,,,,,,,,...,,,,,,,,,,
4,2000-03-23,XLK,intermeeting,0.017365,0.026206,-0.004176,0.023505,0.034826,-0.017382,-0.022823,...,0.008621,0.008058,-0.024225,-0.029295,-0.02711,0.018928,-0.042312,-0.009698,0.006529,0.012973
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3685,2025-04-09,^W5000,intermeeting,-0.002977,-0.05874,-0.05105,0.00802,0.003932,0.004373,-0.020066,...,-0.035874,0.017714,0.008244,-0.001298,-0.021046,0.002311,-0.023624,0.025441,0.016976,
3686,2025-04-09,^RUT,intermeeting,-0.009245,-0.043715,-0.06591,0.016459,0.000164,-0.005615,-0.02054,...,-0.042741,0.015731,0.011117,0.001085,-0.010324,0.009198,-0.021429,0.027147,0.015268,
3687,2025-04-09,^DJI,intermeeting,-0.009115,-0.055026,-0.039772,0.005605,-0.000281,0.010049,-0.016922,...,-0.02499,0.015635,0.007761,-0.003845,-0.017329,-0.013289,-0.024828,0.026632,0.010707,
3688,2025-04-09,^IXIC,intermeeting,0.000992,-0.058174,-0.059681,0.008663,0.008706,-0.001368,-0.027019,...,-0.043075,0.020574,0.006399,-0.000494,-0.030673,-0.00127,-0.025515,0.027063,0.025007,


In [11]:
combined_df = pd.concat([statements_df, intermeeting_df], ignore_index=True)
combined_df = combined_df.dropna(subset=['announcement_date'])
combined_df = combined_df.sort_values(by='announcement_date').reset_index(drop=True)
combined_df['meeting_id'] = combined_df['announcement_date'].rank(method='dense').astype(int)
combined_df = combined_df[['meeting_id'] + [col for col in combined_df.columns if col != 'meeting_id']]
combined_df

Unnamed: 0,meeting_id,announcement_date,ticker,document_type,T-10,T-9,T-8,T-7,T-6,T-5,...,T+1,T+2,T+3,T+4,T+5,T+6,T+7,T+8,T+9,T+10
0,1,2000-02-02,^GSPC,statement,0.010628,0.000522,-0.007095,-0.002912,-0.027634,0.006065,...,0.011248,-0.000421,-0.000091,0.012273,-0.020815,0.003627,-0.020969,0.002033,0.008713,-0.010256
1,1,2000-02-02,XLY,statement,0.036186,-0.002085,-0.013584,-0.020656,-0.037318,0.00618,...,0.003906,-0.001667,-0.021158,0.022753,-0.016685,-0.00905,-0.014269,0.005211,0.002304,-0.03908
2,1,2000-02-02,^IXIC,statement,0.02833,0.004958,0.009207,0.010954,-0.032894,0.017414,...,0.033633,0.007875,0.018291,0.024465,-0.014514,0.02805,-0.020104,0.005255,0.000502,0.001556
3,1,2000-02-02,^DJI,statement,0.009188,-0.006173,-0.012016,-0.008773,-0.021645,0.001973,...,0.000931,-0.004507,-0.005291,0.004751,-0.023585,-0.00519,-0.020521,0.009077,0.018845,-0.014618
4,1,2000-02-02,^RUT,statement,0.015154,0.012776,0.013961,0.012631,-0.020583,-0.002601,...,0.023025,0.007457,0.013073,0.009579,-0.002772,0.011586,-0.009424,0.005288,0.000556,0.01392
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7393,408,2025-04-09,^IRX,intermeeting,-0.001203,-0.007168,-0.004283,0.000714,0.001669,0.001194,...,-0.007332,0.003812,-0.004272,0.001192,0.001905,-0.000713,0.001189,0.000713,-0.001187,
7394,408,2025-04-09,^TNX,intermeeting,0.04266,-0.017263,-0.033603,0.009625,-0.021196,-0.002115,...,-0.001364,0.022531,-0.028711,-0.009395,-0.010178,0.01262,0.016617,-0.003632,-0.000456,
7395,408,2025-04-09,^IXIC,intermeeting,0.000992,-0.058174,-0.059681,0.008663,0.008706,-0.001368,...,-0.043075,0.020574,0.006399,-0.000494,-0.030673,-0.00127,-0.025515,0.027063,0.025007,
7396,408,2025-04-09,XLE,intermeeting,-0.006475,-0.091999,-0.078508,0.001383,0.005886,0.011035,...,-0.065323,0.024812,0.003549,-0.001516,0.008223,0.022585,-0.026135,0.02545,-0.001843,


In [6]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

In [46]:

dates = pd.read_csv('dates/dates_updated.csv')
dates['Statement Date'] = pd.to_datetime(dates['Statement Date'])
dates['document_type'] = 'statement' 

tickers = [
    '^GSPC',     # S&P 500
    '^IXIC',     # NASDAQ Composite
    '^DJI',      # Dow Jones Industrial Average
    '^RUT',      # Russell 2000
    '^W5000',    # Wilshire 5000
    'XLF',       # Financials Sector (ETF)
    'XLRE',      # Real Estate Sector (ETF)
    'XLU',       # Utilities Sector (ETF)
    'XLY',       # Consumer Discretionary Sector (ETF)
    'XLP',       # Consumer Staples Sector (ETF)
    'XLE',       # Energy Sector (ETF)
    'XLV',       # Healthcare Sector (ETF)
    'XLI',       # Industrials Sector (ETF)
    'XLB',       # Materials Sector (ETF)
    'XLK',       # Information Technology Sector (ETF)
    'XLC',       # Communication Services Sector (ETF)
    '^IRX',      # Three-month Treasury Bill Yield
    '^TNX',      # Ten-year Treasury Yield
]

start_date = dates['Statement Date'].min() - pd.Timedelta(days=15)
end_date = dates['Statement Date'].max() + pd.Timedelta(days=15)

all_indices_data = {}
for ticker in tickers:
    #print(f"Downloading data for {ticker}...")
    data = yf.download(ticker, start=start_date, end=end_date)
    data.columns = data.columns.get_level_values(0)
    data['return'] = data['Close'].pct_change()
    all_indices_data[ticker] = data[['return']].dropna()
    #print(f"Data for {ticker} downloaded.")

rows = []

for index, row_fomc in dates.iterrows():
    date = row_fomc['Statement Date']
    document_type = row_fomc['document_type']
    for ticker in tickers:
        row = {'announcement_date': date, 'ticker': ticker, 'document_type': document_type} 
        for t in range(-15, 16):
            target_date = date + pd.Timedelta(days=t)
            if target_date in all_indices_data[ticker].index:
                row[f'T{t:+}'] = all_indices_data[ticker].loc[target_date, 'return']
            else:
                row[f'T{t:+}'] = pd.NA
        rows.append(row)

statements_df = pd.DataFrame(rows)

column_order = ['announcement_date', 'ticker', 'document_type'] + [f'T{t:+}' for t in range(-15, 16)]
statements_df = statements_df[column_order]

statements_df

statements_df.to_csv('raw_data/statement_prices.csv', index=False)

  dates['Statement Date'] = pd.to_datetime(dates['Statement Date'])
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*********

### Problem
Need to figure out how to get the days for around the intermeeting dates now because 
the dates in the html links are the same as the fed statements for those, which would cause a problem 
when getting return data.

How to get around this?
Find a dataset with the dates of the intermeeting or make our own (just manually do it - would take an hour maybe)

Ended up just manually doing this

In [47]:

dates = pd.read_csv('dates/dates_updated.csv')
dates['Intermeeting Date'] = pd.to_datetime(dates['Intermeeting Date'])
dates['document_type'] = 'intermeeting' 

tickers = [
    '^GSPC',     # S&P 500
    '^IXIC',     # NASDAQ Composite
    '^DJI',      # Dow Jones Industrial Average
    '^RUT',      # Russell 2000
    '^W5000',    # Wilshire 5000
    'XLF',       # Financials Sector (ETF)
    'XLRE',      # Real Estate Sector (ETF)
    'XLU',       # Utilities Sector (ETF)
    'XLY',       # Consumer Discretionary Sector (ETF)
    'XLP',       # Consumer Staples Sector (ETF)
    'XLE',       # Energy Sector (ETF)
    'XLV',       # Healthcare Sector (ETF)
    'XLI',       # Industrials Sector (ETF)
    'XLB',       # Materials Sector (ETF)
    'XLK',       # Information Technology Sector (ETF)
    'XLC',       # Communication Services Sector (ETF)
    '^IRX',      # Three-month Treasury Bill Yield
    '^TNX',      # Ten-year Treasury Yield
]

start_date = dates['Intermeeting Date'].min() - pd.Timedelta(days=15)
end_date = dates['Intermeeting Date'].max() + pd.Timedelta(days=15)

all_indices_data = {}
for ticker in tickers:
    #print(f"Downloading data for {ticker}...")
    data = yf.download(ticker, start=start_date, end=end_date)
    data.columns = data.columns.get_level_values(0)
    data['return'] = data['Close'].pct_change()
    all_indices_data[ticker] = data[['return']].dropna()
    #print(f"Data for {ticker} downloaded.")

rows = []

for index, row_fomc in dates.iterrows():
    date = row_fomc['Intermeeting Date']
    document_type = row_fomc['document_type']
    for ticker in tickers:
        row = {'announcement_date': date, 'ticker': ticker, 'document_type': document_type} 
        for t in range(-15, 16):
            target_date = date + pd.Timedelta(days=t)
            if target_date in all_indices_data[ticker].index:
                row[f'T{t:+}'] = all_indices_data[ticker].loc[target_date, 'return']
            else:
                row[f'T{t:+}'] = pd.NA
        rows.append(row)

intermeeting_df = pd.DataFrame(rows)

column_order = ['announcement_date', 'ticker', 'document_type'] + [f'T{t:+}' for t in range(-15, 16)]
intermeeting_df = intermeeting_df[column_order]

intermeeting_df

intermeeting_df.to_csv('raw_data/intermeeting_prices.csv', index=False)

  dates['Intermeeting Date'] = pd.to_datetime(dates['Intermeeting Date'])
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***

### Need to merge the two datasets

In [52]:
combined_df = pd.concat([statements_df, intermeeting_df], ignore_index=True)
combined_df = combined_df.dropna(subset=['announcement_date'])
combined_df = combined_df.sort_values(by='announcement_date').reset_index(drop=True)
combined_df['meeting_id'] = combined_df['announcement_date'].rank(method='dense').astype(int)
combined_df = combined_df[['meeting_id'] + [col for col in combined_df.columns if col != 'meeting_id']]
combined_df.to_csv('raw_data/combined_dates.csv', index = False)

In [53]:
combined_df

Unnamed: 0,meeting_id,announcement_date,ticker,document_type,T-15,T-14,T-13,T-12,T-11,T-10,...,T+6,T+7,T+8,T+9,T+10,T+11,T+12,T+13,T+14,T+15
0,1,2000-02-02,^GSPC,statement,,0.000522,-0.007095,-0.002912,,,...,0.012273,-0.020815,0.003627,-0.020969,,,0.002033,0.008713,-0.010256,0.000425
1,1,2000-02-02,^TNX,statement,,-0.003408,0.00342,0.00489,,,...,-0.004826,0.010608,-0.001949,-0.005258,,,-0.012385,0.001988,-0.000763,0.00336
2,1,2000-02-02,^IRX,statement,,0.022945,-0.009346,0.0,,,...,0.009141,-0.005435,0.001822,-0.003636,,,-0.00365,0.021978,-0.005376,0.003604
3,1,2000-02-02,XLC,statement,,,,,,,...,,,,,,,,,,
4,1,2000-02-02,XLK,statement,,0.001765,0.005868,0.001167,,,...,0.009132,-0.012443,0.023769,-0.026573,,,0.000574,0.002872,-0.005154,0.022452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7393,408,2025-04-09,^DJI,intermeeting,0.000098,-0.003116,-0.003653,-0.016922,,,...,-0.003845,-0.017329,-0.013289,,,,-0.024828,0.026632,0.010707,
7394,408,2025-04-09,^IXIC,intermeeting,0.004578,-0.020405,-0.005307,-0.027019,,,...,-0.000494,-0.030673,-0.00127,,,,-0.025515,0.027063,0.025007,
7395,408,2025-04-09,^IRX,intermeeting,0.0,0.001913,-0.000477,0.0,,,...,0.001192,0.001905,-0.000713,,,,0.001189,0.000713,-0.001187,
7396,408,2025-04-09,XLU,intermeeting,-0.015991,0.00663,-0.000258,0.007363,,,...,-0.000129,-0.009011,0.010262,,,,-0.023659,0.027262,0.004231,


# Sentiment Analysis 

I just copied the inputs file from the midterm so we can use the ML and LM dictionaries through that......

Shouldn't be too difficult for that part

Then we need to do the topic analysis as well......

## The tricky parts - Be working on this by tuesday or we gonna be in trouble 

### Chat GPT API integration to rank documents on bullish to bearish scale

### ChronoBERT - yikes 