In [52]:
import pandas as pd
from libs.helpermodules import memory_handling as mh
import numpy as np
from datetime import timedelta, datetime
import time

In [53]:
# downloading the pickle file 
file = "fedspeeches_preprocessed.pkl"
helper = mh.PickleHelper.pickle_load(file)
df = helper.obj

In [54]:
from twelvedata import TDClient
td = TDClient(apikey="e4677a81081448f2bca9fdb93fc360e8")

In [55]:
"""
# assigned speakers for the analysis
speaker_list_lorenzo = ['Chairman Ben S. Bernanke', 'Vice Chairman Donald L. Kohn', 'Governor Susan Schmidt Bies', 'Governor Kevin Warsh', 'Governor Randall S. Kroszner', 'Governor Susan S. Bies', 'Governor Frederic S. Mishkin', 'Governor Donald L. Kohn', 'Governor Mark W. Olson', 'Vice Chairman Roger W. Ferguson, Jr.', 'Governor Elizabeth A. Duke', 'Governor Daniel K. Tarullo', 'Brian F. Madigan, Director, Division of Monetary Affairs']
speaker_list_fabio = ['Governor Michelle W. Bowman', 'Vice Chair for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Vice Chairman for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Governor Christopher J. Waller', 'Governor Randal K. Quarles', 'Vice Chair for Supervision Michael S. Barr', 'Governor Lisa D. Cook', 'Vice Chair Lael Brainard', 'Governor Philip N. Jefferson', 'Chair Pro Tempore Jerome H. Powell', 'Vice Chair Philip N. Jefferson', 'Governor Adriana D. Kugler']
speaker_list_riccardo = [ 'Vice Chair Janet L. Yellen', 'Governor Sarah Bloom Raskin', 'Governor Jeremy C. Stein', 'Governor Jerome H. Powell', 'Chairman  Ben S. Bernanke', 'Governor Lael Brainard', 'Chair Janet L. Yellen', 'Vice Chairman Stanley Fischer', 'Vice Chairman for Supervision Randal K. Quarles', 'Chairman Jerome H. Powell', 'Vice Chairman Richard H. Clarida', 'Chair Jerome H. Powell', 'Vice Chair Richard H. Clarida', 'Vice Chair for Supervision Randal K. Quarles']
speaker_list = ['Governor Michelle W. Bowman', 'Vice Chair for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Vice Chairman for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Governor Christopher J. Waller', 'Governor Randal K. Quarles', 'Vice Chair for Supervision Michael S. Barr', 'Governor Lisa D. Cook', 'Vice Chair Lael Brainard', 'Governor Philip N. Jefferson', 'Chair Pro Tempore Jerome H. Powell', 'Vice Chair Philip N. Jefferson', 'Governor Adriana D. Kugler']
df = df[df['speaker'].isin(speaker_list)]
"""

"\n# assigned speakers for the analysis\nspeaker_list_lorenzo = ['Chairman Ben S. Bernanke', 'Vice Chairman Donald L. Kohn', 'Governor Susan Schmidt Bies', 'Governor Kevin Warsh', 'Governor Randall S. Kroszner', 'Governor Susan S. Bies', 'Governor Frederic S. Mishkin', 'Governor Donald L. Kohn', 'Governor Mark W. Olson', 'Vice Chairman Roger W. Ferguson, Jr.', 'Governor Elizabeth A. Duke', 'Governor Daniel K. Tarullo', 'Brian F. Madigan, Director, Division of Monetary Affairs']\nspeaker_list_fabio = ['Governor Michelle W. Bowman', 'Vice Chair for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Vice Chairman for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Governor Christopher J. Waller', 'Governor Randal K. Quarles', 'Vice Chair for Supervision Michael S. Barr', 'Governor Lisa D. Cook', 'Vice Chair Lael Brainard', 'Governor Philip N. Jefferson', 'Chair Pro Tempore Jerome H. Powell', 'Vice Chair Philip N. Jefferson', 'Governor A

In [56]:
def data_retrieve_minute(df, ticker):
    """
    Retrieve minute-level time series data for a specified ticker using the Twelve Data API.

    Parameters:
    df : pandas.DataFrame
        A dataframe containing at least two columns:
        - 'date': Dates for which data needs to be retrieved.
        - 'timestamp': Timestamps corresponding to the data entries.
    ticker : str
        The symbol of the financial instrument to retrieve data for.

    Returns:
    dffinal : pandas.DataFrame
        A concatenated DataFrame containing minute-level data for the specified ticker
        across all the unique dates in the provided dataframe `df`. The DataFrame will have:
        - 'date' as the index
        - A 'timestamp' column representing the minute-level time series.
    """
    
    timelist = df.date.unique().tolist()
    count = 0
    dffinal = pd.DataFrame()

    for date in timelist:
        print(f"Processing date: {date}")
        start = df[df['date'] == date].timestamp.min() - pd.Timedelta(minutes=15)
        end = df[df['date'] == date].timestamp.max() + pd.Timedelta(minutes=15)

        if count % 8 == 0 and count > 0:
            time.sleep(60)

        try:
            etf = td.time_series(
                symbol=ticker,
                interval="1min",
                start_date=start,
                end_date=end,
                outputsize=5000    
            ).as_pandas()
            
            # If the API call returns data, concatenate it to the final DataFrame
            if not etf.empty:
                # Add a 'date' column to the ETF DataFrame to use as an index later
                etf['date'] = date
                dffinal = pd.concat([dffinal, etf], ignore_index=False)
            else:
                print(f"No data available for {date}")

        except Exception as e:
            print(f"Error retrieving data for {date}: {e}")

        count += 1
    
    # Set the 'date' as the index and move 'timestamp' into a separate column
    dffinal.reset_index(inplace=True)
    dffinal.rename(columns={'index': 'timestamp'}, inplace=True)
    dffinal.set_index('date', inplace=True)
    
    return dffinal


In [57]:
def volatility_calculator(df):
    """
    Calculate daily volatility for the 'open' column in the dataframe.

    Parameters:
    df : pandas.DataFrame
        A dataframe containing at least 'date' and 'open' columns.

    Returns:
    pandas.Series
        A series with dates as the index and the standard deviation
        of 'open' values (volatility) as the values.
    """

    df['pct_change'] = df['close'].pct_change()
    volatility_series = df.groupby('date')['pct_change'].std()

    return volatility_series

In [58]:
def clean_df(df):
    """
    Clean the dataframe by sorting and filtering based on date and weekdays.

    Parameters:
    df : pandas.DataFrame
        A dataframe containing at least 'date' and 'timestamp' columns.

    Returns:
    pandas.DataFrame
        A dataframe sorted by 'date' and 'timestamp', filtered to include only
        rows with dates >= '2020-12-01' and weekdays (Monday to Friday).
    """
        
    df_ordered = df.sort_values(by=['date', 'timestamp'], ascending = [True, True])
    df_final = df_ordered[df_ordered.date >= "2024-01-01"]
    df_final = df_final[df_final['date'].dt.weekday<5]
    return df_final

In [59]:
def get_best_values(volatility, df, number):
    """
    Filter the dataframe for the top dates with the highest volatility.

    Parameters:
    volatility : pandas.Series
        A series with dates as the index and volatility as the values.
    df : pandas.DataFrame
        The dataframe to filter, containing a 'date' column.
    number : int
        The number of top volatility dates to select.

    Returns:
    pandas.DataFrame
        A filtered dataframe containing rows with dates matching the top 
        `number` volatility values.
    """
    
    # Sort the volatility in descending order
    volatility = volatility.sort_values(ascending=False)

    # Get the top `number` dates with the highest volatility
    top_dates = volatility.head(number).index.tolist()

    # Filter the dataframe for rows where 'date' is in the top_dates
    filtered_df = df[df['date'].isin(top_dates)]

    return filtered_df


In [60]:
df=clean_df(df)

In [61]:
#estimated time 30 mins
df_price=data_retrieve_minute(df,'SPY')

Processing date: 2024-01-08 00:00:00
Processing date: 2024-01-16 00:00:00
Processing date: 2024-01-17 00:00:00
Processing date: 2024-02-02 00:00:00
Processing date: 2024-02-07 00:00:00
Processing date: 2024-02-12 00:00:00
Processing date: 2024-02-13 00:00:00
Processing date: 2024-02-14 00:00:00
Processing date: 2024-02-15 00:00:00
Processing date: 2024-02-16 00:00:00
Processing date: 2024-02-22 00:00:00
Processing date: 2024-02-27 00:00:00
Processing date: 2024-03-01 00:00:00
Processing date: 2024-03-05 00:00:00
Processing date: 2024-03-07 00:00:00
Processing date: 2024-03-25 00:00:00
Processing date: 2024-03-27 00:00:00
Processing date: 2024-04-01 00:00:00
Processing date: 2024-04-02 00:00:00
Processing date: 2024-04-03 00:00:00
Processing date: 2024-04-04 00:00:00
Processing date: 2024-04-05 00:00:00
Processing date: 2024-04-16 00:00:00
Processing date: 2024-04-18 00:00:00
Processing date: 2024-05-03 00:00:00
Processing date: 2024-05-08 00:00:00
Processing date: 2024-05-10 00:00:00
P

In [65]:
volatility=volatility_calculator(df_price)
print(volatility)

newdf= get_best_values(volatility,df,10)

date
2024-01-08    0.000298
2024-01-16    0.002092
2024-01-17    0.000665
2024-02-02    0.005688
2024-02-07    0.001823
                ...   
2024-10-10    0.000763
2024-10-11    0.001113
2024-10-14    0.001651
2024-10-18    0.000409
2024-10-23    0.001019
Name: pct_change, Length: 69, dtype: float64


In [66]:
newdf

Unnamed: 0,date,speaker,title,link,text,timestamp,text_by_minute
24211,2024-02-02,Governor Michelle W. Bowman,The Future of Banking,/newsevents/speech/bowman20240202a.htm,Thank you for the invitation to join you today...,2024-02-02 10:00:00-05:00,Thank you for the invitation to join you today...
24212,2024-02-02,Governor Michelle W. Bowman,The Future of Banking,/newsevents/speech/bowman20240202a.htm,Thank you for the invitation to join you today...,2024-02-02 10:01:00-05:00,in running their banks and that regulators sho...
24213,2024-02-02,Governor Michelle W. Bowman,The Future of Banking,/newsevents/speech/bowman20240202a.htm,Thank you for the invitation to join you today...,2024-02-02 10:02:00-05:00,expenditures (PCE) inflation came in at 2.6 an...
24214,2024-02-02,Governor Michelle W. Bowman,The Future of Banking,/newsevents/speech/bowman20240202a.htm,Thank you for the invitation to join you today...,2024-02-02 10:03:00-05:00,"last year, when the average pace of job gains ..."
24215,2024-02-02,Governor Michelle W. Bowman,The Future of Banking,/newsevents/speech/bowman20240202a.htm,Thank you for the invitation to join you today...,2024-02-02 10:04:00-05:00,it will eventually become appropriate to gradu...
...,...,...,...,...,...,...,...
22885,2024-09-06,Governor Christopher J. Waller,The Time Has Come,/newsevents/speech/waller20240906a.htm,"Thank you, Eric, and thank you for the opportu...",2024-09-06 10:17:00-04:00,consecutive meetings. If the data suggests the...
22886,2024-09-06,Governor Christopher J. Waller,The Time Has Come,/newsevents/speech/waller20240906a.htm,"Thank you, Eric, and thank you for the opportu...",2024-09-06 10:18:00-04:00,Market Committee.
22811,2024-09-20,Governor Michelle W. Bowman,Statement by Governor Michelle W. Bowman,/newsevents/speech/bowman20240920a.htm,"On Wednesday, September 18, 2024, I dissented ...",2024-09-20 10:00:00-04:00,"On Wednesday, September 18, 2024, I dissented ..."
22812,2024-09-20,Governor Michelle W. Bowman,Statement by Governor Michelle W. Bowman,/newsevents/speech/bowman20240920a.htm,"On Wednesday, September 18, 2024, I dissented ...",2024-09-20 10:01:00-04:00,remain low. I see the normalization in labor m...


Top 10:
- 2024-02-02, Governor Michelle W. Bowman, volatility: 0.005687810721570463 (2)
- 2024-02-13, Governor Michelle W. Bowman, volatility: 0.0022360938435709203
- 2024-03-25, Governor Lisa D. Cook, volatility: 0.0021026977815426866
- 2024-04-16, Vice Chair Philip N. Jefferson, volatility: 0.0033502979431021926 (5)
- 2024-05-03, Governor Michelle W. Bowman, volatility: 0.002971970744676029
- 2024-06-14, Governor Lisa D. Cook, volatility: 0.0022751537610719947
- 2024-07-09, Governor Michelle W. Bowman/Vice Chair for Supervision Michael S. Barr, volatility: 0.002354437418123992
- 2024-07-24, Governor Michelle W. Bowman, volatility: 0.0037475454452706555 (4)
- 2024-09-06, Governor Christopher J. Waller, volatility: 0.004246426893978678 (3)
- 2024-09-20, Governor Michelle W. Bowman, volatility: 0.005912257414525222 (1)
