In [3]:
import pandas as pd
from helpermodules import memory_handling as mh
import numpy as np
from datetime import timedelta, datetime
import time

In [4]:
# Load the preprocessed Fed speeches from a local pickle file (nothing is downloaded here).
# NOTE(review): presumably PickleHelper.pickle_load unpickles the file; unpickling can
# execute arbitrary code, so only load files from a trusted source — confirm in helpermodules.
file = "fedspeeches_preprocessed.pkl"
helper = mh.PickleHelper.pickle_load(file)
# The loaded object is exposed on the helper's `.obj` attribute.
df = helper.obj

In [28]:
import os

from twelvedata import TDClient

# Read the Twelve Data API key from the environment instead of hard-coding it:
# a key committed inside a notebook leaks to everyone who can read the file.
# NOTE: the key that was previously embedded here should be revoked and rotated.
_td_api_key = os.environ.get("TWELVEDATA_API_KEY")
if not _td_api_key:
    raise RuntimeError(
        "Set the TWELVEDATA_API_KEY environment variable before running this cell."
    )
td = TDClient(apikey=_td_api_key)

In [6]:
# Restrict the analysis to the speakers assigned to this notebook.
# `speaker_list_riccardo` is a second group of speakers; it is not used in the cells shown here.
speaker_list_riccardo = [ 'Vice Chair Janet L. Yellen', 'Governor Sarah Bloom Raskin', 'Governor Jeremy C. Stein', 'Governor Jerome H. Powell', 'Chairman  Ben S. Bernanke', 'Governor Lael Brainard', 'Chair Janet L. Yellen', 'Vice Chairman Stanley Fischer', 'Vice Chairman for Supervision Randal K. Quarles', 'Chairman Jerome H. Powell', 'Vice Chairman Richard H. Clarida', 'Chair Jerome H. Powell', 'Vice Chair Richard H. Clarida', 'Vice Chair for Supervision Randal K. Quarles']
# Speakers actually kept: the match is an exact comparison on the full title + name string.
speaker_list = ['Governor Michelle W. Bowman', 'Vice Chair for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Vice Chairman for Supervision and Chair of the Financial Stability Board Randal K. Quarles', 'Governor Christopher J. Waller', 'Governor Randal K. Quarles', 'Vice Chair for Supervision Michael S. Barr', 'Governor Lisa D. Cook', 'Vice Chair Lael Brainard', 'Governor Philip N. Jefferson', 'Chair Pro Tempore Jerome H. Powell', 'Vice Chair Philip N. Jefferson', 'Governor Adriana D. Kugler']
# Keep only the rows whose 'speaker' value appears in `speaker_list`.
df = df[df['speaker'].isin(speaker_list)]

In [45]:
def data_retrieve_minute(df, ticker, client=None, window_minutes=15,
                         requests_per_batch=8, pause_seconds=60):
    """
    Retrieve minute-level time series data for a specified ticker using the Twelve Data API.

    For every distinct value of df['date'], one request is issued covering the span from
    `window_minutes` before the earliest 'timestamp' of that date to `window_minutes`
    after the latest one.

    Parameters:
    df : pandas.DataFrame
        A dataframe containing at least two columns:
        - 'date': Dates for which data needs to be retrieved.
        - 'timestamp': Timestamps corresponding to the data entries.
    ticker : str
        The symbol of the financial instrument to retrieve data for.
    client : object, optional
        Object exposing `time_series(...).as_pandas()`. Defaults to the
        notebook-level `td` client when omitted.
    window_minutes : int, optional
        Padding, in minutes, added before the first and after the last timestamp
        of each date (default 15).
    requests_per_batch : int, optional
        Number of requests issued between pauses (default 8). A falsy value
        disables pausing.
    pause_seconds : float, optional
        Length of the pause between batches, in seconds (default 60).

    Returns:
    dffinal : pandas.DataFrame
        A concatenated DataFrame containing minute-level data for the specified ticker
        across all the unique dates in the provided dataframe `df`. The DataFrame will have:
        - 'date' as the index
        - A 'timestamp' column representing the minute-level time series.
        When no date yields any data, an empty DataFrame with the same index name and a
        'timestamp' column is returned instead of raising.
    """

    api = td if client is None else client
    padding = pd.Timedelta(minutes=window_minutes)

    # One pass over the frame gives the first/last timestamp of every date;
    # sort=False keeps the dates in their order of first appearance.
    bounds = df.groupby('date', sort=False)['timestamp'].agg(['min', 'max'])

    frames = []
    for count, (date, first_ts, last_ts) in enumerate(bounds.itertuples(name=None)):
        print(f"Processing date: {date}")
        start = first_ts - padding
        end = last_ts + padding

        # Pause after every full batch of requests (the counter advances even
        # when a request fails, since failed calls were still sent).
        if requests_per_batch and count > 0 and count % requests_per_batch == 0:
            time.sleep(pause_seconds)

        try:
            etf = api.time_series(
                symbol=ticker,
                interval="1min",
                start_date=start,
                end_date=end,
                outputsize=5000
            ).as_pandas()
            has_rows = not etf.empty
        except Exception as e:
            # Best effort: report the failure and carry on with the next date.
            print(f"Error retrieving data for {date}: {e}")
            continue

        if has_rows:
            # Tag every bar with its speech date without mutating the API result.
            frames.append(etf.assign(date=date))
        else:
            print(f"No data available for {date}")

    if not frames:
        # Nothing retrieved: return an empty frame with the documented layout
        # rather than failing on a missing 'date' column.
        return pd.DataFrame(columns=['timestamp']).rename_axis('date')

    # Concatenate once, then name the datetime index explicitly so it always
    # becomes the 'timestamp' column, whatever name the API gave it.
    dffinal = (
        pd.concat(frames)
        .rename_axis('timestamp')
        .reset_index()
        .set_index('date')
    )
    return dffinal


In [49]:
def volatility_calculator(df):
    """
    Compute the per-date volatility of the 'open' prices.

    Parameters:
    df : pandas.DataFrame
        Price data that can be grouped by 'date' and that has an 'open' column.

    Returns:
    pandas.Series
        One value per date: the standard deviation of that date's 'open'
        values, indexed by date.
    """

    # Select the 'open' prices of each date, then reduce every group to its std.
    open_by_date = df.groupby('date')['open']
    return open_by_date.std()

In [9]:
def clean_df(df, start_date="2020-12-01"):
    """
    Clean the dataframe by filtering on date and weekdays, then sorting.

    Parameters:
    df : pandas.DataFrame
        A dataframe containing at least 'date' and 'timestamp' columns.
        'date' must be datetime-like (the `.dt` accessor is used on it).
    start_date : str or datetime-like, optional
        Earliest date (inclusive) to keep. Defaults to '2020-12-01'.

    Returns:
    pandas.DataFrame
        A new dataframe sorted by 'date' and 'timestamp', filtered to include only
        rows with dates >= `start_date` and weekdays (Monday to Friday).
        The input dataframe is not modified.
    """

    dates = df['date']
    # weekday: Monday=0 ... Sunday=6, so "< 5" drops Saturdays and Sundays.
    keep = (dates >= start_date) & (dates.dt.weekday < 5)

    # Filter first so that only the surviving rows have to be sorted.
    return df[keep].sort_values(by=['date', 'timestamp'], ascending=[True, True])

In [63]:
def get_best_values(volatility, df, number):
    """
    Filter the dataframe for the top dates with the highest volatility.

    Parameters:
    volatility : pandas.Series
        A series with dates as the index and volatility as the values.
        Entries whose volatility is NaN (e.g. the standard deviation of a
        single observation) are ignored.
    df : pandas.DataFrame
        The dataframe to filter, containing a 'date' column.
    number : int
        The number of top volatility dates to select.

    Returns:
    pandas.DataFrame
        A filtered dataframe containing rows with dates matching the top
        `number` volatility values. Fewer dates are used when fewer than
        `number` dates have a defined volatility.
    """

    # Drop undefined volatilities first: otherwise they sort to the end and
    # get picked as "top" dates whenever `number` exceeds the valid count.
    ranked = volatility.dropna().sort_values(ascending=False)

    # Get the top `number` dates with the highest volatility
    top_dates = ranked.head(number).index.tolist()

    # Filter the dataframe for rows where 'date' is in the top_dates
    return df[df['date'].isin(top_dates)]


In [11]:
# Sort the speeches by date/timestamp and keep only weekday rows dated 2020-12-01 or later.
# Note: this rebinds `df`, so the unfiltered frame is no longer reachable after this cell.
df=clean_df(df)

In [None]:
# Fetch minute-level SPY bars around the speeches of every date in `df`.
# Estimated run time: about 30 minutes (the fetch loop sleeps between batches of API requests).
df_price=data_retrieve_minute(df,'SPY')
# NOTE(review): the triple-quoted text below is disabled code kept as a bare string
# expression (it is not executed); it would persist `df_price` through the pickle helper.
'''picklehelper = mh.PickleHelper(df_price)
picklehelper.pickle_dump("fabio2020onwardpricemovements")'''

In [64]:
# Per-date standard deviation of the retrieved SPY 'open' prices.
volatility=volatility_calculator(df_price)
# Keep only the speech rows that fall on the 5 dates with the highest volatility.
newdf=get_best_values(volatility,df,5)

In [65]:
# Display the speech rows selected for the highest-volatility dates.
newdf

Unnamed: 0,date,speaker,title,link,text,timestamp,text_by_minute
20081,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:00:00-05:00,Good morning. I want to thank the American Ban...
20082,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:01:00-05:00,"continue to make, a huge difference in the liv..."
20083,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:02:00-05:00,improve oversight of community banks over the ...
20084,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:03:00-05:00,striking the right balance means that at some ...
20085,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:04:00-05:00,"should be effective, but also efficient, and t..."
20086,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:05:00-05:00,"availability of services in a community, as we..."
20087,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:06:00-05:00,respect for the reasonable application of them...
20088,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:07:00-05:00,will not be necessary because we encourage com...
20089,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:08:00-05:00,aimed at curbing proprietary trading by large ...
20090,2021-02-16,Governor Michelle W. Bowman,My Perspective on Bank Regulation and Supervision,/newsevents/speech/bowman20210216a.htm,Good morning. I want to thank the American Ban...,2021-02-16 10:09:00-05:00,streamline the examination process for lower r...
