In [None]:
class msg1_api:

    '''
    Class of functions to retrieve data from MSG1.

    Note: Import the following packages and start a Bloomberg query before calling the functions.
    The functions look up ``pd`` and ``bquery`` as module-level names.

    import pandas as pd
    import numpy as np
    import blp
    from blp import blp
    import xbbg
    from xbbg import blp as blp_1

    bquery = blp.BlpQuery().start()

    '''

    # Column layouts shared by the empty-result fallbacks of the functions below.
    _RUN_COLUMNS = ["Date", "time", "type", "value", "size", "brokerBuyCode", "spreadPrice", "brokerSellCode"]

    _LAST_RUN_COLUMNS = ["Date", "Dealer", "Bid Size", "Ask Size", "Bid Spd", "Ask Spd"]

    _AGGREGATE_COLUMNS = ["Date", "Dealer", "Bid Size", "Ask Size", "Bid Spd", "Ask Spd", "Largest Bid",
                          "2nd Bid", "3rd Bid", "Largest Ask", "2nd Ask", "3rd Ask", "n Bids", "n Asks",
                          "Net Axe", "Gross Axe"]

    _SUMMED_COLUMNS = ["Bid Size", "Ask Size", "n Bids", "n Asks", "Net Axe", "Gross Axe"]

    @staticmethod
    def get_runs(date : str, start_time : str, end_time : str, isin : str):

        '''
        Return RUNs for a given isin on a single date, restricted to a time window.

        date (str): date in format "yyyy-mm-dd"
        start_time, end_time (str): start and end time in format "Thh:mm:ss" (end_time > start_time)
        isin (str): security unique identifier

        Returns the DataFrame produced by the Bloomberg query with an extra "Date" column. When the
        query returns nothing, a warning is printed and an empty DataFrame with the expected columns
        is returned instead.

        Note: UTC time is assumed. During British Summer Time the United Kingdom is UTC+01:00, so a run
        for 15:00 UK time must be requested as 14:00 UTC.

        '''

        runs = bquery.bdit(isin + " @MSG1 Corp",
                           ["BID", "ASK"],
                           date + start_time,
                           date + end_time,
                           options = {"includeBrokerCodes" : True,
                                      "includeSpreadPrice" : True})

        if runs.empty:

            print(f"Warning: No data available for {isin} on the {date}. Returning an empty DataFrame.")

            return pd.DataFrame(columns = msg1_api._RUN_COLUMNS)

        runs["Date"] = date

        return runs

    @staticmethod
    def clean_runs(dataframe):

        '''
        Keep RUNs with a positive size, strip whitespace from the broker codes and collapse
        brokerBuyCode and brokerSellCode into a single "Dealer" column.

        dataframe: call 'get_runs()' function as your input.

        Returns a new DataFrame (the input is not modified) with a fresh 0..n-1 index.

        '''

        broker_columns = ["brokerBuyCode", "brokerSellCode"]

        runs = dataframe[dataframe["size"] > 0].copy()

        for column in broker_columns:

            # Fill before touching `.str`: a code column that is entirely NaN (e.g. a day with only
            # BIDs) has float dtype and would make the `.str` accessor raise. Only the code columns
            # are filled so numeric columns such as spreadPrice keep their NaN and their dtype.
            runs[column] = runs[column].fillna("").astype(str).str.strip()

        # Each row carries only one of the two codes, so concatenation yields the quoting dealer.
        runs["Dealer"] = runs["brokerBuyCode"] + runs["brokerSellCode"]

        return runs.drop(columns = broker_columns).reset_index(drop = True)

    @staticmethod
    def last_runs(dataframe):

        '''
        Return, for every dealer, the size and spread of its most recent BID and ASK run.

        dataframe: call 'clean_runs(get_runs())' as your input

        Returns one row per dealer with columns Date, Dealer, Bid Size, Ask Size, Bid Spd, Ask Spd.
        Rows sharing the latest timestamp of a dealer/side are combined (sizes summed, spreads
        averaged and truncated to int). A side without a quote, or without a usable spread, is
        reported with 0.

        '''

        if dataframe.empty:

            return pd.DataFrame(columns = msg1_api._LAST_RUN_COLUMNS)

        latest_times = dataframe.groupby(["Dealer", "type"])["time"].max().reset_index()

        latest = pd.merge(latest_times, dataframe, on = ["Dealer", "type", "time"], how = "left")

        if latest.empty:

            return pd.DataFrame(columns = msg1_api._LAST_RUN_COLUMNS)

        run_date = latest["Date"].iloc[0]

        rows = []

        # unique() keeps order of appearance, so the output order is reproducible between runs
        # (iterating a set of strings is not).
        for dealer in latest["Dealer"].unique():

            row = {"Date" : run_date, "Dealer" : dealer}

            for side, label in (("BID", "Bid"), ("ASK", "Ask")):

                quotes = latest[(latest["Dealer"] == dealer) & (latest["type"] == side)]

                # Coerce so that blanks or NaN in spreadPrice are skipped rather than raising.
                spread = pd.to_numeric(quotes["spreadPrice"], errors = "coerce").mean()

                row[label + " Size"] = quotes["size"].sum()
                row[label + " Spd"] = 0 if pd.isna(spread) else int(spread)

            rows.append(row)

        return pd.DataFrame(rows, columns = msg1_api._LAST_RUN_COLUMNS)

    @staticmethod
    def aggregate_runs(dataframe):

        '''
        Return aggregate information about last runs from each dealer.

        dataframe: call 'last_runs(clean_runs(get_runs()))' as your input

        Returns a single-row DataFrame labelled "Aggregate" with total bid/ask size, size-weighted
        average bid/ask spread, the top three dealers by bid and by ask size (padded with 0), the
        number of dealers quoting each side, and the net (bid - ask) and gross (bid + ask) axe.
        An empty input gives an empty DataFrame with the same columns.

        '''

        if dataframe.empty:

            return pd.DataFrame(columns = msg1_api._AGGREGATE_COLUMNS)

        summary = {}

        for side in ("Bid", "Ask"):

            size = dataframe[side + " Size"]

            total = size.sum()

            # With no size on this side there is nothing to weight; report 0.0.
            weighted_spread = (dataframe[side + " Spd"] * size / total).sum() if total else 0.0

            quoting = dataframe[size > 0]

            # Stable sort so dealers with equal size keep their input order.
            ranked = list(quoting.sort_values(by = side + " Size", ascending = False, kind = "mergesort")["Dealer"][:3])
            ranked += [0] * (3 - len(ranked))

            summary[side] = (total, weighted_spread, ranked, quoting["Dealer"].count())

        tot_bid_size, wgt_avg_bid_spd, bid_rank_3, n_bids = summary["Bid"]
        tot_ask_size, wgt_avg_ask_spd, ask_rank_3, n_asks = summary["Ask"]

        return pd.DataFrame({"Date" : [dataframe["Date"].iloc[0]],
                             "Dealer" : ["Aggregate"],
                             "Bid Size" : [tot_bid_size],
                             "Ask Size" : [tot_ask_size],
                             "Bid Spd" : [wgt_avg_bid_spd],
                             "Ask Spd" : [wgt_avg_ask_spd],
                             "Largest Bid" : [bid_rank_3[0]],
                             "2nd Bid" : [bid_rank_3[1]],
                             "3rd Bid" : [bid_rank_3[2]],
                             "Largest Ask" : [ask_rank_3[0]],
                             "2nd Ask" : [ask_rank_3[1]],
                             "3rd Ask" : [ask_rank_3[2]],
                             "n Bids" : [n_bids],
                             "n Asks" : [n_asks],
                             "Net Axe" : [tot_bid_size - tot_ask_size],
                             "Gross Axe" : [tot_bid_size + tot_ask_size]
                            })

    @staticmethod
    def _daily_aggregate(date : str, start_time : str, end_time : str, isin : str):

        '''
        Run the get_runs -> clean_runs -> last_runs -> aggregate_runs pipeline for one isin and date.

        '''

        runs = msg1_api.get_runs(date = date, start_time = start_time, end_time = end_time, isin = isin)

        return msg1_api.aggregate_runs(msg1_api.last_runs(msg1_api.clean_runs(runs)))

    @staticmethod
    def time_series_aggregate_runs(start_date : str, end_date : str, start_time : str, end_time : str, isin : str):

        '''
        Return a time series of the aggregate information collected from function msg1_api.aggregate_runs.

        start_date,end_date (str): date in format "yyyy-mm-dd"
        start_time, end_time (str): start and end time in format "Thh:mm:ss" (end_time > start_time)
        isin (str): security unique identifier

        Returns one row per business day, indexed by date, with an extra "Momentum" column holding the
        day-over-day change in "Net Axe". Missing values are replaced with 0. Dates whose processing
        raises a KeyError are reported and skipped.

        '''

        daily = dict()

        for day in pd.bdate_range(start = start_date, end = end_date):

            try:

                daily[day] = msg1_api._daily_aggregate(date = str(day.date()),
                                                       start_time = start_time,
                                                       end_time = end_time,
                                                       isin = isin)

            except KeyError as e:

                print(f"Error on {day}: {e}")

                continue

        if not daily:

            # pd.concat refuses an empty mapping; return an empty result instead of crashing.
            return pd.DataFrame(columns = msg1_api._AGGREGATE_COLUMNS + ["Momentum"])

        series = pd.concat(daily, axis = 0).reset_index(level = 1, drop = True)

        series["Momentum"] = series["Net Axe"].diff(periods = 1)

        return series.fillna(0)

    @staticmethod
    def multi_bond_time_series_aggregate_runs(start_date: str, end_date: str, start_time: str, end_time: str, isins: list):

        '''
        Return a time series of the aggregate information collected from function msg1_api.aggregate_runs,
        summed across multiple ISINs.

        start_date,end_date (str): date in format "yyyy-mm-dd"
        start_time, end_time (str): start and end time in format "Thh:mm:ss" (end_time > start_time)
        isins(list): securities unique identifiers

        Returns a DataFrame indexed by "Date" with the summed size, count and axe columns plus a
        "Momentum" column holding the change in "Net Axe" from the previous date (NaN on the first row).

        Note: exceptions raised while retrieving a bond are not caught here; they propagate to the caller.

        '''

        dates = list(pd.bdate_range(start = start_date, end = end_date).strftime('%Y-%m-%d'))

        frames = []

        for isin in isins:

            for date in dates:

                bond_data = msg1_api._daily_aggregate(date = date,
                                                      start_time = start_time,
                                                      end_time = end_time,
                                                      isin = isin)

                # Empty frames carry no rows to sum and would only degrade the column dtypes.
                if not bond_data.empty:

                    frames.append(bond_data)

        if not frames:

            df_res = pd.DataFrame(columns = msg1_api._SUMMED_COLUMNS + ["Momentum"])
            df_res.index.name = "Date"

            return df_res

        combined = pd.concat(frames, ignore_index = True)

        df_res = combined.groupby("Date")[msg1_api._SUMMED_COLUMNS].sum()

        df_res["Momentum"] = df_res["Net Axe"].diff(periods = 1)

        return df_res
