# Monthly Panel Data Set

This notebook merges the monthly fund ownership data with the monthly stock level data. The following stock level variables are constructed in the process:
- monthly standard deviation
- amihud ratio
- past 12 months returns
- bid ask spread
- add country and industry fixed effects

The output file of this notebook is monthly_panel_v1.csv


#### merge formatted_final.csv with m_stock_level_data.csv --> monthly_panel_v1.csv

In [1]:
import pandas as pd

def import_func(subset = "none"):
    if subset == "none":
        ownership_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/fund_holdings_data/formatted_final.csv"
        output_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"

    if subset == "europe":
        ownership_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/fund_holdings_data/formatted_final_europe_van.csv"
        output_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"
    
    ############### import formatted_final
    df_ownership = pd.read_csv(ownership_path)

    ############### import m_stock_level_data
    path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/stock_level_data/m_stock_level_data.csv"
    df_add = pd.read_csv(path)

    df_add['date'] = pd.to_datetime(df_add['date'])
    df_ownership["date"] = pd.to_datetime(df_ownership['date'])

    result_df = pd.merge(df_ownership, df_add[['date', 
                                               'stock_RIC', 
                                               'price', 
                                               "return1Mo", 
                                               "gross_profit", 
                                               "price_to_BV",
                                               "min_monthly_return",
                                               "max_monthly_return"]], on=['date', 'stock_RIC'], how='left')

    ############# export as monthly_panel_v1.csv
    result_df.to_csv(output_path, index=False)

    display(result_df)

#import_func(subset = "none")
import_func(subset = "europe")

Unnamed: 0,date,stock_RIC,index_member_600,index_member_50,stock_value_held,percent_of_traded_shares,FUND_stock_value_held,FUND_percent_of_traded_shares,INDEX_FUND_stock_value_held,INDEX_FUND_percent_of_traded_shares,...,ETF_ownership,FUND_ownership,INDEX_FUND_ownership,ACTIVE_FUND_ownership,price,return1Mo,gross_profit,price_to_BV,min_monthly_return,max_monthly_return
0,2009-12-31,0MW4EUR.xbo^K15,0,0,3.073419,0.1655,101.889071,4.7543,3.486577,0.1713,...,,,,,,,,,,
1,2010-01-31,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,98.684809,4.5919,3.486577,0.1713,...,,,,,,,,,,
2,2010-02-28,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,97.881422,4.5459,3.184193,0.1540,...,,,,,,,,,,
3,2010-03-31,0MW4EUR.xbo^K15,0,0,2.824438,0.1533,106.506665,4.8721,3.270862,0.1546,...,,,,,,,,,,
4,2010-04-30,0MW4EUR.xbo^K15,0,0,2.850560,0.1539,96.433853,4.3504,5.939661,0.2685,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196219,2023-07-31,ZURN.S,1,1,2052.542650,3.1067,11438.503679,17.3140,3199.414024,4.8451,...,0.031069,0.173143,0.048429,0.124714,439.079073,-0.964933,,2.772687,-0.015040,0.016244
196220,2023-08-31,ZURN.S,1,1,2039.457827,3.2077,11315.144184,17.7587,3195.650139,5.0353,...,0.032147,0.178354,0.050371,0.127983,433.477862,-1.378327,,2.907803,-0.016848,0.008277
196221,2023-09-30,ZURN.S,1,1,1996.051120,3.1388,11221.412654,17.5905,3117.850540,4.9036,...,0.031395,0.176496,0.049039,0.127457,434.412512,1.669894,,2.841227,-0.010187,0.018290
196222,2023-10-31,ZURN.S,1,1,2101.553892,3.2116,11505.237587,17.6709,3250.279289,4.9704,...,0.032108,0.175778,0.049658,0.126120,447.220479,2.499405,,2.926635,-0.021812,0.014813


# Variable construction

In [2]:
import pandas as pd
from scipy.stats import skew
import pandas as pd
import numpy as np

def load_data(file_path):
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df.dropna(subset=['date', 'return0D'], inplace=True)
    
    return df

def calculate_monthly_std_dev(df):
    def median_based_skew(series):
        # Ensure the series is a pandas Series and has numeric type
        if not isinstance(series, pd.Series):
            series = pd.Series(series)
        
        mean = series.mean()
        median = series.median()
        std_dev = series.std()

        # Calculate median-based skewness
        if std_dev != 0:  # To avoid division by zero
            skewness = 3 * (mean - median) / std_dev
        else:
            skewness = 0  # If std_dev is 0, skewness calculation does not make sense
        
        return skewness

    # Group by 'stock_RIC' and 'date' (rounded to month), and aggregate
    monthly_stats = df.groupby(['stock_RIC', pd.Grouper(key='date', freq='M')])['return0D'].agg(
        lag_max_monthly_return='max',
        lag_min_monthly_return='min',
        monthly_std_dev='std',
        count_returns='count',
        skewness=median_based_skew
    ).reset_index()

    print(monthly_stats)
    return monthly_stats

def merge_with_panel(df, df_panel):
    ## store original date in new colum
    df['DATE_monthly_std_dev'] = df['date']

    ## shift date column back by one month
    df['date'] = df['date'].dt.date
    df['date'] = pd.to_datetime(df['date'])
    df['date'] = df["date"] - pd.offsets.MonthEnd(1)
    
    ## use year month identifier for merging (if last day of month does not align)
    df['year_month'] = df['date'].dt.to_period('M')
    df.drop(columns=['date'], inplace=True, errors='ignore')
    

    df_panel['date'] = pd.to_datetime(df_panel['date'])
    df_panel['year_month'] = df_panel['date'].dt.to_period('M')
    
    merged_df = pd.merge(df_panel, df, on=['year_month', 'stock_RIC'], how='left')

    return merged_df

def main_std_calculation(subset = "none"):

    stock_level_file = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/stock_level_data/stock_level_data.csv"
    if subset == "none": panel_data_file = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": panel_data_file = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"
    
    df = load_data(stock_level_file)
    df_panel = pd.read_csv(panel_data_file)

    ## calculate monthly variables
    monthly_std_dev = calculate_monthly_std_dev(df)
    display(monthly_std_dev)

    ## merge
    df_merged = merge_with_panel(monthly_std_dev, df_panel)
    display(df_merged)

    df_merged.to_csv(panel_data_file, index = False)

#main_std_calculation(subset = "none")
main_std_calculation(subset = "europe")

              stock_RIC       date  lag_max_monthly_return  \
0       0MW4EUR.xbo^K15 2014-07-31                0.026549   
1       0MW4EUR.xbo^K15 2014-08-31                0.022472   
2       0MW4EUR.xbo^K15 2014-09-30                0.019475   
3       0MW4EUR.xbo^K15 2014-10-31                0.054108   
4       0MW4EUR.xbo^K15 2014-11-30                0.034640   
...                 ...        ...                     ...   
144239           ZURN.S 2023-08-31                0.008277   
144240           ZURN.S 2023-09-30                0.018290   
144241           ZURN.S 2023-10-31                0.014813   
144242           ZURN.S 2023-11-30                0.018180   
144243           ZURN.S 2023-12-31                0.014312   

        lag_min_monthly_return  monthly_std_dev  count_returns  skewness  
0                    -0.017391         0.013699             13  0.940350  
1                    -0.021525         0.012185             20  0.070334  
2                    -0.028846

Unnamed: 0,stock_RIC,date,lag_max_monthly_return,lag_min_monthly_return,monthly_std_dev,count_returns,skewness
0,0MW4EUR.xbo^K15,2014-07-31,0.026549,-0.017391,0.013699,13,0.940350
1,0MW4EUR.xbo^K15,2014-08-31,0.022472,-0.021525,0.012185,20,0.070334
2,0MW4EUR.xbo^K15,2014-09-30,0.019475,-0.028846,0.013004,22,-0.119033
3,0MW4EUR.xbo^K15,2014-10-31,0.054108,-0.034213,0.020221,23,-0.138026
4,0MW4EUR.xbo^K15,2014-11-30,0.034640,-0.025998,0.012503,20,0.358263
...,...,...,...,...,...,...,...
144239,ZURN.S,2023-08-31,0.008277,-0.016848,0.006852,22,-0.564718
144240,ZURN.S,2023-09-30,0.018290,-0.010187,0.006906,21,-0.309496
144241,ZURN.S,2023-10-31,0.014813,-0.021812,0.009319,22,0.211551
144242,ZURN.S,2023-11-30,0.018180,-0.006356,0.006680,22,1.221607


Unnamed: 0,date,stock_RIC,index_member_600,index_member_50,stock_value_held,percent_of_traded_shares,FUND_stock_value_held,FUND_percent_of_traded_shares,INDEX_FUND_stock_value_held,INDEX_FUND_percent_of_traded_shares,...,price_to_BV,min_monthly_return,max_monthly_return,year_month,lag_max_monthly_return,lag_min_monthly_return,monthly_std_dev,count_returns,skewness,DATE_monthly_std_dev
0,2009-12-31,0MW4EUR.xbo^K15,0,0,3.073419,0.1655,101.889071,4.7543,3.486577,0.1713,...,,,,2009-12,,,,,,NaT
1,2010-01-31,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,98.684809,4.5919,3.486577,0.1713,...,,,,2010-01,,,,,,NaT
2,2010-02-28,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,97.881422,4.5459,3.184193,0.1540,...,,,,2010-02,,,,,,NaT
3,2010-03-31,0MW4EUR.xbo^K15,0,0,2.824438,0.1533,106.506665,4.8721,3.270862,0.1546,...,,,,2010-03,,,,,,NaT
4,2010-04-30,0MW4EUR.xbo^K15,0,0,2.850560,0.1539,96.433853,4.3504,5.939661,0.2685,...,,,,2010-04,,,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196219,2023-07-31,ZURN.S,1,1,2052.542650,3.1067,11438.503679,17.3140,3199.414024,4.8451,...,2.772687,-0.015040,0.016244,2023-07,0.008277,-0.016848,0.006852,22.0,-0.564718,2023-08-31
196220,2023-08-31,ZURN.S,1,1,2039.457827,3.2077,11315.144184,17.7587,3195.650139,5.0353,...,2.907803,-0.016848,0.008277,2023-08,0.018290,-0.010187,0.006906,21.0,-0.309496,2023-09-30
196221,2023-09-30,ZURN.S,1,1,1996.051120,3.1388,11221.412654,17.5905,3117.850540,4.9036,...,2.841227,-0.010187,0.018290,2023-09,0.014813,-0.021812,0.009319,22.0,0.211551,2023-10-31
196222,2023-10-31,ZURN.S,1,1,2101.553892,3.2116,11505.237587,17.6709,3250.279289,4.9704,...,2.926635,-0.021812,0.014813,2023-10,0.018180,-0.006356,0.006680,22.0,1.221607,2023-11-30


### calculate monthly standard devation and merge it to monthly panel

### Amihud ratio

In [4]:
import pandas as pd

def main_amihud_ratio(subset = "none"):
    if subset == "none": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"
    panel_df = pd.read_csv(panel_path)

    stock_level_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/stock_level_data/stock_level_data.csv"
    stock_level_df = pd.read_csv(stock_level_path)

    #####################################################
    stock_level_df['date'] = pd.to_datetime(stock_level_df['date'])
    panel_df['date'] = pd.to_datetime(panel_df['date'])

    # Calculate the Amihud ratio
    stock_level_df['amihud_ratio'] = 10**6 * abs(stock_level_df['return0D']) / (stock_level_df['volume'] * stock_level_df['price'])

    # Group by stock and month, and calculate the mean Amihud ratio for each month
    stock_level_df['month'] = stock_level_df['date'].dt.to_period('M')
    monthly_amihud = stock_level_df.groupby(['stock_RIC', 'month']).agg({
        'amihud_ratio': 'mean',
        'date': 'max'  # Takes the last available date in the month, which might not be the last day
    }).reset_index()

    # Set each date to the last day of the respective month
    monthly_amihud['date'] = monthly_amihud['month'].dt.to_timestamp(how='end').dt.date

    # Drop the 'month' column as it's no longer needed
    monthly_amihud.drop('month', axis=1, inplace=True)
    monthly_amihud['date'] = pd.to_datetime(monthly_amihud['date'])

    ######### merge this shit
    merged_df = pd.merge(panel_df, monthly_amihud, on=['date', 'stock_RIC'], how='left')
    display(merged_df)

    # Save the merged DataFrame
    merged_df.to_csv(panel_path, index = False)

#main_amihud_ratio(subset = "none")
main_amihud_ratio(subset = "europe")

Unnamed: 0,date,stock_RIC,index_member_600,index_member_50,stock_value_held,percent_of_traded_shares,FUND_stock_value_held,FUND_percent_of_traded_shares,INDEX_FUND_stock_value_held,INDEX_FUND_percent_of_traded_shares,...,min_monthly_return,max_monthly_return,year_month,lag_max_monthly_return,lag_min_monthly_return,monthly_std_dev,count_returns,skewness,DATE_monthly_std_dev,amihud_ratio
0,2009-12-31,0MW4EUR.xbo^K15,0,0,3.073419,0.1655,101.889071,4.7543,3.486577,0.1713,...,,,2009-12,,,,,,,
1,2010-01-31,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,98.684809,4.5919,3.486577,0.1713,...,,,2010-01,,,,,,,
2,2010-02-28,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,97.881422,4.5459,3.184193,0.1540,...,,,2010-02,,,,,,,
3,2010-03-31,0MW4EUR.xbo^K15,0,0,2.824438,0.1533,106.506665,4.8721,3.270862,0.1546,...,,,2010-03,,,,,,,
4,2010-04-30,0MW4EUR.xbo^K15,0,0,2.850560,0.1539,96.433853,4.3504,5.939661,0.2685,...,,,2010-04,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196219,2023-07-31,ZURN.S,1,1,2052.542650,3.1067,11438.503679,17.3140,3199.414024,4.8451,...,-0.015040,0.016244,2023-07,0.008277,-0.016848,0.006852,22.0,-0.564718,2023-08-31,0.000072
196220,2023-08-31,ZURN.S,1,1,2039.457827,3.2077,11315.144184,17.7587,3195.650139,5.0353,...,-0.016848,0.008277,2023-08,0.018290,-0.010187,0.006906,21.0,-0.309496,2023-09-30,0.000063
196221,2023-09-30,ZURN.S,1,1,1996.051120,3.1388,11221.412654,17.5905,3117.850540,4.9036,...,-0.010187,0.018290,2023-09,0.014813,-0.021812,0.009319,22.0,0.211551,2023-10-31,0.000060
196222,2023-10-31,ZURN.S,1,1,2101.553892,3.2116,11505.237587,17.6709,3250.279289,4.9704,...,-0.021812,0.014813,2023-10,0.018180,-0.006356,0.006680,22.0,1.221607,2023-11-30,0.000082


### past 12-month return

In [5]:
import pandas as pd

def main_past_12_months_return(subset = "none"):
    if subset == "none": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"

    df = pd.read_csv(panel_path)

    # Convert 'date' to datetime format and ensure it is sorted
    df['date'] = pd.to_datetime(df['date'])
    df.sort_values(by=['stock_RIC', 'date'], inplace=True)

    # Calculate the price 12 months ago by shifting the prices within each stock group
    df['price_12_months_ago'] = df.groupby('stock_RIC')['price'].shift(12)

    # Calculate the 12-month cumulative return
    df['cumulative_return_12m'] = (df['price'] - df['price_12_months_ago']) / df['price_12_months_ago']

    # Drop the helper column if it's no longer needed
    df.drop('price_12_months_ago', axis=1, inplace=True)
    df = df.loc[:, ~df.columns.str.contains('Unnamed')]

    df.to_csv(panel_path, index=False)

#main_past_12_months_return(subset = "none")
main_past_12_months_return(subset = "europe")

### bid ask spread

In [6]:
import pandas as pd

def calculate_monthly_bid_ask_spread(subset = "none"):

    ################################## import stock_level_data
    stock_level_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/stock_level_data/stock_level_data.csv"
    df = pd.read_csv(stock_level_path)

    df = df[["stock_RIC", "date", "bid_price", "ask_price"]].copy(deep = True)
    # Ensure the date column is in datetime format
    df['date'] = pd.to_datetime(df['date'])

    # Calculate the daily bid-ask spread and the midpoint
    df['bid_ask_spread'] = df['ask_price'] - df['bid_price']
    df['midpoint'] = (df['ask_price'] + df['bid_price']) / 2
    df['relative_spread'] = df['bid_ask_spread'] / df['midpoint']

    # Set date as index
    df.set_index('date', inplace=True)

    # Calculate the monthly average of the relative spread
    monthly_spread = df.groupby([pd.Grouper(freq='M'), 'stock_RIC'])['relative_spread'].mean().reset_index()

    # Rename columns for clarity
    monthly_spread.rename(columns={'relative_spread': 'monthly_relative_spread'}, inplace=True)

    ################################## import monthly_panel
    if subset == "none": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": panel_path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"

    panel_df = pd.read_csv(panel_path)
    panel_df['date'] = pd.to_datetime(panel_df['date'])

    ################################## merge monthly_spread with monthly_panel
    monthly_spread['year_month'] = monthly_spread['date'].dt.to_period('M')
    panel_df['year_month'] = panel_df['date'].dt.to_period('M')
    monthly_spread = monthly_spread.drop(columns=['date'])

    merged_df = pd.merge(panel_df, monthly_spread, on = ["stock_RIC", "year_month"], how = "left")
    merged_df.to_csv(panel_path, index=False)
    display(merged_df)

#calculate_monthly_bid_ask_spread(subset = "none")
calculate_monthly_bid_ask_spread(subset = "europe")

Unnamed: 0,date,stock_RIC,index_member_600,index_member_50,stock_value_held,percent_of_traded_shares,FUND_stock_value_held,FUND_percent_of_traded_shares,INDEX_FUND_stock_value_held,INDEX_FUND_percent_of_traded_shares,...,year_month,lag_max_monthly_return,lag_min_monthly_return,monthly_std_dev,count_returns,skewness,DATE_monthly_std_dev,amihud_ratio,cumulative_return_12m,monthly_relative_spread
0,2009-12-31,0MW4EUR.xbo^K15,0,0,3.073419,0.1655,101.889071,4.7543,3.486577,0.1713,...,2009-12,,,,,,,,,
1,2010-01-31,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,98.684809,4.5919,3.486577,0.1713,...,2010-01,,,,,,,,,
2,2010-02-28,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,97.881422,4.5459,3.184193,0.1540,...,2010-02,,,,,,,,,
3,2010-03-31,0MW4EUR.xbo^K15,0,0,2.824438,0.1533,106.506665,4.8721,3.270862,0.1546,...,2010-03,,,,,,,,,
4,2010-04-30,0MW4EUR.xbo^K15,0,0,2.850560,0.1539,96.433853,4.3504,5.939661,0.2685,...,2010-04,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196219,2023-07-31,ZURN.S,1,1,2052.542650,3.1067,11438.503679,17.3140,3199.414024,4.8451,...,2023-07,0.008277,-0.016848,0.006852,22.0,-0.564718,2023-08-31,0.000072,0.028789,0.000240
196220,2023-08-31,ZURN.S,1,1,2039.457827,3.2077,11315.144184,17.7587,3195.650139,5.0353,...,2023-08,0.018290,-0.010187,0.006906,21.0,-0.309496,2023-09-30,0.000063,-0.018435,0.000242
196221,2023-09-30,ZURN.S,1,1,1996.051120,3.1388,11221.412654,17.5905,3117.850540,4.9036,...,2023-09,0.014813,-0.021812,0.009319,22.0,0.211551,2023-10-31,0.000060,0.063450,0.000238
196222,2023-10-31,ZURN.S,1,1,2101.553892,3.2116,11505.237587,17.6709,3250.279289,4.9704,...,2023-10,0.018180,-0.006356,0.006680,22.0,1.221607,2023-11-30,0.000082,0.035510,0.000247


### lagged monthly standard deviation

In [7]:
import pandas as pd

def add_lagged_monthly_std_dev(subset = "none"):
    if subset == "none": path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"

    df = pd.read_csv(path, index_col=False)
    # Ensure the date column is in datetime format
    df['date'] = pd.to_datetime(df['date'])

    # Sort the DataFrame by stock_RIC and date
    df = df.sort_values(by=['stock_RIC', 'date'])

    # Group by stock_RIC and shift the monthly_std_dev column by one period to get the lagged value
    df['T1_monthly_std_dev'] = df.groupby('stock_RIC')['monthly_std_dev'].shift(1)
    df['T2_monthly_std_dev'] = df.groupby('stock_RIC')['monthly_std_dev'].shift(2)
    df['T3_monthly_std_dev'] = df.groupby('stock_RIC')['monthly_std_dev'].shift(3)

    df.to_csv(path, index=False)

#add_lagged_monthly_std_dev(subset = "none")
add_lagged_monthly_std_dev(subset = "europe")

# Add country and industry fixed effects

### merge country and industry variables to quarterly data

In [8]:
import pandas as pd

def add_country_fe(subset = "none"):
    if subset == "none": path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1.csv"
    if subset == "europe": path = "/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/variable_data/monthly_panel_v1_europe_van.csv"
    df = pd.read_csv(path, index_col=False)

    df_fixed_vars = pd.read_csv("/Users/jonathanzeh/Library/CloudStorage/OneDrive-Personal/BA_Thesis/BA_coding/datasets/eikon_data/stock_level_data/stock_level_data_countries.csv", index_col=False)

    merged_df = pd.merge(df, df_fixed_vars, on='stock_RIC', how='left')
    display(merged_df)

    merged_df.to_csv(path, index=False)

#add_country_fe(subset = "none")
add_country_fe(subset = "europe")

Unnamed: 0,date,stock_RIC,index_member_600,index_member_50,stock_value_held,percent_of_traded_shares,FUND_stock_value_held,FUND_percent_of_traded_shares,INDEX_FUND_stock_value_held,INDEX_FUND_percent_of_traded_shares,...,amihud_ratio,cumulative_return_12m,monthly_relative_spread,T1_monthly_std_dev,T2_monthly_std_dev,T3_monthly_std_dev,headquarters_country,exchange_country,business_sector,economic_sector
0,2009-12-31,0MW4EUR.xbo^K15,0,0,3.073419,0.1655,101.889071,4.7543,3.486577,0.1713,...,,,,,,,Italy,Italy,Automobiles & Auto Parts,Consumer Cyclicals
1,2010-01-31,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,98.684809,4.5919,3.486577,0.1713,...,,,,,,,Italy,Italy,Automobiles & Auto Parts,Consumer Cyclicals
2,2010-02-28,0MW4EUR.xbo^K15,0,0,3.036434,0.1629,97.881422,4.5459,3.184193,0.1540,...,,,,,,,Italy,Italy,Automobiles & Auto Parts,Consumer Cyclicals
3,2010-03-31,0MW4EUR.xbo^K15,0,0,2.824438,0.1533,106.506665,4.8721,3.270862,0.1546,...,,,,,,,Italy,Italy,Automobiles & Auto Parts,Consumer Cyclicals
4,2010-04-30,0MW4EUR.xbo^K15,0,0,2.850560,0.1539,96.433853,4.3504,5.939661,0.2685,...,,,,,,,Italy,Italy,Automobiles & Auto Parts,Consumer Cyclicals
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196219,2023-07-31,ZURN.S,1,1,2052.542650,3.1067,11438.503679,17.3140,3199.414024,4.8451,...,0.000072,0.028789,0.000240,0.008561,0.006432,0.012204,Switzerland,Switzerland,Insurance,Financials
196220,2023-08-31,ZURN.S,1,1,2039.457827,3.2077,11315.144184,17.7587,3195.650139,5.0353,...,0.000063,-0.018435,0.000242,0.006852,0.008561,0.006432,Switzerland,Switzerland,Insurance,Financials
196221,2023-09-30,ZURN.S,1,1,1996.051120,3.1388,11221.412654,17.5905,3117.850540,4.9036,...,0.000060,0.063450,0.000238,0.006906,0.006852,0.008561,Switzerland,Switzerland,Insurance,Financials
196222,2023-10-31,ZURN.S,1,1,2101.553892,3.2116,11505.237587,17.6709,3250.279289,4.9704,...,0.000082,0.035510,0.000247,0.009319,0.006906,0.006852,Switzerland,Switzerland,Insurance,Financials
