# Event Study

In [2]:
# Importing relevant packages

import pandas as pd
import datetime as dt
import csv
import os
import glob

In [4]:
# Define the folder containing the CSV files
folder_path = '/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/test_stockdata/stocks'
index_path = '/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/test_stockdata/PSEi.csv'
ar_dates = '/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/test_stockdata/ar_dates.csv'

In [6]:
# defining initializing functions

def add_index_column(file_path):
    """
    Adds an index column to a CSV file.

    Parameters:
    file_path (str): The path to the CSV file to modify.

    Returns:
    None. The function modifies the CSV file in place.

    Example:
    >>> add_index_column('/path/to/file.csv')
    """

    df = pd.read_csv(file_path)
    df.insert(0, 'index', range(1, len(df) + 1))
    df.to_csv(file_path, index=False)


In [7]:
# add index

for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path, filename)
        add_index_column(file_path)

ValueError: cannot insert index, already exists

## Step 1: Calculating Returns

In [392]:
# defining return functions

def add_return(folder_path):
    '''
    A function that takes in a folder path containing stock data files,
    calculates the returns of the price, and adds a fourth column with the results
    to the same file, excluding rows with errors.

    Parameters:
    folder_path (str): The path to the folder containing the stock data files.

    Returns:
    None
    '''
    # Loop through all files in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'): # only consider csv files
            # Read in the data file
            filepath = os.path.join(folder_path, filename)
            data = pd.read_csv(filepath)

            # Convert 'price' column to numeric type
            data['price'] = pd.to_numeric(data['price'], errors='coerce')

            # Calculate returns and add as fourth column, excluding rows with errors
            try:
                returns = data['price'].pct_change()
                data['returns'] = returns
            except TypeError:
                print(f"Error calculating returns for file {filename}. Skipping rows with errors...")
                data['returns'] = pd.NA

            # Save the updated data to the same file
            data.to_csv(filepath, index=False)

def add_psei(psei_path, folder_path):
    """
    Add PSEi returns to each CSV file in the given folder, based on the date column.
    
    Parameters:
    psei_path (str): The file path of the PSEi CSV file, which contains the date and returns columns.
    folder_path (str): The path of the folder containing the CSV files to be updated.
    
    Returns:
    None: The function does not return anything, but it updates each CSV file in place.
    """
    # Load PSEi.csv into a DataFrame
    psei_df = pd.read_csv(psei_path, parse_dates=['date'])
    psei_df = psei_df[['date', 'returns']]
    psei_df = psei_df.rename(columns={'returns': 'psei_returns'})

    # Iterate over the files in folder_path
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path, parse_dates=['date'])

            # Merge with psei_df on date
            df = pd.merge(df, psei_df, on='date', how='left')

            # Write back to file
            df.to_csv(file_path, index=False)

### Calculating returns

In [393]:
add_return(folder_path)

In [394]:
add_psei(index_path,folder_path)

## Step 2: Detemining the Relevant Period

In [10]:
import pandas as pd
import numpy as np

# Get the dates from ar_dates.csv

# read the CSV file into a DataFrame
df = pd.read_csv(ar_dates)

# convert the date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# group the data by ticker and date
result = df.groupby(['ticker', 'date']).size().reset_index(name='count')

# remove the third column from the result
result = result.iloc[:, :2]

# group the data by ticker and get the unique dates for each ticker
dates_per_ticker = df.groupby('ticker')['date'].unique()

# create an empty DataFrame to store the result
export_df = pd.DataFrame(columns=['ticker', 'dates'])

# loop through the tickers
for ticker, dates in dates_per_ticker.items():
    # convert the dates to strings and add them to the list
    dates = [np.datetime_as_string(date, unit='D') for date in dates]
    dates = sorted(set(dates))
    dates_string = ', '.join(dates)
    
    # append the ticker and dates to the export DataFrame
    export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)

# export the DataFrame to a CSV file
export_df.to_csv('output.csv', index=False)


  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)
  export_df = export_df.append({'ticker': ticker, 'dates': dates_strin

In [5]:
import pandas as pd
import numpy as np

# Get the dates from ar_dates.csv

# read the CSV file into a DataFrame
df = pd.read_csv('ar_dates.csv')

# convert the date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# group the data by ticker and date
result = df.groupby(['ticker', 'date']).size().reset_index(name='count')

# remove the third column from the result
result = result.iloc[:, :2]

# group the data by ticker and get the unique dates for each ticker
dates_per_ticker = df.groupby('ticker')['date'].unique()

# create an empty DataFrame to store the result
export_df = pd.DataFrame(columns=['ticker', 'dates'])

# loop through the tickers
for ticker, dates in dates_per_ticker.items():
    # convert the dates to strings and add them to the list
    dates = [np.datetime_as_string(date, unit='D') for date in dates]
    dates = sorted(set(dates))
    dates_string = ', '.join(dates)
    
    # append the ticker and dates to the export DataFrame
    export_df = export_df.append({'ticker': ticker, 'dates': dates_string}, ignore_index=True)

# export the DataFrame to a CSV file
export_df.to_csv('output.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'ar_dates.csv'

In [412]:
# set the path to the folder containing the stock data
data_folder = "/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/test_stockdata/stocks/"

# get a list of all csv files in the folder
csv_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]

# read the CSV file into a DataFrame
df_dates = pd.read_csv(ar_dates)

# convert the date column to datetime format
df_dates['date'] = pd.to_datetime(df_dates['date'])

# group the data by ticker and get the unique dates for each ticker
dates_per_ticker = df_dates.groupby('ticker')['date'].unique()

# loop through each csv file
for csv_file in csv_files:
    # extract the ticker from the file name
    ticker = csv_file[:-4]
    
    # get the list of dates for the current ticker
    dates = dates_per_ticker[ticker]
    dates = pd.to_datetime(dates)

    # load the data for the ticker
    df_ticker = pd.read_csv(os.path.join(data_folder, csv_file))
    df_ticker['date'] = pd.to_datetime(df_ticker['date'])

    # loop through the dates for the ticker
    for i, date in enumerate(dates):
        # convert the date to datetime format
        date = pd.to_datetime(date)

        # check if the date exists in the df_ticker['date'] column
        if date not in df_ticker['date'].values:
            print(f"No data for ticker {ticker} and date {date}")
            continue
        
        # find the indexes of the dates
        index = (df_ticker.index[df_ticker['date'] == date][0])
        end_date_est = index - 3
        str_date_est = end_date_est - 59

        # find the indexes of the anticipation window
        end_date_ant = index - 1
        str_date_ant = end_date_ant - 1

        # find the indexes of the adjustment window
        end_date_adj = index + 2
        str_date_adj = end_date_adj - 1
        
        # print a message if there's not enough data for the estimation period
        if index < 63:
            print(f"Not enough data for ticker {ticker} and date {date}")
            continue
        
        end_date_estimate = df_ticker.iloc[end_date_est]['date']
        start_date_estimate = df_ticker.iloc[str_date_est]['date']
        
        # get the rows for the estimation period
        rows_est = df_ticker.iloc[str_date_est:end_date_est+1]

        end_date_anticipate = df_ticker.iloc[end_date_ant]['date']
        start_date_anticipate = df_ticker.iloc[str_date_ant]['date']

        # get the rows for the anticipation period
        rows_ant = df_ticker.iloc[end_date_est+1:end_date_adj]

        end_date_adjustment = df_ticker.iloc[end_date_adj]['date']
        start_date_adjustment = df_ticker.iloc[str_date_adj]['date']

        # get the rows for the adjustment period
        rows_adj = df_ticker.iloc[end_date_adj:end_date_adj+3]
        
        # get the data for the ticker and estimation period
        df_estimation = df_ticker[(df_ticker['date'] >= start_date_estimate) & (df_ticker['date'] <= end_date_estimate)]
        returns_est = df_estimation['returns']
        returns_PSEi_est = df_estimation['psei_returns']
        avg_returns_est = returns_est.mean()*100

        # get the data for the ticker and anticipation period
        df_anticipate = df_ticker[(df_ticker['date'] >= start_date_anticipate) & (df_ticker['date'] <= end_date_anticipate)]
        returns_ant = df_anticipate['returns']
        returns_PSEi_ant = df_anticipate['psei_returns']
        
        # get the data for the ticker and adjustment period
        df_adjustment = df_ticker[(df_ticker['date'] >= start_date_adjustment) & (df_ticker['date'] <= end_date_adjustment)]
        returns_adj = df_adjustment['returns']
        returns_PSEi_adj = df_adjustment['psei_returns']

        from scipy.stats import linregress
        slope, intercept, r_value, p_value, std_err = linregress(returns_PSEi_est, returns_est)
        alpha = intercept
        beta = slope

        # compute CAPM AR value
        returns_all = pd.concat([returns_est, returns_ant, returns_adj])
        psei_all = pd.concat([returns_PSEi_est, returns_PSEi_ant, returns_PSEi_adj])
        caaapm = returns_all - (alpha + beta * psei_all)

        # df_new = pd.DataFrame({'filename': [ticker + '.csv'],
        #                'ticker': [ticker],
        #                'date': [date],
        #                'ep_strt': [start_date_estimate],
        #                'ep_end': [end_date_estimate],
        #                'ant_w_strt': [start_date_anticipate],
        #                'ant_w_end': [end_date_anticipate],
        #                'adj_w_strt': [start_date_adjustment],
        #                'adj_w_end': [end_date_adjustment],
        #                'ave_ret': [avg_returns_est],
        #                'alpha': [alpha],
        #                'beta': [beta]})
        # df_new.to_csv(1 + '.csv', index=False)
        
        # print the results
        print(f"Ticker: {ticker}")
        print(f"Estimation Period: {start_date_estimate.strftime('%Y-%m-%d')} to {end_date_estimate.strftime('%Y-%m-%d')}")
        print(f"Anticipation Window: {start_date_anticipate.strftime('%Y-%m-%d')} to {end_date_anticipate.strftime('%Y-%m-%d')}")
        print(f"Date: {date.strftime('%Y-%m-%d')}")
        print(f"Adjustment Window: {start_date_adjustment.strftime('%Y-%m-%d')} to {end_date_adjustment.strftime('%Y-%m-%d')}")
        print(f"Average Return: {avg_returns_est:.2f}%")
        print(f"Alpha: {alpha}")
        print(f"Beta: {beta}")
        print("")


Ticker: MEG
Estimation Period: 2018-01-12 to 2018-04-11
Anticipation Window: 2018-04-12 to 2018-04-13
Date: 2018-04-16
Adjustment Window: 2018-04-17 to 2018-04-18
Average Return: -0.17%
Alpha: -0.0003378006555979846
Beta: 0.7869139114773713

Ticker: MEG
Estimation Period: 2019-01-15 to 2019-04-11
Anticipation Window: 2019-04-12 to 2019-04-15
Date: 2019-04-16
Adjustment Window: 2019-04-17 to 2019-04-22
Average Return: 0.21%
Alpha: 0.0021562974155547717
Beta: 0.576067605220615

Ticker: MEG
Estimation Period: 2020-01-14 to 2020-04-08
Anticipation Window: 2020-04-13 to 2020-04-14
Date: 2020-04-15
Adjustment Window: 2020-04-16 to 2020-04-17
Average Return: -0.65%
Alpha: nan
Beta: nan

Ticker: MEG
Estimation Period: 2021-02-08 to 2021-05-07
Anticipation Window: 2021-05-11 to 2021-05-12
Date: 2021-05-14
Adjustment Window: 2021-05-17 to 2021-05-18
Average Return: -0.45%
Alpha: -0.0027650232498358106
Beta: 0.9187590900463368

Ticker: MEG
Estimation Period: 2022-02-10 to 2022-05-11
Anticipation 

In [404]:
# print the relevant period
print(f"Ticker: {ticker}")
print(f"Date: {date.strftime('%Y-%m-%d')}")
print("Relevant Period")
print(rows_est)
print(rows_ant)
print(rows_adj)

Ticker: MPI
Date: 2022-04-19
Relevant Period
      index       date  price   returns  psei_returns       crm
1716   1717 2022-01-17   3.90 -0.010152     -0.005166       NaN
1717   1718 2022-01-18   3.91  0.002564      0.016630       NaN
1718   1719 2022-01-19   3.88 -0.007673     -0.011223       NaN
1719   1720 2022-01-20   3.88  0.000000     -0.003065       NaN
1720   1721 2022-01-21   3.89  0.002577      0.007492       NaN
1721   1722 2022-01-24   3.84 -0.012853     -0.005605       NaN
1722   1723 2022-01-25   3.81 -0.007812      0.004904       NaN
1723   1724 2022-01-26   3.79 -0.005249     -0.004747       NaN
1724   1725 2022-01-27   3.79  0.000000      0.002745       NaN
1725   1726 2022-01-28   3.78 -0.002639     -0.002963       NaN
1726   1727 2022-01-31   3.85  0.018519      0.015124       NaN
1727   1728 2022-02-02   3.90  0.012987      0.013223       NaN
1728   1729 2022-02-03   3.86 -0.010256     -0.010219       NaN
1729   1730 2022-02-04   3.81 -0.012953      0.009966      

## Step 3: Calculating Abnormal Returns

### Market Adjusted Model

In [None]:
# defining AR functions

def compute_mam(folder_path):
    """
    Computes the difference between 'returns' and 'psei_returns' columns of each file in a folder,
    and adds a new column named 'mam' with the result.
    
    Args:
    folder_path (str): Path to the folder containing the files.
    
    Returns:
    None
    """
    # Get a list of all files in the folder
    file_list = os.listdir(folder_path)
    
    # Loop over all files
    for file_name in file_list:
        
        # Read the file into a pandas DataFrame
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)
        
        # Compute the difference between 'returns' and 'psei_returns'
        mam = df['returns'] - df['psei_returns']
        
        # Add the new column to the DataFrame
        df['mam'] = mam
        
        # Save the updated DataFrame back to the file
        df.to_csv(file_path, index=False)


In [None]:
compute_mam(folder_path)

### CAPM

In [None]:
Logic:



# 

In [11]:
file_path = '/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/17a_exports/JGS_2020.txt'

# Read the file
with open(file_path, 'r') as file:
    content = file.read()

# Remove newlines
content = content.replace('\n', '')

# Write the updated content back to the file
with open(file_path, 'w') as file:
    file.write(content)


In [None]:
import re

file_path = '/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/17a_exports/JGS_2020.txt'

# Read the file
with open(file_path, 'r') as file:
    content = file.read()

# Replace '-xx-' with a space
content = re.sub(r'-\d{2}-', ' ', content)

# Write the updated content back to the file
with open(file_path, 'w') as file:
    file.write(content)
