# Abnormal Returns 

In [1]:
#imports 
import os
from os import path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from script import preprocess_isin, to_date # useful functions
%matplotlib inline


In [2]:
# Raw data and results locations, can be changed if needed.
# Both values are combined with file names through path.join in later cells.
data_folder = "data/"  # folder the input CSV files are read from
result_folder = "results/"  # folder the abnormal-return table is written to

**We first read and preprocess the data: the price indices, the main equities, and the target release dates**

In [3]:
## Price indices and main equities
equities = preprocess_isin( path.join( data_folder, 'equities.csv' ), decimal = ',' , low_memory = False )
indices = pd.read_csv( path.join( data_folder, 'indices.csv' )).set_index('Date')

## Making the two dataframes compatible:
## one date row is removed from the indices and three date columns from the equities
## (presumably so that both cover the same dates -- TODO confirm against the raw files)
indices.drop(['10/10/2013'] , axis = 0, inplace = True)
equities.drop(['11/10/2023', '12/10/2023', '13/10/2023'], axis = 1, inplace = True)
equities.drop(equities.columns[0], axis = 1, inplace = True) # useless column
equities.set_index('ISIN', inplace = True)

## Target release dates
## Read from data_folder like the other inputs, so that changing data_folder
## in the configuration cell relocates every input file consistently.
dates = preprocess_isin( path.join( data_folder, 'dates.csv' ), sep = ';' ) # filtered according to the ISINs
dates.set_index('ISIN', inplace = True)

## Grouping by country (country code = first 2 letters of the ISIN)
country = [ isin_code[:2] for isin_code in dates.index ]
country_df = pd.DataFrame( {'Country' : country}, index = dates.index)
df = pd.concat([country_df, dates], axis = 1 )
groups = df.groupby('Country')


**We create a dictionary to link the countries with their corresponding market price index**

In [4]:
## Country code (first two letters of the ISIN) -> column name of the market
## price index in `indices`. Use `indices.columns` to list the available names.
## NOTE(review): some names carry leading/trailing spaces ('S&P/ASX 200 ', 'TOPIX ',
## ' Lisbon PSI Index'); presumably they mirror the CSV headers exactly -- do not strip them.
## NOTE(review): the name `dict` shadows the Python builtin; it is kept because the
## other cells of this notebook look the mapping up under this name.
dict = {
    'FR': 'SBF 120 - PRICE INDEX',
    'US': 'S&P 500 VALUE - PRICE INDEX',
    'DE': 'DAX PERFORMANCE - PRICE INDEX',
    'AU': 'S&P/ASX 200 ',
    'AT': 'Austrian Traded Index',
    'BE': 'BEL 20 Index',
    'CA': 'S&P/TSX 60 INDEX',
    'CL': 'S&P/CLX IGPA (CLP) Index',
    'CO': 'Colombia Price Index',
    'DK': 'OMXC 25 CAP Index',
    'ES': 'IBEX 35 Index',
    'FI': 'OMX Helsinki',
    'GR': 'Athex',
    'HU': 'Budapest SE Index',
    'IE': 'ISEQ',
    'IS': 'OMX Iceland',
    'JP': 'TOPIX ',
    'LT': 'OMX Vilnius',
    'LU': 'LuxX Index',
    'MX': 'S&P/Bmv Ipc',
    'NO': 'Oslo Stock Exchange Equity Index',
    'NL': 'Amsterdam',
    'PT': ' Lisbon PSI Index',
    'SE': 'OMX Stockholm',
    'CH': 'Swiss Market Index',
    'GB': 'FTSE 100 Index',
    'CZ': 'PX Prague',
    'TR': 'BIST 100 Index',
    'CN': 'Shanghai SE 50',
}

## This gives quick access to all the data; example for France:
France = groups.get_group('FR') # rows of the 'FR' group: ISINs and corresponding announcement dates
indice_fr = indices[dict['FR']] # column of `indices` holding the SBF 120 price index

### Abnormal Returns calculation

The abnormal returns considered here are measured over the two trading days following the release of the emission mitigation plan, so that the announcement is fully incorporated into the market price. The average daily return over these two days is the geometric mean $r = \sqrt{(1+r_1)(1+r_2)} - 1$, where $r_1$ is the return on day 1 and $r_2$ the return on day 2.


In [5]:
# We define a function that calculates the abnormal return for a firm i, 
# with t days used for the linear regression, given a dataframe country 
# that contains the firms and the announcement dates, and given the 
# corresponding price index.

def abnormal_return(country, indice_country, i, t):
    """Abnormal return (in %) of one firm over the two returns following its announcement.

    A linear model ``R_company ~ R_indice`` is fitted on the daily returns that
    precede the announcement, then used to predict the firm's return for the
    next two observations. The abnormal return is the difference between the
    observed and the predicted two-day geometric-average return, times 100.

    Relies on the notebook-level ``equities`` and ``dates`` dataframes and on
    the ``to_date`` helper.

    Parameters
    ----------
    country : dataframe whose index holds the ISINs of the firms.
    indice_country : pandas Series with the price index of that country.
    i : int
        Position of the firm in ``country.index``.
    t : int
        Length parameter of the estimation window; the regression uses the
        ``t + 1`` daily returns ``R[indice - t - 1 : indice]``.

    Returns
    -------
    float or str
        The abnormal return in percent, or one of the messages
        ``'price not available'``, ``'no data on time period'``,
        ``'no data for regression'`` when it cannot be computed.
    """
    isin = country.index[i]
    date_annonce = to_date(dates.loc[isin])
    date = np.array(indice_country.index , dtype = str)

    # Testing if the equity price is available (unknown ISIN -> KeyError).
    try:
        prices = equities.loc[isin]
    except KeyError:
        return 'price not available'

    Y_company = prices[1:]
    Y_indice = indice_country
    # Dates where both the index and the equity price are available.
    valid = np.logical_and( Y_company.notna(), Y_indice.notna() )
    date = to_date(np.array(date[valid]))
    Y_company = np.array(Y_company[valid])
    Y_indice = np.array(Y_indice[valid])

    # Daily returns: element k is the return between date[k] and date[k + 1],
    # so each return array is one element shorter than `date`.
    R_company = (Y_company[1:] - Y_company[:-1])/Y_company[:-1] # daily returns of the company
    R_indice = (Y_indice[1:] - Y_indice[:-1])/Y_indice[:-1] # daily returns of the index

    # Position of the first available date on or after the announcement.
    # `except Exception` keeps the best-effort behaviour without swallowing
    # KeyboardInterrupt during the (very long) full run.
    try:
        indice = np.where( date >= date_annonce)[0][0]
    except Exception:
        return 'no data on time period'

    # Estimation window. A negative start would wrap around to the END of the
    # sample and silently fit the model on the wrong observations, so a
    # history that is too short is reported explicitly instead.
    start = indice - t - 1
    if start < 0:
        return 'no data for regression'
    y = R_company[start:indice]
    x = R_indice[start:indice].reshape((-1,1))

    # Fitting the linear model; fails when the window is empty or unusable.
    model_linReg = LinearRegression()
    try:
        model_linReg.fit(x, y)
    except Exception:
        return 'no data for regression'

    # The two returns after the announcement must exist, otherwise the
    # indexing below would raise an IndexError at the end of the sample.
    if indice + 1 >= len(R_company):
        return 'no data on time period'

    # Predictions for the two returns following the announcement.
    x_test_1 = R_indice[indice].reshape((-1,1)) # 1 day after
    x_test_2 = R_indice[indice + 1].reshape((-1,1)) # 2 days after
    prediction_1 = model_linReg.predict(x_test_1)[0]
    prediction_2 = model_linReg.predict(x_test_2)[0]
    observation_1 = R_company[indice]
    observation_2 = R_company[indice + 1]

    real = ((1 + observation_1)*(1 + observation_2))**0.5 - 1 # observed average return on 2 days
    expected = ((1 + prediction_1)*(1 + prediction_2))**0.5 - 1 # predicted average return on 2 days
    ar = (real - expected)*100
    return ar

### Example: firm at position 0 of the French group, with t = 10
abnormal_return(France, indice_fr, 0, 10)

0.8563179548335298

In [10]:
### For every company i of a country:

def Abnormal_return(country, indice_country, t):
    """Apply ``abnormal_return`` to every firm of ``country``.

    Returns a list with one entry per position of ``country.index``, in index
    order; each entry is whatever ``abnormal_return`` returns for that firm
    with the given price index ``indice_country`` and window parameter ``t``.
    """
    n_firms = len(country.index)
    return [
        abnormal_return(country, indice_country, position, t)
        for position in range(n_firms)
    ]

### Example: every firm of the French group, with t = 50

AR = Abnormal_return(France, indice_fr, 50)
print(AR)


[0.5185819665263214, 0.3808643212821927, -0.7975389197911187, -2.4466548842628444, 'no data on time period', -1.9879899893196717, 4.458781317533478, 0.46741164523065315, 0.7259450824253499, -2.0227808562953253, -1.2165220943794464, -0.4293491449965492, 0.5767948934560563, 'price not available', 'price not available', -2.340297219158971, -2.0749225892374934, 4.548994769261727, 1.1801611884125762, -0.006363819056276476, 0.24691860058845272, 0.2021897283524643, -1.3436087788742945, -0.2821541734617794, -0.5390173539358223, -1.1208280582240215, -1.3262067548538314, 0.364772458510465, -0.31768044969027187, -0.011105535851085868, 1.2210746535110895, -0.8705254882700575, 0.3943117190483125, 'no data on time period', 0.11920400113218932, -0.7793470736925934, 'price not available', 1.4708005852540063, -0.9598886612966595, -0.08580125664462424, 1.0489327180131847, 'price not available', 2.4973128193862104, -1.887635421700784, -0.017596282060994284, 2.2340813583960006, -0.6956990855683243, -1.032

In [11]:
# We want a dataframe with the abnormal returns of one country, computed with different window lengths
# for the linear regression (here from 50 to 250 days, in 50-day increments, i.e. from roughly ten
# trading weeks up to about one trading year).

def Result_DataFrame( country, indice, windows = (50, 100, 150, 200, 250) ):
    """Tabulate the abnormal returns of one country for several regression windows.

    Parameters
    ----------
    country : dataframe whose index holds the ISINs of the firms.
    indice : price index of the country, passed unchanged to ``Abnormal_return``.
    windows : iterable of int, optional
        Values of ``t`` passed to ``Abnormal_return``; one column is produced
        per value. The default reproduces the original 50, 100, ..., 250.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``ISIN`` (same order as ``country.index``), with one column
        ``AR_<t>`` per window holding the values returned by ``Abnormal_return``.
    """
    isin = np.array(country.index)
    # The columns are assembled positionally in a single constructor call.
    # Joining one temporary frame per window on the ISIN index would multiply
    # the rows at every join as soon as an ISIN appears more than once.
    columns = { f"AR_{t}" : Abnormal_return(country, indice, t) for t in windows }
    return pd.DataFrame( columns, index = pd.Index(isin, name = 'ISIN') )

# Example (Germany here)
Country = groups.get_group('DE')  # rows of the 'DE' group
Indice = indices[dict['DE']]  # column of `indices` mapped to 'DE'

# One row per ISIN of the group, one AR_<t> column per regression window
df_resultat = Result_DataFrame(Country, Indice)
print( df_resultat)  

                               AR_50                  AR_100  \
ISIN                                                           
DE000A3CNK42  no data for regression  no data for regression   
DE000A1EWWW0               -1.168443               -1.176283   
DE0005103006                -0.38608               -0.086822   
DEXXXXXALDI0     price not available     price not available   
DE000A0LD2U1               -0.337131               -0.297551   
DE0006766504               -1.659549               -1.482205   
DE000BAY0017               -0.395258               -0.737986   
DE0005200000                0.230342                0.309226   
DE0005224901     price not available     price not available   
DE0005261606                -1.61778               -1.694569   
DE0005190003               -1.047834               -1.047772   
DE0005403901               -0.519744               -0.604645   
DE0005439004               -0.947918               -0.713934   
DE0008220005     price not available    

In [12]:
# Now we have a dataframe for each country.
# We create and merge all these dataframes into one big table that will be used for the heterogeneity study.
# Very long to run; the results have already been saved in the 'results/AR.csv' file.
frames = []

for country in dict :
    Country = groups.get_group(country)
    Indice = indices[dict[country]]
    df_temporaire = Result_DataFrame( Country, Indice )
    frames.append(df_temporaire)

# Concatenate once at the end: growing `result` with pd.concat inside the loop
# copies the accumulated rows at every iteration (quadratic cost).
result = pd.concat(frames) if frames else pd.DataFrame()

print(result)


                               AR_50                  AR_100  \
ISIN                                                           
FR0000120404                0.518582                0.465339   
FR0000120073                0.380864                0.524448   
FR0010220475               -0.797539               -0.477403   
FR0000051732               -2.446655               -2.554238   
FR0000121857  no data on time period  no data on time period   
...                              ...                     ...   
TRAARCLK91H5                0.326661                0.329921   
TRABRISA91E3                1.234675                1.347348   
TRAVESTL91H6               -0.601994                 0.18265   
CNE0000019B0                0.180896                0.145385   
CNE100000NC4                -1.97536               -2.430862   

                              AR_150                  AR_200  \
ISIN                                                           
FR0000120404                0.573488   

In [9]:
# To save the result.
# The output folder is created first if needed, so that the (very long)
# computation above is not lost to a missing-directory error on write.
if result_folder:
    os.makedirs(result_folder, exist_ok = True)
result.to_csv( path.join( result_folder , 'AR.csv'))