# Abnormal Returns 

In [2]:
#imports 
import os
from os import path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from script import preprocess_isin, to_date # useful functions
%matplotlib inline


In [3]:
# Input (raw data) and output (results) directories; edit here to relocate them.
data_folder, result_folder = "data/", "results/"

**We first read and preprocess the data: the market price indices, the main equities, and the target release dates.**

In [4]:
## Price indices and main equities
equities = preprocess_isin(path.join(data_folder, 'equities.csv'), decimal=',', low_memory=False)
indices = pd.read_csv(path.join(data_folder, 'indices.csv')).set_index('Date')

## Making the two dataframes compatible:
## one row is removed from the indices and three date columns from the equities.
indices = indices.drop(['10/10/2013'], axis=0)
equities = equities.drop(['11/10/2023', '12/10/2023', '13/10/2023'], axis=1)
equities = equities.drop(equities.columns[0], axis=1)  # useless column
equities = equities.set_index('ISIN')

## Target release dates (read from `data_folder`, like the other inputs)
dates = preprocess_isin(path.join(data_folder, 'dates.csv'), sep=';')  # filtering by ISIN
dates = dates.set_index('ISIN')

## Grouping by country (country code = first 2 letters of the ISIN)
country = [isin_code[:2] for isin_code in dates.index]
country_df = pd.DataFrame({'Country': country}, index=dates.index)
df = pd.concat([country_df, dates], axis=1)
groups = df.groupby('Country')


**We create a dictionary to link the countries with their corresponding market price index**

In [5]:
## Mapping: country code (first two letters of the ISIN) -> name of the column
## of `indices` used as that country's market price index.
## The strings are used verbatim as column labels, so leading/trailing spaces
## inside them must be kept as they are.
## NOTE: the name `dict` shadows the Python builtin; it is kept because the
## following cells refer to it.
dict = {
    'FR': 'SBF 120 - PRICE INDEX',
    'US': 'S&P 500 VALUE - PRICE INDEX',
    'DE': 'DAX PERFORMANCE - PRICE INDEX',
    'AU': 'S&P/ASX 200 ',
    'AT': 'Austrian Traded Index',
    'BE': 'BEL 20 Index',
    'CA': 'S&P/TSX 60 INDEX',
    'CL': 'S&P/CLX IGPA (CLP) Index',
    'CO': 'Colombia Price Index',
    'DK': 'OMXC 25 CAP Index',
    'ES': 'IBEX 35 Index',
    'FI': 'OMX Helsinki',
    'GR': 'Athex',
    'HU': 'Budapest SE Index',
    'IE': 'ISEQ',
    'IS': 'OMX Iceland',
    'JP': 'TOPIX ',
    'LT': 'OMX Vilnius',
    'LU': 'LuxX Index',
    'MX': 'S&P/Bmv Ipc',
    'NO': 'Oslo Stock Exchange Equity Index',
    'NL': 'Amsterdam',
    'PT': ' Lisbon PSI Index',
    'SE': 'OMX Stockholm',
    'CH': 'Swiss Market Index',
    'GB': 'FTSE 100 Index',
    'CZ': 'PX Prague',
    'TR': 'BIST 100 Index',
    'CN': 'Shanghai SE 50',
}

## This gives quick access to all the data of a country, for example France:
France = groups.get_group('FR')         # ISINs and corresponding announcement dates
indice_fr = indices.loc[:, dict['FR']]  # the SBF 120 price index

### Abnormal Returns calculation

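The abnormal returns considered here are computed over the 2 days following the release of the emission mitigation plan, so that the release is fully integrated in the market price. The average daily return over these two days is the geometric mean $r = \sqrt{(1+r_1)(1+r_2)} - 1$, where $r_1$ is the return of day 1 and $r_2$ the return of day 2. The abnormal return is the difference between the observed average return and the one predicted by a linear regression of the firm's returns on the market index returns.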


In [12]:
def abnormal_return(country, indice_country, i, t):
    """Abnormal return, in percent, of the i-th firm of `country`.

    A linear regression of the firm's daily returns on the index's daily
    returns is fitted on the returns located just before position `indice`,
    the first retained date that is on or after the announcement date. The
    fitted model predicts the firm's next two daily returns; the abnormal
    return is the observed two-day geometric-mean return minus the predicted
    one, multiplied by 100.

    The function reads the notebook-level `equities` (prices, indexed by ISIN)
    and `dates` (announcement dates, indexed by ISIN) dataframes.

    Parameters
    ----------
    country : DataFrame indexed by ISIN (firms of one country).
    indice_country : Series holding the price index of that country.
    i : position of the firm in `country.index`.
    t : regression window parameter; the window is
        `returns[indice - t - 1 : indice]`, i.e. it holds t + 1 daily returns.

    Returns
    -------
    float
        The abnormal return in percent, or one of the status strings
        'price not available', 'no data on time period',
        'no data for regression' when it cannot be computed.
    """
    isin = country.index[i]
    date_annonce = to_date(dates.loc[isin])
    date = np.array(indice_country.index, dtype=str)

    # The firm may be missing from the price table.
    try:
        Y_company = equities.loc[isin][1:]
    except KeyError:
        return 'price not available'

    Y_indice = indice_country
    # Keep only the dates where both the index and the equity price are available.
    available = np.logical_and(Y_company.notna(), Y_indice.notna())
    date = to_date(np.array(date[available]))
    Y_company = np.array(Y_company[available])
    Y_indice = np.array(Y_indice[available])

    # Daily returns: element k is the return between retained dates k and k + 1.
    R_company = (Y_company[1:] - Y_company[:-1]) / Y_company[:-1]
    R_indice = (Y_indice[1:] - Y_indice[:-1]) / Y_indice[:-1]

    # Position of the first retained date on or after the announcement.
    # NOTE(review): `to_date` is defined outside this notebook, so any failure
    # of the comparison is still reported as missing data rather than raised.
    try:
        indice = np.where(date >= date_annonce)[0][0]
    except Exception:
        return 'no data on time period'

    # Regression window. A negative start would silently wrap around to the
    # end of the array, so it is rejected explicitly.
    start = indice - t - 1
    if start < 0:
        return 'no data for regression'
    y = R_company[start:indice]
    x = R_indice[start:indice].reshape((-1, 1))

    model_linReg = LinearRegression()
    try:
        model_linReg.fit(x, y)
    except ValueError:
        return 'no data for regression'

    # Both returns following the announcement must exist.
    if indice + 1 >= len(R_company):
        return 'no data on time period'

    # Prediction for the two days after the release.
    x_test_1 = R_indice[indice:indice + 1].reshape((-1, 1))      # 1 day after
    x_test_2 = R_indice[indice + 1:indice + 2].reshape((-1, 1))  # 2 days after
    prediction_1 = model_linReg.predict(x_test_1)[0]
    prediction_2 = model_linReg.predict(x_test_2)[0]
    observation_1 = R_company[indice]
    observation_2 = R_company[indice + 1]

    real = ((1 + observation_1) * (1 + observation_2)) ** 0.5 - 1      # observed average return over 2 days
    expected = ((1 + prediction_1) * (1 + prediction_2)) ** 0.5 - 1    # predicted average return over 2 days
    ar = (real - expected) * 100
    return ar

### Example: first French firm, regression window parameter t = 10
abnormal_return(France, indice_fr, i=0, t=10)

0.8563179548335298

In [13]:
### Same computation for every company of a country:

def Abnormal_return(country, indice_country, t):
    """Return the list of `abnormal_return` results for all firms of `country`.

    The list follows the order of `country.index`; each element is whatever
    `abnormal_return(country, indice_country, i, t)` returns for position i.
    """
    n_firms = len(country.index)
    return [abnormal_return(country, indice_country, position, t)
            for position in range(n_firms)]

### Example: all French firms, regression window parameter t = 10

AR = Abnormal_return(France, indice_fr, t=10)
print(AR)


[0.8563179548335298, 0.4412259630084181, -1.1054438509813136, -2.4213402306923926, 'no data on time period', -2.202218874031725, 4.428259982832383, -0.1480840274261208, 0.34890485147942574, -0.7883544618190186, 1.543994069292387, -0.9092018662578472, 0.5508313572824219, 'price not available', 'price not available', -2.4603552655876215, -2.0829750727954455, 5.329368329701289, 0.1571584559332484, 0.0073560201930722435, -0.6383280853220263, -0.7238477917109076, -1.5063951763796823, -0.5311935637322796, -0.3325600347872659, -1.9073413012661966, -1.8078273458346228, -0.06849141255231839, -0.9737352665076471, 1.6509574801722993, 2.195183013486024, -1.6873889889101323, 1.5796322612453895, 'no data on time period', 1.2947084147528876, -0.9061114901372957, 'price not available', -0.20756868552408436, -0.5562698780301023, 0.1695530985651783, 0.7428218577461232, 'price not available', 1.8915021397998943, -1.9121560667791027, 0.15409974216252786, 2.0689789242333223, -0.17394980559981565, 0.0406664

In [14]:
# We want one dataframe per country holding the abnormal returns obtained with
# different regression window parameters (by default 10, 12, ..., 30).

def Result_DataFrame( country, indice, windows = None ):
    """Abnormal returns of every firm of `country` for several window parameters.

    Parameters
    ----------
    country : DataFrame indexed by ISIN (firms of one country).
    indice : Series holding the price index of that country.
    windows : iterable of int, optional
        Values of `t` passed to `Abnormal_return`. Defaults to
        10, 12, ..., 30, the values originally hard-coded here.

    Returns
    -------
    DataFrame
        Indexed by 'ISIN' (same order and length as `country.index`), with one
        column `AR_<t>` per window parameter. Cells hold either the abnormal
        return or the status string returned by `abnormal_return`.
    """
    if windows is None:
        windows = range(10, 31, 2)
    isin = np.array(country.index)
    # Built in one go: joining one column at a time on the ISIN index would
    # multiply the rows whenever an ISIN appears more than once.
    columns = {f"AR_{t}": Abnormal_return(country, indice, t) for t in windows}
    return pd.DataFrame(columns, index=pd.Index(isin, name='ISIN'))

# Example: Germany
Country = groups.get_group('DE')
Indice = indices.loc[:, dict['DE']]

df_resultat = Result_DataFrame(country=Country, indice=Indice)
print(df_resultat)

                               AR_10                   AR_12  \
ISIN                                                           
DE000A3CNK42  no data for regression  no data for regression   
DE000A1EWWW0               -1.112967               -1.325766   
DE0005103006               -0.491469                -0.63757   
DEXXXXXALDI0     price not available     price not available   
DE000A0LD2U1               -0.208004                0.241476   
DE0006766504               -1.686372               -1.333622   
DE000BAY0017               -0.528099               -0.389467   
DE0005200000               -0.027719               -0.012938   
DE0005224901     price not available     price not available   
DE0005261606                -1.43739               -1.396541   
DE0005190003               -1.611353               -1.516872   
DE0005403901               -0.592834                -0.68265   
DE0005439004                -0.85923               -0.988177   
DE0008220005     price not available    

In [15]:
# We can now build one dataframe per country.
# They are stacked into a single table that will be used for the heterogeneity study.
# Very long to run: the results have already been saved in the 'results/AR.csv' file.
frames = []

for country in dict:
    Country = groups.get_group(country)
    Indice = indices[dict[country]]
    df_temporaire = Result_DataFrame(Country, Indice)
    frames.append(df_temporaire)

# Concatenate once at the end instead of growing `result` inside the loop.
result = pd.concat(frames) if frames else pd.DataFrame()

print(result)


                               AR_10                   AR_12  \
ISIN                                                           
FR0000120404                0.856318                0.185147   
FR0000120073                0.441226                0.529231   
FR0010220475               -1.105444               -0.773695   
FR0000051732                -2.42134                -2.65986   
FR0000121857  no data on time period  no data on time period   
...                              ...                     ...   
TRAARCLK91H5                0.723833                0.690854   
TRABRISA91E3                0.278867                0.837426   
TRAVESTL91H6               -0.855821               -0.357735   
CNE0000019B0                0.335875                0.304375   
CNE100000NC4               -3.350383               -2.548008   

                               AR_14                   AR_16  \
ISIN                                                           
FR0000120404                0.200557   

In [9]:
# To save the result (the output folder is created if it does not exist yet):
if result_folder:
    os.makedirs(result_folder, exist_ok=True)
result.to_csv(path.join(result_folder, 'AR.csv'))