# Abnormal Returns

In [2]:
#imports 
from script import preprocess_isin, to_date # useful functions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import path
from sklearn.linear_model import LinearRegression
%matplotlib inline


In [3]:
# Input (raw data) and output (results) directories; edit here to relocate them.
data_folder, result_folder = "data/", "results/"

**We first read and preprocess the data: price indices, main equities, and target release dates**

In [4]:
## Price index and main equities
equities = preprocess_isin(path.join(data_folder, 'equities.csv'), decimal=',', low_memory=False)
indices = pd.read_csv(path.join(data_folder, 'indices.csv')).set_index('Date')

## Making the two dataframes compatible:
# one date row is removed from the indices, three date columns from the equities
indices = indices.drop(index=['10/10/2013'])
equities = equities.drop(columns=['11/10/2023', '12/10/2023', '13/10/2023'])
equities = equities.drop(columns=equities.columns[0])  # useless first column
equities = equities.set_index('ISIN')

## Target release dates
# Read from the configured data folder (previously hard-coded to 'data/'),
# so that changing `data_folder` relocates every input file consistently.
dates = preprocess_isin(path.join(data_folder, 'dates.csv'), sep=';')  # filtered by ISIN
dates = dates.set_index('ISIN')

## Grouping by country (country code = first 2 letters of the ISIN)
country = [isin_code[:2] for isin_code in dates.index]
country_df = pd.DataFrame({'Country': country}, index=dates.index)
df = pd.concat([country_df, dates], axis=1)
groups = df.groupby('Country')


**We create a dictionary to link the countries with their corresponding market price index**

In [5]:
## Mapping: country code (first two letters of the ISIN) -> name of the
## matching market price index column in `indices`.
# NOTE: the name `dict` shadows the Python builtin; it is kept because the
# cells below look the mapping up under this name.
# Some index names carry leading/trailing spaces; they are reproduced as is.
dict = {
    'FR': 'SBF 120 - PRICE INDEX',
    'US': 'S&P 500 VALUE - PRICE INDEX',
    'DE': 'DAX PERFORMANCE - PRICE INDEX',
    'AU': 'S&P/ASX 200 ',
    'AT': 'Austrian Traded Index',
    'BE': 'BEL 20 Index',
    'CA': 'S&P/TSX 60 INDEX',
    'CL': 'S&P/CLX IGPA (CLP) Index',
    'CO': 'Colombia Price Index',
    'DK': 'OMXC 25 CAP Index',
    'ES': 'IBEX 35 Index',
    'FI': 'OMX Helsinki',
    'GR': 'Athex',
    'HU': 'Budapest SE Index',
    'IE': 'ISEQ',
    'IS': 'OMX Iceland',
    'JP': 'TOPIX ',
    'LT': 'OMX Vilnius',
    'LU': 'LuxX Index',
    'MX': 'S&P/Bmv Ipc',
    'NO': 'Oslo Stock Exchange Equity Index',
    'NL': 'Amsterdam',
    'PT': ' Lisbon PSI Index',
    'SE': 'OMX Stockholm',
    'CH': 'Swiss Market Index',
    'GB': 'FTSE 100 Index',
    'CZ': 'PX Prague',
    'TR': 'BIST 100 Index',
    'CN': 'Shanghai SE 50',
}

## This gives quick access to all the data; example for France:
France = groups.get_group('FR')   # ISINs and corresponding announcement dates
indice_fr = indices[dict['FR']]   # SBF 120 price index

### Abnormal returns calculation

The abnormal returns considered here are measured 2 days after the release of the emission mitigation plan, so that the release is fully reflected in the market price.

In [6]:
def anormal_return(country, indice_country, i, t):
    """Abnormal return (in %) of one firm shortly after its announcement date.

    A linear model ``R_company ~ R_index`` is fitted on the ``t + 1`` daily
    returns that precede the announcement position; the abnormal return is the
    observed company return minus the return predicted by that model, taken at
    position ``indice + 1`` of the return series (the notebook refers to this
    as "2 days after" the release).

    Besides its arguments, the function reads the module-level ``dates``
    (announcement dates by ISIN) and ``equities`` (prices by ISIN) frames.

    Parameters
    ----------
    country : DataFrame
        Firms of one country, indexed by ISIN.
    indice_country : Series
        Price index of that country's market, one value per date.
    i : int
        Position of the firm in ``country.index``.
    t : int
        Length of the estimation window used for the regression.

    Returns
    -------
    float or str
        The abnormal return in percent, or one of the sentinel strings
        ``'price not available'``, ``'no data on time period'`` or
        ``'no data for regression'`` when it cannot be computed.
    """
    isin = country.index[i]
    date_annonce = to_date(dates.loc[isin])

    # The firm may have no price series at all.
    try:
        company_row = equities.loc[isin]
    except KeyError:
        return 'price not available'

    prices_company = company_row[1:]  # the first entry of the row is not used
    prices_index = indice_country
    # Keep only the dates where both the index and the equity price exist.
    both_available = np.logical_and(prices_company.notna(), prices_index.notna())
    all_dates = np.array(indice_country.index, dtype=str)
    date = to_date(np.array(all_dates[both_available]))
    Y_company = np.array(prices_company[both_available])
    Y_indice = np.array(prices_index[both_available])

    # Daily simple returns: element k is the return between observations k and k+1.
    R_company = (Y_company[1:] - Y_company[:-1]) / Y_company[:-1]
    R_indice = (Y_indice[1:] - Y_indice[:-1]) / Y_indice[:-1]

    # Position of the first available date on or after the announcement.
    # `except Exception` (not a bare `except`) so Ctrl-C still interrupts the run.
    try:
        indice = np.where(date >= date_annonce)[0][0]
    except Exception:
        return 'no data on time period'

    # A negative start would wrap around as a negative slice index; there is
    # simply not enough history before the announcement in that case.
    start = indice - t - 1
    if start < 0:
        return 'no data for regression'

    # Estimation window: the t + 1 returns preceding position `indice`.
    y = R_company[start:indice]
    x = R_indice[start:indice].reshape((-1, 1))
    model_linReg = LinearRegression()
    try:
        model_linReg.fit(x, y)
    except Exception:
        return 'no data for regression'

    # The evaluated return must exist (announcement too close to the sample end).
    event = indice + 1
    if event >= len(R_company):
        return 'no data on time period'

    x_test = R_indice[event:event + 1].reshape((-1, 1))
    prediction = model_linReg.predict(x_test)[0]
    observation = R_company[event]

    # Observed minus predicted return, in percent. The original subtracted the
    # same price level from both terms, which cancels out algebraically.
    return (observation - prediction) * 100

### Example: first French firm, window parameter t = 10
anormal_return(France, indice_fr, i=0, t=10)

2.170937237546866

In [10]:
### Abnormal returns for every company of a country

def Anormal_return(country, indice_country, t):
    """Return the list of `anormal_return` results for all firms of `country`.

    One entry per position in `country.index`, in order; each entry is whatever
    `anormal_return(country, indice_country, position, t)` returns.
    """
    n_firms = len(country.index)
    return [
        anormal_return(country, indice_country, position, t)
        for position in range(n_firms)
    ]

### Example: all French firms, window parameter t = 10

AR = Anormal_return(France, indice_fr, t=10)
print(AR)


[-0.7418456838105075, 0.2532644930185821, -0.5610508747610155, -5.591985376014049, 'no data on time period', -7.111919871585035, 3.272502071292753, 4.015449339707236, -1.6423349939792289, 0.42912054290819257, 5.553723545308023, -3.7671556607449697, 0.43733718862819776, 'price not available', 'price not available', -1.3307477665918583, 0.10896771774545179, 4.1981478018069, -1.9776580701337028, 1.3297393456820572, 6.307656855180883, 0.8164401142290547, -1.687487199903791, 3.968610871448272, 0.5517770404319919, -7.698855079053072, -1.1917872276387798, 0.3157952046413032, 2.5550324581449537, -0.15896850051507466, -3.646367771649006, 1.377777239185158, -3.2870009265493008, 'no data on time period', 0.34039087239100907, -1.1927403655349091, 'price not available', 1.538874421827634, -9.336953188129767, -3.9315958858032283, 6.100390049673964, 'price not available', 8.485747798303926, 3.335936981305228, 1.4145886182599405, 0.7739760901230073, -1.3152798975641335, -0.4495007565843525, 9.49709726

In [11]:
# We want a dataframe with the abnormal returns for one country, computed with
# different window lengths for the linear regression (by default from 10 to 30
# days, with a 2-day increment).

def Result_DataFrame(country, indice, windows=range(10, 31, 2)):
    """Build the table of abnormal returns of one country for several windows.

    Parameters
    ----------
    country : DataFrame
        Firms of one country, indexed by ISIN.
    indice : Series
        Price index of that country's market.
    windows : iterable of int, optional
        Window lengths passed as ``t`` to ``Anormal_return``. Defaults to
        10, 12, ..., 30, the values that were previously hard-coded.

    Returns
    -------
    DataFrame
        Indexed by ``ISIN`` (same order as ``country.index``), with one column
        ``AR_<t>`` per window holding the output of ``Anormal_return``.
    """
    isin = np.array(country.index)
    # Build all columns first and assemble the frame once; a repeated index
    # join would multiply rows if an ISIN appeared more than once.
    columns = {f"AR_{t}": Anormal_return(country, indice, t) for t in windows}
    return pd.DataFrame(columns, index=pd.Index(isin, name='ISIN'))

# Example (Germany here)
Country = groups.get_group('DE')
Indice = indices[dict['DE']]

df_resultat = Result_DataFrame(country=Country, indice=Indice)
print(df_resultat)

                               AR_10                   AR_12  \
ISIN                                                           
DE000A3CNK42  no data for regression  no data for regression   
DE000A1EWWW0               -2.874119               -2.771363   
DE0005103006                2.330756                4.051738   
DEXXXXXALDI0     price not available     price not available   
DE000A0LD2U1               -1.049978               -1.695403   
DE0006766504               -2.494441                0.953505   
DE000BAY0017               -0.276918               -0.682244   
DE0005200000                1.167348                1.447576   
DE0005224901     price not available     price not available   
DE0005261606               -0.769914               -0.696883   
DE0005190003               -6.592446               -6.261938   
DE0005403901                0.709649                 0.57729   
DE0005439004               -0.501042               -0.409427   
DE0008220005     price not available    

In [12]:
# Now we have a dataframe for each country.
# We create and merge all these dataframes into one big database that will be
# used for the heterogeneity study.
# Very long to run; the results have already been saved in 'results/AR.csv'.
country_frames = []

for country in dict:
    Country = groups.get_group(country)
    Indice = indices[dict[country]]
    df_temporaire = Result_DataFrame(Country, Indice)
    country_frames.append(df_temporaire)

# Concatenate once at the end: concatenating inside the loop copies the growing
# frame at every step and starts from an empty DataFrame.
result = pd.concat(country_frames) if country_frames else pd.DataFrame()

print(result)


                               AR_10                   AR_12  \
ISIN                                                           
FR0000120404               -0.741846               -1.917785   
FR0000120073                0.253264                0.159228   
FR0010220475               -0.561051               -0.459996   
FR0000051732               -5.591985               -5.947152   
FR0000121857  no data on time period  no data on time period   
...                              ...                     ...   
TRAARCLK91H5               -0.258637               -0.168936   
TRABRISA91E3               -3.477419               -2.790915   
TRAVESTL91H6                0.332757                0.308619   
CNE0000019B0               -0.560285               -0.453999   
CNE100000NC4               -5.479184               -5.443405   

                               AR_14                   AR_16  \
ISIN                                                           
FR0000120404               -3.759092   

In [9]:
# Persist the merged abnormal-return table as CSV in the results folder.
ar_csv_path = path.join(result_folder, 'AR.csv')
result.to_csv(ar_csv_path)