In [7]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd 
from scipy import stats

In [15]:
russel = pd.read_csv("RUSSEL2000.csv")
russel_2017_2019 = russel[(russel['Date'] >= '2017-01-01') & (russel['Date'] <= '2019-12-31')]
russel_2020_2022 = russel[(russel['Date'] >= '2020-01-01') & (russel['Date'] <= '2022-12-31')]

# Exporter les données filtrées vers de nouveaux fichiers CSV
russel_2017_2019.to_csv('russel_2017_2019.csv', index=False)
russel_2020_2022.to_csv('russel_2020_2022.csv', index=False)

russel_2017_2019['Returns'] = russel_2017_2019['Close'].pct_change() * 100 #méthode utilisée pour calculer le pourcentage de changement entre les lignes
russel_2020_2022['Returns'] = russel_2020_2022['Close'].pct_change() * 100

cac = pd.read_csv("CAC40.csv")
cac_2017_2019 = cac[(cac['Date'] >= '2017-01-01') & (cac['Date'] <= '2019-12-31')]
cac_2020_2022 = cac[(cac['Date'] >= '2020-01-01') & (cac['Date'] <= '2022-12-31')]

# Exporter les données filtrées vers de nouveaux fichiers CSV
cac_2017_2019.to_csv('cac_2017_2019.csv', index=False)
cac_2020_2022.to_csv('cac_2020_2022.csv', index=False)

cac_2017_2019['Returns'] = cac_2017_2019['Close'].pct_change() * 100
cac_2020_2022['Returns'] = cac_2020_2022['Close'].pct_change() * 100

#I. Supposons que il n'y a pas de dépendance linéaire entre les retours et qu'ils sont distribués normalement.
#calcul des moyennes et volatilités des returns
cac_mean_2017_2019, cac_mean_2020_2022 = np.mean(cac_2017_2019['Returns']), np.mean(cac_2020_2022['Returns'])
rus_mean_2017_2019, rus_mean_2020_2022 = np.mean(russel_2017_2019['Returns']), np.mean(russel_2020_2022['Returns'])

cac_volatility_2017_2019, cac_volatility_2020_2022= cac_2017_2019['Returns'].std(), cac_2020_2022['Returns'].std()
rus_volatility_2017_2019, rus_volatility_2020_2022= russel_2017_2019['Returns'].std(), russel_2020_2022['Returns'].std()

#data lengh
cac_n_2017_2019, cac_n_2020_2022 = len(cac_2017_2019['Returns']), len(cac_2020_2022['Returns'])
rus_n_2017_2019, rus_n_2020_2022 = len(russel_2017_2019['Returns']), len(russel_2020_2022['Returns'])

# t-test statistique between cac and russel : Hypothesis H0 = Equality of the means
t_stat_2017_2019 = (cac_mean_2017_2019 - rus_mean_2017_2019) / np.sqrt((cac_volatility_2017_2019**2/cac_n_2017_2019) + (rus_volatility_2017_2019**2/rus_n_2017_2019))
t_stat_2020_2022 = (cac_mean_2020_2022 - rus_mean_2020_2022) / np.sqrt((cac_volatility_2020_2022**2/cac_n_2020_2022) + (rus_volatility_2020_2022**2/rus_n_2020_2022))

# Freedom Degree
df_2017_2019 = cac_n_2017_2019 + rus_n_2017_2019 - 2
df_2020_2022 = cac_n_2020_2022 + rus_n_2020_2022 - 2

# p-value
p_value_2017_2019 = 2 * (1 - stats.t.cdf(np.abs(t_stat_2017_2019), df=df_2017_2019))
p_value_2020_2022 = 2 * (1 - stats.t.cdf(np.abs(t_stat_2020_2022), df=df_2020_2022))


#Print results
print("\nFor 2017-2019 period :")
print(f"Test Statistic: {t_stat_2017_2019}")
print(f"P-value: {p_value_2017_2019}")

print("\nFor 2020-2022 period:")
print(f"Test Statistic: {t_stat_2020_2022}")
print(f"P-value: {p_value_2020_2022}")

if p_value_2017_2019 < 0.05:
    print("There is a significant difference in volatility between cac and russel for the period 2017-2019.")
else:
    print("There is no significant difference in volatility between cac and russel for the period 2017-2019.")

if p_value_2020_2022 < 0.05:
    print("There is a significant difference in volatility between cac and russel for the period 2020-2022.")
else:
    print("There is no significant difference in volatility between cac and russel for the period 2020-2022.")

# t-test statistique between 2017/2019 and 2020/2022 : Hypothesis H0 = Equality of the means
t_stat_cac = (cac_mean_2017_2019 - cac_mean_2020_2022) / np.sqrt((cac_volatility_2017_2019**2/cac_n_2017_2019) + (cac_volatility_2020_2022**2/cac_n_2020_2022))
t_stat_rus = (rus_mean_2017_2019 - rus_mean_2020_2022) / np.sqrt((rus_volatility_2017_2019**2/rus_n_2017_2019) + (rus_volatility_2020_2022**2/rus_n_2020_2022))

# Degree of freedom
df_cac = cac_n_2017_2019 + cac_n_2020_2022 - 2
df_rus = rus_n_2017_2019 + rus_n_2020_2022 - 2

# p-value
p_value_cac = 2 * (1 - stats.t.cdf(np.abs(t_stat_cac), df=df_cac))
p_value_rus = 2 * (1 - stats.t.cdf(np.abs(t_stat_rus), df=df_rus))


#Print results
print("\n CAC before and after covid :")
print(f"Test Statistic: {t_stat_cac}")
print(f"P-value: {p_value_cac}")

print("\nRussel before and after covid :")
print(f"Test Statistic: {t_stat_rus}")
print(f"P-value: {p_value_rus}")

if p_value_cac < 0.05:
    print("There is a significant difference in volatility between before and after the covid for the cac.")
else:
    print("There is no significant difference in volatility between before and after the covid for the cac.")

if p_value_rus < 0.05:
    print("There is a significant difference in volatility between before and after the covid for the russel index.")
else:
    print("There is no significant difference in volatility between cac and russel for the russel index.")





For 2017-2019 period :
Test Statistic: -0.03925152567140213
P-value: 0.968695015063306

For 2020-2022 period:
Test Statistic: -0.08085788399898496
P-value: 0.9355655767956659
There is no significant difference in volatility between cac and russel for the period 2017-2019.
There is no significant difference in volatility between cac and russel for the period 2020-2022.

 CAC before and after covid :
Test Statistic: 0.147947774441521
P-value: 0.8824034028184164

Russel before and after covid :
Test Statistic: 0.04303226354222375
P-value: 0.9656815119735895
There is no significant difference in volatility between before and after the covid for the cac.
There is no significant difference in volatility between cac and russel for the russel index.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  russel_2017_2019['Returns'] = russel_2017_2019['Close'].pct_change() * 100 #méthode utilisée pour calculer le pourcentage de changement entre les lignes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  russel_2020_2022['Returns'] = russel_2020_2022['Close'].pct_change() * 100
  cac_2017_2019['Returns'] = cac_2017_2019['Close'].pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://p

In [16]:
the cac.")
else:
    print("There is no significant difference in volatility between cac and russel for the period 2020-2022.")




SyntaxError: unterminated string literal (detected at line 1) (3092190311.py, line 1)