# Statistics Tutorial – TD1

## 1 – Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# Folder that holds the tutorial input file (machine-specific absolute path).
mainpath = r"C:/Users/Emile/Desktop/cours/ESILV/A4/S2/SPECIALISATION_IF/ML&AM/TD1/"
# Read the semicolon-separated price file; the "Dates" column is parsed
# day-first and becomes the index of the resulting DataFrame.
# If the CSV sits in the current working directory, the bare file name
# "DataForStatsTutorial1.csv" can be passed instead of the full path.
all_assets_prices = pd.read_csv(
    f"{mainpath}DataForStatsTutorial1.csv",
    sep=";",
    index_col="Dates",
    parse_dates=["Dates"],
    dayfirst=True,
)


## 2 – Data Exploration

In [None]:
# Full-history chart of the two equity indices, drawn on a logarithmic y-axis.
plt.figure(figsize=(10, 5))
plt.plot(all_assets_prices.loc[:, ['DMEquitiesEUR', 'DMEquitiesUSD']])
plt.gca().set(yscale="log", title="ES50 and SP500", xlabel="Date", ylabel="Price")
plt.legend(['DMEquitiesEUR', 'DMEquitiesUSD'])
plt.show()

# Same two columns restricted to the 2019-2022 date range (linear y-axis).
subset = all_assets_prices.loc['2019':'2022', ['DMEquitiesEUR', 'DMEquitiesUSD']]
plt.figure(figsize=(10, 5))
plt.plot(subset)
plt.gca().set(title="ES50 and SP500 - From 2019 to 2022", xlabel="Date", ylabel="Price")
plt.legend(['DMEquitiesEUR', 'DMEquitiesUSD'])
plt.show()

# Build weekly and monthly views by keeping the last price of each period.
# The daily name is an alias of the loaded frame, not a copy.
all_assets_prices_daily = all_assets_prices
all_assets_prices_monthly = all_assets_prices.resample('M').last()
all_assets_prices_weekly = all_assets_prices.resample('W').last()

# Show the first rows at each frequency.
print("Daily Prices Head:", all_assets_prices_daily.head(), sep="\n")
print("\nWeekly Prices Head:", all_assets_prices_weekly.head(), sep="\n")
print("\nMonthly Prices Head:", all_assets_prices_monthly.head(), sep="\n")

def compute_return(price):
    """Compute simple one-period returns from a price series.

    Args:
        price: pandas object of prices ordered in time.

    Returns:
        Object of the same shape holding ``p_t / p_{t-1} - 1``; the first
        entry is NaN because it has no previous price.
    """
    previous_price = price.shift(1)
    return price / previous_price - 1

# Daily returns of the 'DMEquitiesUSD' column, computed by hand and,
# as a cross-check, with pandas' built-in pct_change.
prices_sp = all_assets_prices_daily['DMEquitiesUSD']
returns_sp_check = prices_sp.pct_change()
returns_sp = compute_return(prices_sp)

print("\nReturns SP Head:", returns_sp.head(), sep="\n")


## 3 – Usual Statistics

In [None]:
def compute_cagr(price, ann_multiple=252):
    """Compute the compound annual growth rate of a price series.

    Args:
        price: pandas Series of prices ordered in time.
        ann_multiple: Number of observation periods per year
            (252 for daily data by default).

    Returns:
        The annualised growth rate as a fraction (0.05 means 5%), or NaN
        when fewer than two prices are supplied.
    """
    # N prices span N - 1 periods, so the elapsed time in years is
    # (N - 1) / ann_multiple, not N / ann_multiple.
    n_periods = len(price) - 1
    if n_periods < 1:
        # No elapsed time: the growth rate is undefined.
        return float("nan")
    total_growth = price.iloc[-1] / price.iloc[0]
    return total_growth ** (ann_multiple / n_periods) - 1

# Annualised growth rate of the 'DMEquitiesUSD' column over the whole sample,
# printed as a percentage rounded to two decimals.
cagr_sp = compute_cagr(price=all_assets_prices['DMEquitiesUSD'])
print(f'CAGR of SP 500 is: {round(cagr_sp * 100, 2)}%')

def compute_vol(price, ann_multiple=252):
    """Compute the annualised volatility of a price series.

    Volatility is the sample standard deviation (N - 1 denominator) of the
    simple one-period returns, scaled by ``sqrt(ann_multiple)``.

    Args:
        price: pandas Series of prices ordered in time.
        ann_multiple: Number of observation periods per year
            (252 for daily data by default).

    Returns:
        The annualised volatility as a fraction, or NaN when fewer than two
        returns are available.
    """
    # Return at t is p_t / p_{t-1} - 1; the first entry has no predecessor
    # and is dropped so it does not count as an observation.
    ret = (price / price.shift(1) - 1).dropna()
    n_obs = len(ret)
    if n_obs < 2:
        # A sample standard deviation needs at least two observations.
        return np.nan
    mu = ret.mean()
    sigma_period = np.sqrt((ret - mu).pow(2).sum() / (n_obs - 1))
    # Scale with the caller-supplied periods-per-year, not a fixed 252.
    return np.sqrt(ann_multiple) * sigma_period

# Annualised volatility of the 'DMEquitiesUSD' column, printed in percent.
vol_sp = compute_vol(price=all_assets_prices['DMEquitiesUSD'])
print(f'Volatility of SP 500 is: {round(vol_sp * 100, 2)}%')

# Cross-check value built from pandas' own pct_change and std.
sp_returns = all_assets_prices['DMEquitiesUSD'].pct_change()
vol_sp_check = sp_returns.std() * np.sqrt(252)

def compute_dd_np(price):
    """Compute the drawdown path of a price series with NumPy.

    Args:
        price: pandas Series of prices ordered in time.

    Returns:
        Array with, for each date, the relative distance of the price from
        its running maximum (0 at a new high, negative otherwise).
    """
    values = price.values
    running_peak = np.maximum.accumulate(values)
    return values / running_peak - 1

def compute_dd_pd(df, series):
    """Build a drawdown table for one column of a price DataFrame.

    Args:
        df: DataFrame of prices ordered in time.
        series: Name of the column to analyse.

    Returns:
        DataFrame indexed like ``df`` with four columns: the original prices
        (named ``series``), 'Cumulative' (prices rebased to 1 at the first
        row), 'Max' (running maximum of 'Cumulative') and 'Drawdown'
        (relative distance of 'Cumulative' from 'Max').
    """
    prices = df[series]
    rebased = prices / prices.iloc[0]
    running_peak = rebased.cummax()
    return pd.DataFrame({
        series: prices,
        'Cumulative': rebased,
        'Max': running_peak,
        'Drawdown': (rebased - running_peak) / running_peak,
    })

# Drawdown of the 'DMEquitiesUSD' column, computed with both implementations.
dd_df = compute_dd_pd(all_assets_prices, 'DMEquitiesUSD')
dd_np = compute_dd_np(all_assets_prices['DMEquitiesUSD'])

# Chart of the pandas-based drawdown path.
plt.figure(figsize=(10, 5))
plt.plot(dd_df['Drawdown'], linewidth=2)
plt.gca().set(title="SP500 Drawdown", xlabel="Date", ylabel="Drawdown")
plt.show()

# The maximum drawdown is the most negative point of the path; it is printed
# once per implementation so the two results can be compared.
mdd = np.min(dd_np)
print(f"Max Drawdown of SP 500 = {round(mdd * 100, 2)}%")
print(f"Max Drawdown of SP 500 = {round(dd_df['Drawdown'].min() * 100, 2)}%")

def compute_sr(price, ret_without_risk=0.00, ann_multiple=252):
    """Compute the Sharpe ratio of a price series.

    Args:
        price: pandas Series of prices ordered in time.
        ret_without_risk: Annual risk-free rate subtracted from the CAGR.
        ann_multiple: Number of observation periods per year, forwarded to
            ``compute_cagr`` and ``compute_vol``.

    Returns:
        (CAGR - risk-free rate) divided by the annualised volatility.
    """
    excess_return = compute_cagr(price, ann_multiple) - ret_without_risk
    return excess_return / compute_vol(price, ann_multiple)

# Sharpe ratio over the full sample with a 2% annual risk-free rate.
rf = 0.02
sr = compute_sr(all_assets_prices['DMEquitiesUSD'], rf, 252)
print(f"Sharpe Ratio of SP 500 = {round(sr, 2)}")

# Three overlapping three-year windows, each starting one quarter later.
prix_sp_subset1 = all_assets_prices.loc["2001-01-01":"2003-12-31", "DMEquitiesUSD"]
prix_sp_subset2 = all_assets_prices.loc["2001-04-01":"2004-03-31", "DMEquitiesUSD"]
prix_sp_subset3 = all_assets_prices.loc["2001-07-01":"2004-06-30", "DMEquitiesUSD"]

# Each label states the window actually sliced above, so the printed period
# always matches the data the ratio was computed on.
print(f"Sharpe Ratio of SP500 Index (2001-01 to 2003-12) = {round(compute_sr(prix_sp_subset1, rf, 252), 2)}")
print(f"Sharpe Ratio of SP500 Index (2001-04 to 2004-03) = {round(compute_sr(prix_sp_subset2, rf, 252), 2)}")
print(f"Sharpe Ratio of SP500 Index (2001-07 to 2004-06) = {round(compute_sr(prix_sp_subset3, rf, 252), 2)}")

# Sharpe ratio on a moving window of 252 prices; compute_sr is called with
# its default risk-free rate and receives each window as a Series.
rolling_sr = all_assets_prices['DMEquitiesUSD'].rolling(252).apply(compute_sr, raw=False)
plt.figure(figsize=(10, 5))
plt.plot(rolling_sr)
plt.gca().set(title="Rolling Sharpe Ratio", xlabel="Date", ylabel="Sharpe Ratio")
plt.show()

# Sharpe ratio on an ever-growing window that starts at the first observation.
# compute_sr is called with its default risk-free rate and receives each
# window as a Series. The computation re-evaluates every growing window, so
# it is run (and plotted) only once.
expanding_sr = all_assets_prices['DMEquitiesUSD'].expanding().apply(compute_sr, raw=False)
# Windows whose ratio is NaN or infinite (e.g. too few prices) are set to 0
# so they do not break the chart.
expanding_sr[~np.isfinite(expanding_sr)] = 0

plt.figure(figsize=(10, 5))
plt.plot(expanding_sr)
plt.title("Expanding Sharpe Ratio")
plt.xlabel("Date")
plt.ylabel("Sharpe Ratio")
plt.show()
