In [None]:
#Install the necessary packages

!pip install yfinance
!pip install matplotlib==3.5.3
!pip install powerlaw

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.4-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 KB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 KB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.26
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting beautifulsoup4>=4.11.1
  Downloading beautifulsoup4-4.11.1-py3-none-any.whl (128 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.2/128.2 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting frozendict>=2.3.4
  Downloading frozendict-2.3.4-cp38-cp38-

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting powerlaw
  Downloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw
Successfully installed powerlaw-1.5


In [None]:
#Standard packages
import numpy as np
import pandas as pd

#Dates
from datetime import datetime, timedelta

#Finance packages
import yfinance as yf

#Statistics
from scipy.stats import skew, kurtosis
import statsmodels.tsa.api as smt
import statsmodels.api as sm

#Plotting packages
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

from matplotlib import rcParams

# Notebook-wide matplotlib defaults: font sizes for text, axis labels and
# tick labels, plus the default figure size.
rcParams.update(
    {
        "font.size": 20,
        "axes.labelsize": 30,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "figure.figsize": (8, 6),
    }
)

# Exercise 1. Autocorrelation and correlation time of financial time-series

The autocorrelation of a time series for a given lag $s$ is given by

\begin{equation}
\rho(s)=\frac{\left\langle\left(x(t)-\langle x\rangle_T\right)\left(x(t+s)-\langle x\rangle_T\right)\right\rangle_T}{\sigma^2}
\end{equation}

As you have seen in the lectures, the autocorrelation of the log returns decays very fast. To be able to properly see the decay we need to use high frequency data, which is not easily accessible. To make all this clear, we will use both low and high frequency data and plot the autocorrelation function.

1. Download low and high frequency data for the S&P500 index. 

In [None]:
def _download_sp500(**kwargs):
    """Download S&P500 (^GSPC) bars with flat columns that include "Adj Close".

    Parameters
    ----------
    **kwargs
        Forwarded to ``yf.download`` (e.g. ``start``, ``end``, ``interval``).

    Returns
    -------
    pandas.DataFrame
        The downloaded bars, with a single-level column index.
    """
    # auto_adjust=False keeps the "Adj Close" column that later cells read;
    # newer yfinance releases drop it by default.
    bars = yf.download("^GSPC", auto_adjust=False, progress=False, **kwargs)
    # Newer yfinance releases return (field, ticker) MultiIndex columns even for
    # a single ticker; flatten them so bars["Adj Close"] is a Series.
    if isinstance(bars.columns, pd.MultiIndex):
        bars.columns = bars.columns.get_level_values(0)
    return bars


# Low-frequency data: daily bars (the bar size is set by `interval`, not `period`).
df_LF = _download_sp500(start=datetime(1910, 1, 1), end=datetime.today(), interval="1d")

# High-frequency data: 1-minute bars for the last 30 days, requested in
# windows of at most 7 days each.
now = datetime.today()
start = now - timedelta(30)

hf_chunks = []
while start < now:
    end = min(start + timedelta(7), now)
    hf_chunks.append(_download_sp500(start=start, end=end, interval="1m"))
    # `end` is exclusive, so the next window starts exactly where this one
    # stopped; starting one day later would leave a 24 h hole in the series.
    start = end

# Concatenate once instead of growing the frame inside the loop, and guard
# against a bar being returned by two adjacent windows.
df_HF = pd.concat(hf_chunks, ignore_index=False)
df_HF = df_HF[~df_HF.index.duplicated(keep="first")]

2. Compute the autocorrelation function of the log returns of the Adjusted Close price.

**Indications:** Use 100 lags

**Clue 1:** Although the formula can be easily coded using some loops, such an implementation is too slow to be practical in Python (unlike in compiled languages such as Julia, C or Fortran). Fortunately, we can make use of the pandas **autocorr** built-in method (which ultimately runs compiled code) to efficiently compute the autocorrelation function of a time series at any desired lag.

*Given a pandas Series, s.autocorr(k) computes the Pearson correlation between the Series and its k-shifted self.*

**Clue 2:** *The column of a dataframe is a pandas Series.*

In [None]:
# Log returns of the Adjusted Close price for each dataset (exercise placeholders).
df_LF["LogRet"] = #CODE
df_HF["LogRet"] = #CODE

# Drop every row that contains a missing value in any column.
df_LF = df_LF.dropna()
df_HF = df_HF.dropna()

# Integer lags 0, 1, ..., 39.
# NOTE(review): the exercise text asks for 100 lags, but this grid has 40 — confirm which is intended.
lags = np.arange(0, 40, 1)

# Autocorrelation of the log returns, one value per entry of `lags`
# (exercise placeholders; the plotting cell scatters these against `lags`).
acor_LF = #CODE
acor_HF = #CODE

In [None]:
# Side-by-side panels: ACF of the low-frequency data (A) and of the
# high-frequency data (B), both against the same lag grid.
fig, ax = plt.subplot_mosaic("AB", figsize=(16, 6))

panels = (
    ("A", acor_LF, "Days", "LF data"),
    ("B", acor_HF, "Minutes", "HF data"),
)
for key, acf_values, lag_unit, panel_title in panels:
    panel = ax[key]
    panel.scatter(lags, acf_values)
    panel.set_xlabel(lag_unit)
    panel.set_ylabel("ACF")
    panel.set_title(panel_title)

# Only the high-frequency panel uses a logarithmic y-axis.
ax["B"].set_yscale("log")

fig.subplots_adjust(wspace=0.3)

# Exercise 2. Power Spectrum of financial time-series

The power spectrum of a time series indicates the intensity of the signal at each frequency $\omega$. It can be computed as the Fourier transform of the time-series' autocovariance, $C(s)$,

\begin{equation}
P(\omega)=\int_{-\infty}^{\infty} C(s) e^{-i \omega s} d s
\end{equation}

In practice, it can be easily computed using the **scipy.signal.periodogram** function (https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.periodogram.html).

In the lectures you have seen that the power spectrum of the logarithm of the prices is well described by the functional form

\begin{equation}
P(\omega)\sim \omega^{-2}
\end{equation}

Can you show it empirically with the previous data?

In [None]:
from scipy import signal
  
#CODE

# Exercise 3. Moving average and volatility

The moving average and moving volatility are measures of the average and the volatility computed over fixed intervals, usually called the "window" over which the measures are taken. These can be useful for smoothing the data for visual analysis or even for quantitative analysis. The moving average and moving volatility can be defined as follows

* Moving average

\begin{equation}
\mu_M(j)=\frac{1}{M}\sum_{i=j-M+1}^jX(i)
\end{equation}

* Moving volatility

\begin{equation}
\sigma^2_M(j)=\frac{1}{M-1}\sum_{i=j-M+1}^j(X(i)-\mu_M(j))^2
\end{equation}

Indeed, we can define the moving measure for any observable $\mathcal{L}$ as

\begin{equation}
\mathcal{L}_M(j)\left\{X\right\}=\sum_{i=j-M+1}^j\mathcal{L}\left\{X(i)\right\}
\end{equation}

**1. Compute the moving average of the Adjusted Close price for the previous low frequency data with different window sizes (e.g. 30, 250, 500). Plot it together with the original time series (Adj Close price).**

**Clue:** *The pandas.Series.rolling(window).statistic() method computes the given statistic over the given window size through all the series.*

*Example: df["Col1"].rolling(100).mean()*

In [None]:
# Window sizes for the three moving averages.
window1 = 30
window2 = 252
window3 = 504

# Moving average of the Adjusted Close price for each window size
# (exercise placeholders; the columns are plotted below).
df_LF["MA"] = #CODE
df_LF["MA2"] = #CODE
df_LF["MA3"] = #CODE

# Price series together with its three moving averages on a log y-axis.
# Every curve is labelled so the figure can be read without the code.
fig, ax = plt.subplots()

ax.plot(df_LF["Adj Close"], color="k", lw=3, label="Adj Close")
for column, window in (("MA", window1), ("MA2", window2), ("MA3", window3)):
    ax.plot(df_LF[column], lw=3, label="MA, window=%d" % window)

ax.set_ylabel("S&P500 value")
ax.set_yscale("log")
ax.legend();

**2. Compute the moving volatility of the Log Returns for the Adj Close price for the previous low frequency data with different window sizes.**

In [None]:
# Window sizes for the three moving volatilities.
# Note: this rebinds window1/window2/window3 from the moving-average cell.
window1 = 50
window2 = 252
window3 = 504

# Moving volatility of the log returns for each window size
# (exercise placeholders; the columns are plotted below).
df_LF["MV"] = #CODE
df_LF["MV2"] = #CODE
df_LF["MV3"] = #CODE

# Moving volatility for the three window sizes on a log y-axis.
# Every curve is labelled so the window sizes can be told apart.
fig, ax = plt.subplots()

for column, window in (("MV", window1), ("MV2", window2), ("MV3", window3)):
    ax.plot(df_LF[column], lw=3, label="window=%d" % window)

ax.set_ylabel("S&P500 volatility")
ax.set_yscale("log")
ax.legend();

**3. Plot the distribution of the moving volatility for the HF data with window sizes of 10, 50 and 100 minutes (the HF data is sampled every minute)**

* Use the numpy.histogram() method to compute the histogram (it returns hist_values and bin_edges), so that you can plot it as a scatter plot.

* Use logarithmic bins.

* Plot the histograms for each window size in the same plot

**Clue:** *numpy.logspace(start, stop, num) returns numbers spaced evenly on a log scale.*


In [None]:
# Window sizes for the three moving-volatility series.
window_1 = 10
window_2 = 50
window_3 = 100

# Moving volatility for each window size (exercise placeholders);
# these series are histogrammed below.
V_T_1 = #CODE

V_T_2 = #CODE

V_T_3 = #CODE

# 80 logarithmically spaced bin edges between 1e-6 and 1e-2.
bins = np.logspace(-6, -2, 80)

# Probability density per bin for each window size.
bins_1, edges_1 = np.histogram(V_T_1, bins=bins, density=True)
bins_2, edges_2 = np.histogram(V_T_2, bins=bins, density=True)
bins_3, edges_3 = np.histogram(V_T_3, bins=bins, density=True)

fig, ax = plt.subplot_mosaic("""A""", figsize=(8, 6))

histograms = (
    (bins_1, edges_1, window_1, "C0", "o"),
    (bins_2, edges_2, window_2, "C1", "s"),
    (bins_3, edges_3, window_3, "C2", "^"),
)
for density, edges, window, color, marker in histograms:
    # Place each point at the geometric centre of its bin: on a log axis this
    # is the bin's mid-point, whereas the right edge shifts every point by a
    # full bin width.
    centers = np.sqrt(edges[:-1] * edges[1:])
    # Window sizes are integers, so format them as such ("Window=10").
    ax["A"].scatter(centers, density, color=color, s=80, marker=marker,
                    label="Window=%d" % window)

ax["A"].set_ylabel(r"Probability $P(V_T(t))$")
ax["A"].set_xlabel(r"Volatility $V_T(t)$")

ax["A"].set_yscale("log")
ax["A"].set_xscale("log")

ax["A"].legend();

**4. Fit the tail of the distribution**

Although it could be fitted using the Ordinary Least Squares (OLS) method, this is not the most convenient way of fitting power laws. Statistical methods have been developed to perform convenient fits to heavy-tailed distributions. In Python, you can find the nice **powerlaw** package (https://pypi.org/project/powerlaw/).

In [None]:
import powerlaw

# Moving-volatility series passed to powerlaw.Fit below (exercise placeholders).
V_T_1 = #CODE

V_T_2 = #CODE

V_T_3 = #CODE

# Fits with xmin=1e-6: used below only to draw the empirical PDFs.
result1 = powerlaw.Fit(V_T_1, xmin=1e-6)
result2 = powerlaw.Fit(V_T_2, xmin=1e-6)
result3 = powerlaw.Fit(V_T_3, xmin=1e-6)

# Tail fit: power law fitted to the first series above xmin=1e-4.
fit = powerlaw.Fit(V_T_1, xmin=1e-4)

alpha = fit.power_law.alpha

# plot_pdf returns the matplotlib Axes it drew on, so later calls reuse it.
pdf_ax = result1.plot_pdf(ls="", marker="o", ms=12, label="Window=%d" % window_1)
result2.plot_pdf(ax=pdf_ax, ls="", marker="o", ms=12, label="Window=%d" % window_2)
result3.plot_pdf(ax=pdf_ax, ls="", marker="o", ms=12, label="Window=%d" % window_3)

pdf_ax.set_ylabel(r"Probability $P(V_T(t))$")
pdf_ax.set_xlabel(r"Volatility $V_T(t)$")

# Fitted power law, labelled with the exponent computed above.
fit.power_law.plot_pdf(ax=pdf_ax, lw=5, color="k", label=r"$\alpha=%.2f$" % alpha)

pdf_ax.legend();

In the lectures you saw that the exponent of the power law was close to 4, which does not coincide with our result of $\alpha\approx2$. This is because in that paper (https://journals.aps.org/pre/pdf/10.1103/PhysRevE.60.1390), the volatility is defined as the moving average of the absolute value of log returns,

\begin{equation}
V_T(t)=\frac{1}{n}\sum_{t'=t}^{t+n-1}\left|R(t')\right|
\end{equation}
 
 where $T=n\Delta t$ and $R(t)$ is the log return at time $t$.

**Repeat the previous exercise with this definition of the moving volatility. What do you obtain?**

In [None]:
# Window sizes for the three moving-volatility series.
window_1 = 10
window_2 = 50
window_3 = 100

# Moving volatility defined as the moving average of the absolute log returns
# (exercise placeholders); these series are passed to powerlaw.Fit below.
V_T_1 = #CODE

V_T_2 = #CODE

V_T_3 = #CODE

# Fits with xmin=1e-6: used below only to draw the empirical PDFs.
result1 = powerlaw.Fit(V_T_1, xmin=1e-6)
result2 = powerlaw.Fit(V_T_2, xmin=1e-6)
result3 = powerlaw.Fit(V_T_3, xmin=1e-6)

# Tail fit: power law fitted to the first series above xmin=1e-2.
# NOTE(review): the std-based fit earlier uses xmin=1e-4; confirm that 1e-2
# still leaves data in the tail for this definition of the volatility.
fit = powerlaw.Fit(V_T_1, xmin=1e-2)

alpha = fit.power_law.alpha

# plot_pdf returns the matplotlib Axes it drew on, so later calls reuse it.
pdf_ax = result1.plot_pdf(ls="", marker="o", ms=12, label="Window=%d" % window_1)
result2.plot_pdf(ax=pdf_ax, ls="", marker="o", ms=12, label="Window=%d" % window_2)
result3.plot_pdf(ax=pdf_ax, ls="", marker="o", ms=12, label="Window=%d" % window_3)

# Fitted power law, labelled with the exponent computed above.
fit.power_law.plot_pdf(ax=pdf_ax, lw=5, color="k", label=r"$\alpha=%.2f$" % alpha)

pdf_ax.set_ylabel(r"Probability $P(V_T(t))$")
pdf_ax.set_xlabel(r"Volatility $V_T(t)$")

pdf_ax.legend();

# Exercise 4. Autocorrelation and power spectrum of volatility

As you have seen in the lectures, the ACF and power spectrum of the moving volatility decay slowly, showing long-range correlations. It has been empirically found that the ACF and power spectrum can be approximated by:

* ACF

\begin{equation}
ACF(\tau)\sim\frac{1}{1+\tau^{\gamma}}
\end{equation}

* Power spectrum

\begin{equation}
P(\omega)\sim\omega^{\gamma-1}
\end{equation}

with $\gamma\approx0.3$.

**Compute the autocorrelation and power spectrum of the LF data.**

* Use 1000 lags

* Plot the expected behavior of the ACF and power spectrum using the formulas above. For the power spectrum use a proportionality constant of 1e-8.

In [None]:
#CODE