In [144]:
import pandas as pd
import polars as pl
import numpy as np
import yfinance as yf
import scipy.stats as ss
from matplotlib import pyplot as plt
import seaborn

In [145]:
# Download monthly adjusted-close prices for all tickers and keep only the
# months in which every ticker has a quote.
start="1990-01-01"
tickers=["SGLD.MI","SPY","AGGH.MI","XEON.MI"]
# Space-separated ticker list, as passed to yf.download.
string=" ".join(tickers)
# auto_adjust=False is requested explicitly so that the 'Adj Close' column is
# present regardless of the default of the installed yfinance release
# (NOTE(review): newer releases are understood to default to auto_adjust=True,
# which removes that column -- confirm against the pinned version).
adj_close_pd=yf.download(string,start=start,interval="1mo",auto_adjust=False)['Adj Close']
# include_index=True turns the pandas date index into a regular "Date" column;
# drop_nulls() removes every row in which at least one ticker has no price.
df=pl.from_pandas(adj_close_pd,include_index=True).drop_nulls()
df

[*********************100%%**********************]  4 of 4 completed


Date,AGGH.MI,SGLD.MI,SPY,XEON.MI
datetime[ns],f64,f64,f64,f64
2019-07-01 00:00:00,5.178,124.650002,275.790955,137.026001
2019-08-01 00:00:00,5.289,135.119995,271.17334,136.965698
2019-09-01 00:00:00,5.241,130.770004,275.178925,136.913498
2019-10-01 00:00:00,5.219,131.639999,282.560486,136.841599
2019-11-01 00:00:00,5.2,128.869995,292.788727,136.780197
2019-12-01 00:00:00,5.182,131.110001,299.821808,136.722794
2020-01-01 00:00:00,5.262,139.0,301.174164,136.655701
2020-02-01 00:00:00,5.308,140.490005,277.331543,136.606995
2020-03-01 00:00:00,5.235,142.100006,241.281937,136.536499
2020-04-01 00:00:00,5.306,150.880005,273.519714,136.005096


In [146]:
# Working copy of the prices; it is rebased to 100 in the next cell.
price_norm = df.clone()

# Pair every observation date ("Prev Date") with the date one calendar month
# later ("Date"), then attach the prices of both dates: the first join brings
# in the prices at "Prev Date" under the plain ticker names, the second join
# brings in the prices at "Date", which polars suffixes with "_right".
price_var = (
    df.select(pl.col("Prev Date" if False else "Date").alias("Prev Date"))
    .with_columns(pl.col("Prev Date").dt.offset_by("1mo").alias("Date"))
    .join(df, left_on="Prev Date", right_on="Date")
    .join(df, left_on="Date", right_on="Date")
)

# Every helper column except "Date"; these are dropped once the relative
# changes have been computed.
columns = [name for name in price_var.columns if name != "Date"]

In [147]:
# Rebase every ticker to 100 at the first row of price_norm and compute its
# month-over-month relative change in price_var.
# NOTE: the final drop removes the "<ticker>" / "<ticker>_right" inputs, so this
# cell cannot be re-run on its own; re-run the cell that builds price_var first.
for ticker in tickers:
    first_price=price_norm.item(0,ticker)
    print(ticker,first_price)
    price_norm=price_norm.with_columns((100*pl.col(ticker)/first_price).alias(ticker))
    # "<ticker>" holds the price at "Prev Date", "<ticker>_right" the price at "Date".
    price_var=price_var.with_columns(((pl.col(ticker+"_right")-pl.col(ticker))/pl.col(ticker)).alias(ticker+"_var"))
# Keep only "Date" and the "<ticker>_var" columns.
price_var=price_var.drop(columns)

SGLD.MI 124.6500015258789
SPY 275.79095458984375
AGGH.MI 5.177999973297119
XEON.MI 137.0260009765625


In [148]:
# Preview the first rows of the rebased prices (each ticker scaled to 100 at the first row).
price_norm.head()

Date,AGGH.MI,SGLD.MI,SPY,XEON.MI
datetime[ns],f64,f64,f64,f64
2019-07-01 00:00:00,100.0,100.0,100.0,100.0
2019-08-01 00:00:00,102.143686,108.399513,98.325683,99.955992
2019-09-01 00:00:00,101.21669,104.909749,99.778082,99.917897
2019-10-01 00:00:00,100.791809,105.6077,102.454588,99.865425
2019-11-01 00:00:00,100.424871,103.385474,106.163281,99.820615


In [149]:
# Preview the first rows of the month-over-month relative price changes.
price_var.head()

Date,SGLD.MI_var,SPY_var,AGGH.MI_var,XEON.MI_var
datetime[ns],f64,f64,f64,f64
2019-08-01 00:00:00,0.083995,-0.016743,0.021437,-0.00044
2019-09-01 00:00:00,-0.032194,0.014771,-0.009075,-0.000381
2019-10-01 00:00:00,0.006653,0.026825,-0.004198,-0.000525
2019-11-01 00:00:00,-0.021042,0.036198,-0.003641,-0.000449
2019-12-01 00:00:00,0.017382,0.024021,-0.003461,-0.00042


In [150]:
# Plot the rebased prices with "Date" on the x axis.
# NOTE(review): presumably every remaining column is drawn as its own series -- depends on the plotting backend behind DataFrame.plot.
price_norm.plot(x="Date")

In [151]:
# Summary statistics of the relative changes; the "Date" column is removed from the summary table.
price_var.describe().drop("Date")

statistic,SGLD.MI_var,SPY_var,AGGH.MI_var,XEON.MI_var
str,f64,f64,f64,f64
"""count""",58.0,58.0,58.0,58.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",0.009786,0.012448,-0.001663,0.000566
"""std""",0.035315,0.053751,0.014432,0.001652
"""min""",-0.078245,-0.129987,-0.034483,-0.003892
"""25%""",-0.008407,-0.02103,-0.010411,-0.000491
"""50%""",0.010719,0.024021,-0.002813,-0.000368
"""75%""",0.030421,0.042585,0.006101,0.002205
"""max""",0.086773,0.13361,0.033311,0.003819


In [152]:
# Bar chart of the relative changes with "Date" on the x axis.
price_var.plot.bar(x="Date")

In [153]:
# Pairwise correlation matrix of the relative changes; "Date" is removed first
# so that only the "<ticker>_var" columns enter the computation.
price_corr=price_var.drop("Date")
price_corr.corr()

SGLD.MI_var,SPY_var,AGGH.MI_var,XEON.MI_var
f64,f64,f64,f64
1.0,-0.053245,0.253746,0.024624
-0.053245,1.0,0.555722,-0.011587
0.253746,0.555722,1.0,0.082342
0.024624,-0.011587,0.082342,1.0


In [154]:
# Verify autocorrelation: correlate the relative changes of the first ticker
# with the same series shifted by each lag (in months).
lags = [6,12,18,24,30,36,42,48,54,60]

# Rebased price level of the first ticker as a flat array. It is not used by
# the loop below; kept in case a later cell reads it.
timeline = price_norm.select(pl.col(tickers[0])).to_numpy()
timeline = np.reshape(timeline,timeline.size)

# `nprice_var` is not defined in any visible cell, so on a fresh kernel the
# loop below raised NameError. It is built here explicitly.
# NOTE(review): assumed to be the relative-change series of the first ticker
# (name suggests "numpy price_var") -- confirm this is the intended series.
nprice_var = price_var.get_column(tickers[0]+"_var").to_numpy()

corr=[]
for l in lags:
    # A correlation needs at least two overlapping observations; for longer
    # lags the slices would be empty or single-element, so record NaN instead
    # of calling np.corrcoef on them.
    if nprice_var.size - l < 2:
        correl = float("nan")
    else:
        arr1=nprice_var[:-l]
        arr2=nprice_var[l:]
        correl = float(np.corrcoef(arr1,arr2)[0][1])
    corr.append([l,correl])
# Each inner list is one row. Orientation and column names are stated
# explicitly instead of relying on polars' orientation inference and the
# auto-generated "column_N" names.
corr_df=pl.DataFrame(corr,schema=["lag (months)","correlation"],orient="row")
corr_df.plot.bar(x="lag (months)",y="correlation",title="Autocorrelation",ylim=[-1,1])