In [185]:
import pandas as pd
import numpy as np
import plotly.express as px
from scipy.stats import t, norm
from plotly.subplots import make_subplots

In [186]:
# Read the price table from the project's data directory, then preview
# the first five rows. No date parsing is requested, so `Date` is loaded
# as plain text.
full_df = pd.read_csv(filepath_or_buffer='../../data/full_data/full_df.csv')
full_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Name
0,2009-12-31,7.611786,7.619643,7.52,7.526071,6.444381,352410800,AAPL
1,2010-01-04,7.6225,7.660714,7.585,7.643214,6.544686,493729600,AAPL
2,2010-01-05,7.664286,7.699643,7.616071,7.656429,6.556002,601904800,AAPL
3,2010-01-06,7.656429,7.686786,7.526786,7.534643,6.451719,552160000,AAPL
4,2010-01-07,7.5625,7.571429,7.466071,7.520714,6.439793,477131200,AAPL


In [187]:
import datetime

In [188]:
# Keep only the AAPL rows dated 2015-01-01 or later. `Date` is compared
# as a string here, which orders correctly only because the values are
# ISO-formatted (YYYY-MM-DD). `.copy()` detaches the slice so columns can
# be added to it later without touching `full_df`.
aapl_df = full_df.loc[
    full_df['Name'].eq('AAPL') & full_df['Date'].ge('2015-01-01')
].copy()

aapl_df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Name
3050,2022-02-11,172.330002,173.080002,168.039993,168.639999,168.639999,98566000,AAPL
3051,2022-02-14,167.369995,169.580002,166.559998,168.880005,168.880005,86062800,AAPL
3052,2022-02-15,170.970001,172.949997,170.25,172.789993,172.789993,64286300,AAPL
3053,2022-02-16,171.850006,173.339996,170.050003,172.550003,172.550003,61067300,AAPL
3054,2022-02-17,171.850006,171.720001,170.75,171.514999,171.514999,5299200,AAPL


In [189]:
# Daily log return: ln(P_t) - ln(P_{t-1}) on the adjusted close.
# The first row has no previous price, so its value is NaN.
aapl_df['log_return'] = np.log(aapl_df['Adj Close']).diff()
# Simple (arithmetic) return over the same price series, for comparison.
aapl_df['pct change'] = aapl_df['Adj Close'].pct_change()
aapl_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Name,log_return,pct change
1259,2015-01-02,27.8475,27.860001,26.8375,27.3325,24.71451,212818400,AAPL,,
1260,2015-01-05,27.0725,27.1625,26.352501,26.5625,24.018259,257142000,AAPL,-0.028576,-0.028172
1261,2015-01-06,26.635,26.8575,26.157499,26.565001,24.020523,263188400,AAPL,9.4e-05,9.4e-05
1262,2015-01-07,26.799999,27.049999,26.674999,26.9375,24.357342,160423600,AAPL,0.013925,0.014022
1263,2015-01-08,27.307501,28.0375,27.174999,27.9725,25.293205,237458000,AAPL,0.037702,0.038422


In [190]:
# Histogram of the daily log returns (automatic binning).
fig = px.histogram(aapl_df['log_return'])
# Title typo fixed: 'retutn' -> 'return'.
fig.update_layout(title = 'Distribution of log return values',
                template = 'plotly_dark',
                width=1000,
                height=500)
fig.show()

In [191]:
# Show the (rows, columns) size of the AAPL frame after the return columns were added.
aapl_df.shape

(1796, 10)

In [192]:
# Fit a Student's t distribution to the log returns. NaNs are dropped
# first because the leading return is undefined.
params = t.fit(aapl_df['log_return'].dropna())

# 100 evenly spaced points from the smallest to the largest observed log
# return; used as the x-axis when evaluating the fitted density.
x_ls = np.linspace(
    aapl_df['log_return'].min(), aapl_df['log_return'].max(), num=100
)

In [193]:
# Unpack the fitted t parameters. NOTE: `df` here is the distribution's
# degrees of freedom, not a DataFrame.
df, loc, scale = params

In [194]:
# Evaluate the fitted t density at every grid point in `x_ls`.
y_ls = t.pdf(x_ls, df=df, loc=loc, scale=scale)

In [197]:
import plotly.graph_objects as go

# Overlay the fitted Student's t density on the empirical histogram.
fig = go.Figure()
# `histnorm='probability density'` rescales the bars so the histogram
# integrates to 1. Without it the bars are raw counts, while `y_ls` is a
# density, so the fitted curve would sit on a completely different
# y-scale and the visual comparison would be meaningless.
fig.add_trace(go.Histogram(x = aapl_df['log_return'],
                           nbinsx = 1000,
                           histnorm = 'probability density',
                           name = 'Empirical log returns'))
fig.add_trace(go.Scatter(x = x_ls, y = y_ls, mode = 'lines',
                         name = "Fitted Student's t pdf"))
fig.update_layout(title = "Log returns vs fitted Student's t density",
                xaxis_title = 'log return',
                yaxis_title = 'density',
                template = 'plotly_dark',
                width=1000,
                height=500)
fig.show()