In [42]:
import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials

# Ticker handed to yf.download in the next cell.
# NOTE(review): '^GSPC' is presumably Yahoo Finance's symbol for the S&P 500 index — confirm.
SYMBOL = '^GSPC'

In [43]:
# Fetch the daily price history for SYMBOL once; every later cell reads from `df`.
# The resulting columns (see the recorded output) are Open/High/Low/Close/Adj Close/Volume,
# indexed by trading date.
df = yf.download(
    SYMBOL,
    start='2000-01-01',
    end='2022-01-31',
    progress=False,
)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-12-31,1464.469971,1472.420044,1458.189941,1469.25,1469.25,374050000
2000-01-03,1469.25,1478.0,1438.359985,1455.219971,1455.219971,931800000
2000-01-04,1455.219971,1455.219971,1397.430054,1399.420044,1399.420044,1009000000
2000-01-05,1399.420044,1413.27002,1377.680054,1402.109985,1402.109985,1085500000
2000-01-06,1402.109985,1411.900024,1392.099976,1403.449951,1403.449951,1092300000


In [44]:
def get_prices_of(month, year):
    """Return the 'Close' column of the global ``df`` for one calendar month.

    The rows are selected by looking up the string "<year>-<month>" in
    ``df.loc``, i.e. every row of ``df`` that falls in that month.
    """
    period_key = f'{year}-{month}'
    monthly_rows = df.loc[period_key]
    return monthly_rows['Close']

In [128]:
def deviation_of(month, year):
    """Build a per-day deviation table for one month.

    Takes the closing prices returned by ``get_prices_of(month, year)`` and
    returns a new DataFrame that keeps the price column and adds:

    * ``deviation``      - price minus the mean price of that month
    * ``deviation_norm`` - that deviation divided by the monthly mean

    The returned frame is indexed by day of the month (index name ``'day'``)
    and records the requested ``month`` and ``year`` in ``.attrs``.
    """
    frame = pd.DataFrame(get_prices_of(month, year))

    # Both quantities are computed before any column is added, so they only
    # ever see the original price column.
    monthly_mean = frame.mean()
    offset = frame - monthly_mean

    frame['deviation'] = offset
    frame['deviation_norm'] = offset / monthly_mean
    frame.attrs.update(month=month, year=year)

    # Replace the date index with the plain day-of-month number.
    frame.index = frame.index.map(lambda stamp: stamp.day).rename('day')
    return frame

In [169]:
import plotly.express as px
import plotly.io as pio
# Select the plotly renderer used by every fig.show() in this notebook.
pio.renderers.default = "notebook_connected"

# Quick visual check of a single month: normalised deviation per day of month.
prices = deviation_of(1, 2000)
fig = px.scatter(
    x=prices.index,
    y=prices['deviation_norm'],
    title='Price during 01-2000',
)
fig.show()


In [185]:
# Build one wide frame of normalised deviations:
#   rows    -> day of the month (1..31)
#   columns -> one per month, titled "<month>-<year>", for 2000 through 2021
#
# The per-month columns are collected first and concatenated ONCE; calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
# Column titles come straight from the loop variables instead of being read
# back from Series.attrs, whose propagation from a DataFrame to its columns is
# an experimental pandas feature.
day_axis = pd.DataFrame(index=pd.Index(range(1, 32), name='day'))
monthly_devs = [
    deviation_of(month, year)['deviation_norm'].rename(f'{month}-{year}')
    for year in range(2000, 2022)
    for month in range(1, 13)
]
# The empty `day_axis` frame goes first so the result always spans days 1..31;
# days a month does not contain are left as NaN by the outer join.
dev_df = pd.concat([day_axis, *monthly_devs], axis=1)

In [347]:
def squared_deviation_sum(sum, new_val, mean):
    """Fold one value into a running sum of squared deviations, skipping NaNs.

    Args:
        sum: running total so far; a NaN total is treated as 0.
        new_val: next observation; if it is NaN the total is returned as is.
        mean: value the deviation is measured from.

    Returns:
        ``(new_val - mean)**2 + sum`` for a usable ``new_val``, otherwise the
        (NaN-cleaned) running total.
    """
    # A value that compares unequal to itself is NaN.
    if sum != sum:
        sum = 0
    if new_val != new_val:
        return sum
    return (new_val - mean) ** 2 + sum

import numpy as np
# Per-day-of-month statistics across months. Every library result is paired
# with a hand-computed "my_*" column so the two can be compared side by side.

# NOTE(review): leftover debugging cap — only the first 3 month-columns are
# analysed. Set this to len(dev_df.columns) for the full run.
prices_len = 3

# Explicit copies: `prices` is the read-only input, `devs` receives the new
# columns. Assigning columns onto an iloc slice risks SettingWithCopyWarning.
prices = dev_df.iloc[:, :prices_len].copy()
devs = prices.copy()

# Number of non-NaN observations per day (months lacking that day are NaN).
obs_per_day = prices.count(axis=1)

devs['mean'] = prices.mean(axis=1)
devs['my_mean'] = prices.sum(axis=1) / obs_per_day

# Sum of squared deviations from the row mean; NaNs are skipped by .sum().
devs['sum_dev_sqr'] = prices.sub(devs['mean'], axis=0).pow(2).sum(axis=1)
devs['my_variance'] = devs['sum_dev_sqr'] / obs_per_day
devs['variance'] = np.nanvar(prices, axis=1)

# Derived from the hand-rolled variance (previously it reused the library
# 'variance' column, which made this cross-check meaningless).
devs['my_std_dev'] = np.sqrt(devs['my_variance'])
devs['std_dev'] = prices.std(axis=1, ddof=0, numeric_only=True)
devs['std_dev_from_variance'] = np.sqrt(devs['variance'])
devs

Unnamed: 0_level_0,1-2000,2-2000,3-2000,mean,my_mean,sum_dev_sqr,my_variance,variance,my_std_dev,std_dev,std_dev_from_variance
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,,0.014692,-0.043699,-0.014503,-0.014503,0.001705,0.000852,0.000852,0.029195,0.029195,0.029195
2,,0.014577,-0.041917,-0.01367,-0.01367,0.001596,0.000798,0.000798,0.028247,0.028247,0.028247
3,0.020788,0.025989,-0.022911,0.007955,0.007955,0.001443,0.000481,0.000481,0.021929,0.021929,0.021929
4,-0.018354,0.025557,,0.003601,0.003601,0.000964,0.000482,0.000482,0.021956,0.021956,0.021956
5,-0.016467,,,-0.016467,-0.016467,0.0,0.0,0.0,0.0,0.0,0.0
6,-0.015527,,-0.035316,-0.025421,-0.025421,0.000196,9.8e-05,9.8e-05,0.009894,0.009894,0.009894
7,0.011142,0.025463,-0.060041,-0.007812,-0.007812,0.004194,0.001398,0.001398,0.037392,0.037392,0.037392
8,,0.038049,-0.052359,-0.007155,-0.007155,0.004087,0.002043,0.002043,0.045204,0.045204,0.045204
9,,0.016442,-0.028098,-0.005828,-0.005828,0.000992,0.000496,0.000496,0.02227,0.02227,0.02227
10,0.022457,0.020128,-0.032688,0.003299,0.003299,0.001945,0.000648,0.000648,0.025464,0.025464,0.025464


In [308]:
# Make sure the index carries the 'day' label used on the x axis (it may have
# been lost while the frame was assembled with concat).
devs.index.name = 'day'

# Mean normalised deviation for each day of the month.
fig = px.scatter(devs, x=devs.index, y=devs['mean'], title='Mean price per day of month')
fig.show()

# Same plot with the per-day standard deviation as error bars.
# Fixed: the original referenced `devs.sjtd_dev`, a typo for the 'std_dev'
# column, which raises AttributeError.
fig = px.scatter(
    devs,
    x=devs.index,
    y=devs['mean'],
    error_y=devs['std_dev'],
    title='Mean price per day of month + std dev',
)
fig.show()
