In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.stats as st
import quandl

In [2]:
quandl.ApiConfig.api_key = '59jzyL6L1Q5X8VqkGpTx'

In [3]:
aapl_table = quandl.get('WIKI/AAPL')

In [4]:
aapl_table.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-03-21,175.04,175.09,171.26,171.27,35247358.0,0.0,1.0,175.04,175.09,171.26,171.27,35247358.0
2018-03-22,170.0,172.68,168.6,168.845,41051076.0,0.0,1.0,170.0,172.68,168.6,168.845,41051076.0
2018-03-23,168.39,169.92,164.94,164.94,40248954.0,0.0,1.0,168.39,169.92,164.94,164.94,40248954.0
2018-03-26,168.07,173.1,166.44,172.77,36272617.0,0.0,1.0,168.07,173.1,166.44,172.77,36272617.0
2018-03-27,173.68,175.15,166.92,168.34,38962839.0,0.0,1.0,173.68,175.15,166.92,168.34,38962839.0


In [5]:
aapl_total = aapl_table[['Open', 'Close']]

In [6]:
aapl_total.tail()

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-03-21,175.04,171.27
2018-03-22,170.0,168.845
2018-03-23,168.39,164.94
2018-03-26,168.07,172.77
2018-03-27,173.68,168.34


In [7]:
aapl_log_return = np.log(aapl_total['Close']).diff().dropna()

In [8]:
aapl_log_return.tail()

Date
2018-03-21   -0.022915
2018-03-22   -0.014260
2018-03-23   -0.023399
2018-03-26    0.046379
2018-03-27   -0.025975
Name: Close, dtype: float64

In [9]:
print('Population mean:', np.mean(aapl_log_return))

Population mean: 0.00018803574598787862


In [10]:
print('Population standard deviation:', np.std(aapl_log_return))

Population standard deviation: 0.03737468260774587


In [11]:
print('10 days sample returns:', np.mean(aapl_log_return.tail(10)))

10 days sample returns: -0.006680442654968121


In [12]:
print('10 days sample standard deviation:', np.std(aapl_log_return.tail(10)))

10 days sample standard deviation: 0.019948507891523214


In [13]:
print('1000 days sample returns:', np.mean(aapl_log_return.tail(1000)))

1000 days sample returns: -0.001163340140818109


In [14]:
print('1000 days sample standard deviation:', np.std(aapl_log_return.tail(1000)))

1000 days sample standard deviation: 0.06269581584713586


In [15]:
bottom_1 = np.mean(aapl_log_return.tail(10))-1.96*np.std(aapl_log_return.tail(10))/(np.sqrt(len((aapl_log_return.tail(10)))))
upper_1 = np.mean(aapl_log_return.tail(10))+1.96*np.std(aapl_log_return.tail(10))/(np.sqrt(len((aapl_log_return.tail(10)))))

bottom_2 = np.mean(aapl_log_return.tail(1000))-1.96*np.std(aapl_log_return.tail(1000))/(np.sqrt(len((aapl_log_return.tail(1000)))))
upper_2 = np.mean(aapl_log_return.tail(1000))+1.96*np.std(aapl_log_return.tail(1000))/(np.sqrt(len((aapl_log_return.tail(1000)))))


In [16]:
print('10 days 95% confidence inverval:', (bottom_1,upper_1))

10 days 95% confidence inverval: (-0.01904465594334319, 0.005683770633406946)


In [17]:
print('1000 days 95% confidence inverval:', (bottom_2,upper_2))

1000 days 95% confidence inverval: (-0.005049267066470906, 0.0027225867848346876)


In [18]:
# Hypothesis testing

In [19]:
mean_1000 = np.mean(aapl_log_return.tail(1000))
std_1000 = np.std(aapl_log_return.tail(1000))
mean_10 = np.mean(aapl_log_return.tail(10))
std_10 = np.std(aapl_log_return.tail(10))

s = pd.Series([mean_10, std_10, mean_1000, std_1000], index=['mean_10', 'std_10','mean_1000','std_1000'])
print(s)

mean_10     -0.006680
std_10       0.019949
mean_1000   -0.001163
std_1000     0.062696
dtype: float64


In [20]:
# 90% confidence interval

bottom = 0 - 1.64 * std_1000/np.sqrt(1000)
upper = 0 + 1.64 * std_1000/np.sqrt(1000)

print(bottom, upper)

-0.0032514898765666255 0.0032514898765666255


In [21]:
# 95% confidence interval

bottom = 0 - 1.96 * std_1000/np.sqrt(1000)
upper = 0 + 1.96 * std_1000/np.sqrt(1000)

print(bottom, upper)

-0.0038859269256527967 0.0038859269256527967


In [22]:
# Z-score

print(np.sqrt(1000)*(mean_1000 - 0)/std_1000)

-0.5867703432484006


In [23]:
print(1 - st.norm.cdf(1.9488))

0.02565965688799665


In [24]:
mean_1200 = np.mean(aapl_log_return.tail(1200))
std_1200 = np.std(aapl_log_return.tail(1200))
z_score = np.sqrt(1200)*(mean_1200 - 0)/std_1200

print('z-score = ', z_score)

z-score =  -0.4723030820553395


In [25]:
p_value = (1 - st.norm.cdf(z_score))

print('p_value = ', p_value)

p_value =  0.6816447646119563
