# Confidence Interval

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [2]:
ms = pd.read_csv('./data/msft.csv')
ms.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2015-11-18,53.0,53.98,52.98,53.849998,49.26849,29710000
1,2015-11-19,53.990002,54.66,53.779999,53.939999,49.350838,28149200
2,2015-11-20,54.25,54.299999,53.27,54.189999,49.579559,37147600
3,2015-11-23,54.25,54.459999,53.75,54.189999,49.579559,28235900
4,2015-11-24,53.919998,54.439999,53.580002,54.25,49.634457,24600000


## Estimate the average stock return with 90% Confidence Interval

In [3]:
# we will use log return for average stock return of Microsoft

ms['logReturn'] = np.log(ms['Close'].shift(-1)) - np.log(ms['Close'])

In [4]:
# Lets build 90% confidence interval for log return
sample_size = ms['logReturn'].shape[0]
sample_mean = ms['logReturn'].mean()
sample_std = ms['logReturn'].std(ddof=1) / sample_size**0.5

# left and right quantile
z_left = norm.ppf(0.05)
z_right = norm.ppf(0.95)

# upper and lower bound
interval_left = sample_mean + sample_std * z_left
interval_right = sample_mean + sample_std * z_right

In [5]:
# 90% confidence interval tells you that there will be 90% chance that the average stock return lies between "interval_left"
# and "interval_right".

print('90% confidence interval is ', (interval_left, interval_right))

90% confidence interval is  (0.0002849169750699989, 0.001912095595629406)


** Expected output: ** 90% confidence interval is  (0.0002849169750699989, 0.001912095595629406)