In [1]:
!pip install statsmodels



- Run the below to import the data
- The data contains All stocks in the S&P 500 index, and their historical prices for the year 2011

In [2]:
import pandas as pd
dataFrame = pd.read_csv('dow_jones_index.data',parse_dates=["date"], index_col="date")
dataFrame.head()

Unnamed: 0_level_0,quarter,stock,open,high,low,close,volume,percent_change_price,percent_change_volume_over_last_wk,previous_weeks_volume,next_weeks_open,next_weeks_close,percent_change_next_weeks_price,days_to_next_dividend,percent_return_next_dividend
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2011-01-07,1,AA,$15.82,$16.72,$15.78,$16.42,239655616,3.79267,,,$16.71,$15.97,-4.42849,26,0.182704
2011-01-14,1,AA,$16.71,$16.71,$15.64,$15.97,242963398,-4.42849,1.380223,239655616.0,$16.19,$15.79,-2.47066,19,0.187852
2011-01-21,1,AA,$16.19,$16.38,$15.60,$15.79,138428495,-2.47066,-43.024959,242963398.0,$15.87,$16.13,1.63831,12,0.189994
2011-01-28,1,AA,$15.87,$16.63,$15.82,$16.13,151379173,1.63831,9.3555,138428495.0,$16.18,$17.14,5.93325,5,0.185989
2011-02-04,1,AA,$16.18,$17.39,$16.18,$17.14,154387761,5.93325,1.987452,151379173.0,$17.33,$17.37,0.230814,97,0.175029


Run the below cell to filter the closing price related to stock 'AA'

In [3]:
closeTS = dataFrame[(dataFrame.stock == 'AA')].close.str.replace('$',' ').astype(float)

- Using the data filtered in the above step list all the closing price from **jan to march** <br>
- assign the resulting series to variable 'close_AA'

In [4]:
close_AA = closeTS.loc['2011-01-01':'2011-04-01']
print (close_AA)

date
2011-01-07    16.42
2011-01-14    15.97
2011-01-21    15.79
2011-01-28    16.13
2011-02-04    17.14
2011-02-11    17.37
2011-02-18    17.28
2011-02-25    16.68
2011-03-04    16.58
2011-03-11    16.03
2011-03-18    16.11
2011-03-25    17.09
2011-04-01    17.47
Name: close, dtype: float64


- Upsample the data filtered in the above step month wise and fill the max value of closing price for each month
- return the samples of down sampled data to variable 'upsample'

In [5]:
upsample = closeTS.resample('M').max()
print(upsample)

date
2011-01-31    16.42
2011-02-28    17.37
2011-03-31    17.09
2011-04-30    17.92
2011-05-31    17.15
2011-06-30    15.92
Freq: M, Name: close, dtype: float64


- downsample the data filtered in the above step day wise and perform interpolation to forward fill the first two 'Nan' values.
- return the first 10 samples of downsampled data to variable 'downsample'

In [6]:
downsample = closeTS.resample('D').interpolate(method='pad', limit=4, limit_direction='forward')[0:10]
print (downsample)

date
2011-01-07    16.42
2011-01-08    16.42
2011-01-09    16.42
2011-01-10    16.42
2011-01-11    16.42
2011-01-12      NaN
2011-01-13      NaN
2011-01-14    15.97
2011-01-15    15.97
2011-01-16    15.97
Freq: D, Name: close, dtype: float64


Run the below cell to filter the opening price related to stock 'WMT'.The sample is returned to variable 'open_WMT_Ts'

In [7]:
open_WMT_Ts = dataFrame[(dataFrame.stock == 'WMT')].open.str.replace('$',' ').astype(float)

- from statsmodels import adfuller method

In [8]:
from statsmodels.tsa.stattools import adfuller

- perform stationarity check on WMT opening price using adfuller method and return the result to variable  'tsResult'
- Find the value of ADF Statistic from the above test result 

In [9]:
tsResult = adfuller(open_WMT_Ts)
print('ADF Statistic: %f' % tsResult[0])
print('p-value: %f' % tsResult[1])
for key, value in tsResult[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -3.892828
p-value: 0.002088
	1%: -3.924
	5%: -3.068
	10%: -2.674


Run the below cell to filter the opening price related to stock 'XOM'.The sample is returned to variable 'close_XOM_Ts'

In [10]:
close_XOM_Ts = dataFrame[(dataFrame.stock == 'XOM')].close.str.replace('$',' ').astype(float)

- perform stationarity check on XOM closing price using adfuller method and return the result to variable  'tsResult'
- Find the value of ADF Statistic from the above test result

In [11]:
tsResult = adfuller(close_XOM_Ts)
print('ADF Statistic: %f' % tsResult[0])
print('p-value: %f' % tsResult[1])
for key, value in tsResult[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: 0.913251
p-value: 0.993261
	1%: -3.964
	5%: -3.085
	10%: -2.682
