In [1]:
import pandas as pd
import numpy as np
import matplotlib
%matplotlib inline
import scipy
import lxml
from bs4 import BeautifulSoup
import requests
from nsepy import get_history
from datetime import date
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_file,output_notebook

In [2]:
#Stock history
sbin = get_history(symbol='SBIN',
                    start=date(2015,1,1), 
                    end=date(2015,12,31))

In [3]:
#index price history
nifty_next50 = get_history(symbol="NIFTY NEXT 50",
                            start=date(2015,1,1),
                            end=date(2015,12,31),
                            index=True)

# 1.Moving Averages

In [28]:
#Derive week number from date for Stock history
sbin['date'] = sbin.index
sbin['week'] = sbin['date'].apply(lambda x: str(x.isocalendar()[1]).zfill(2))
del sbin['date']

#Derive week number from date for index history
nifty_next50['date'] = nifty_next50.index
nifty_next50['week'] = nifty_next50['date'].apply(lambda x: str(x.isocalendar()[1]).zfill(2))
del nifty_next50['date']

In [30]:
#week_wise closing price
sbin_wk_close = sbin.groupby(['week']).agg({"Close": "sum"})
nifty_wk_close = nifty_next50.groupby(['week']).agg({"Close": "sum"})

#moving average function
def moving_avg(data,window):
    weights = np.repeat(1.0,window)/window
    return np.convolve(data,weights,'valid')

In [31]:
windows = [4,16,24,32,52]

#moving averages for stock prices
for window in windows:
    print(moving_avg(data = sbin_wk_close.iloc[:,0],window = window))

[1328.0875 1495.925  1486.2    1459.0375 1360.9    1480.575  1408.15
 1404.9    1453.6125 1341.525  1245.5625 1234.6125 1171.75   1192.4875
 1260.6875 1248.2625 1304.6875 1312.0875 1389.6125 1384.55   1358.2875
 1316.9625 1300.6625 1303.2    1320.2625 1339.025  1340.9    1333.575
 1352.5875 1348.2    1359.95   1346.1375 1283.3875 1234.125  1125.9875
 1055.8    1000.6125 1018.55   1090.6375 1103.25   1168.8625 1163.45
 1097.025  1149.8875 1089.9375 1099.55   1148.9375 1127.2    1112.3125
 1032.35  ]
[1328.5875   1377.628125 1350.15     1336.703125 1322.7375   1331.66875
 1326.003125 1318.08125  1322.084375 1290.765625 1299.13125  1292.65625
 1288.746875 1290.140625 1322.965625 1317.396875 1333.95625  1329.06875
 1347.78125  1341.865625 1328.63125  1309.578125 1281.875    1259.678125
 1239.2125   1234.975    1229.36875  1209.690625 1201.3625   1191.08125
 1168.4      1163.76875  1135.7      1128.91875  1115.646875 1109.034375
 1092.93125  1078.475   ]
[1329.55416667 1356.59375    1348.47

In [33]:
#moving averages for index prices
for window in windows:
    print(moving_avg(data = nifty_wk_close.iloc[:,0],window = window))
    print("\n")

[ 80173.7125  90380.3875  91431.6625  91965.75    87548.9375  97250.2
  92978.9375  93674.1125  98764.8     93529.0125  88177.6     88575.0125
  84077.4125  84217.0875  88665.7375  87436.4875  91458.725   92138.575
  97813.825   98101.6     97538.25    96693.8375  96713.4375  97517.5625
  99227.8625 100955.55   102102.75   102723.2125 104046.025  104729.7375
 105460.25   104577.2875 102510.075  100140.5125  93142.45    87843.675
  82634.5125  83653.0375  88938.4875  89557.275   94884.325   94455.2
  88968.175   93042.1625  87887.9     87833.9125  92413.8375  92477.9375
  92591.1625  87859.5625]


[ 87641.215625  91344.171875  90313.484375  90412.840625  90462.46875
  91783.71875   91909.025     91946.803125  92959.796875  91644.628125
  92842.65      92907.665625  93075.5625    93501.2625    96323.9375
  96444.715625  98067.715625  98629.425    100522.565625 100729.915625
 100830.553125 100629.909375  99354.721875  98165.434375  97104.61875
  97369.709375  97410.984375  96175.3625    9

# 2.Rolling Window

In [36]:
def roll_window(data,size=10):
    return pd.rolling_window(data,size)

In [37]:
print(roll_window(sbin))

           Symbol Series  Prev Close     Open     High      Low     Last  \
Date                                                                       
2015-01-01   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-02   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-05   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-06   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-07   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-08   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-09   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-12   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-13   SBIN     EQ         NaN      NaN      NaN      NaN      NaN   
2015-01-14   SBIN     EQ     307.415  308.105  310.365  304.285  306.910   
2015-01-15   SBIN     EQ     306.700  308.850  311.235  304.615  307.350   
2015-01-16  

	DataFrame.rolling(window=10,center=False,axis=0).mean()
  


In [37]:
print(roll_window(sbin,75))

	DataFrame.rolling(window=75,center=False,axis=0).mean()
  


Unnamed: 0_level_0,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-01,SBIN,EQ,,,,,,,,,,,,
2015-01-02,SBIN,EQ,,,,,,,,,,,,
2015-01-05,SBIN,EQ,,,,,,,,,,,,
2015-01-06,SBIN,EQ,,,,,,,,,,,,
2015-01-07,SBIN,EQ,,,,,,,,,,,,
2015-01-08,SBIN,EQ,,,,,,,,,,,,
2015-01-09,SBIN,EQ,,,,,,,,,,,,
2015-01-12,SBIN,EQ,,,,,,,,,,,,
2015-01-13,SBIN,EQ,,,,,,,,,,,,
2015-01-14,SBIN,EQ,,,,,,,,,,,,


In [38]:
print(roll_window(nifty_next50))

                 Open       High        Low      Close       Volume  \
Date                                                                  
2015-01-01        NaN        NaN        NaN        NaN          NaN   
2015-01-02        NaN        NaN        NaN        NaN          NaN   
2015-01-05        NaN        NaN        NaN        NaN          NaN   
2015-01-06        NaN        NaN        NaN        NaN          NaN   
2015-01-07        NaN        NaN        NaN        NaN          NaN   
2015-01-08        NaN        NaN        NaN        NaN          NaN   
2015-01-09        NaN        NaN        NaN        NaN          NaN   
2015-01-12        NaN        NaN        NaN        NaN          NaN   
2015-01-13        NaN        NaN        NaN        NaN          NaN   
2015-01-14  18682.725  18784.810  18537.155  18647.365   70044607.1   
2015-01-15  18711.540  18813.940  18556.920  18666.540   75749123.8   
2015-01-16  18731.620  18830.120  18569.715  18684.080   76760542.2   
2015-0

	DataFrame.rolling(window=10,center=False,axis=0).mean()
  


In [39]:
print(roll_window(nifty_next50,size = 75))

                    Open          High           Low         Close  \
Date                                                                 
2015-01-01           NaN           NaN           NaN           NaN   
2015-01-02           NaN           NaN           NaN           NaN   
2015-01-05           NaN           NaN           NaN           NaN   
2015-01-06           NaN           NaN           NaN           NaN   
2015-01-07           NaN           NaN           NaN           NaN   
2015-01-08           NaN           NaN           NaN           NaN   
2015-01-09           NaN           NaN           NaN           NaN   
2015-01-12           NaN           NaN           NaN           NaN   
2015-01-13           NaN           NaN           NaN           NaN   
2015-01-14           NaN           NaN           NaN           NaN   
2015-01-15           NaN           NaN           NaN           NaN   
2015-01-16           NaN           NaN           NaN           NaN   
2015-01-19          

	DataFrame.rolling(window=75,center=False,axis=0).mean()
  


# 3.1 Volume shocks

In [42]:
#starting with index 1 instead of zero of volume column for volume shocks time series

## 1 = Volume shock, 0 = no volume shock
# 1 = +ve direction of shock , 0 = -ve direction of shock

def volume_shocker(input):
    for i in range(1,len(input)):
        if input['Volume'][i] > input['Volume'][i-1] *1.10:
            input['Vol_shock'][i] = 1
            input['Vol_shock_direction'][i] = 1 
        
        elif input['Volume'][i] <= input['Volume'][i-1] *0.90:
            input['Vol_shock'][i] = 1
            input['Vol_shock_direction'][i] = 0
        
        else:
            input['Vol_shock'][i] = 0

# 3.2 Price shocks 

In [43]:
#starting with index 1 instead of zero of close price column for price shocks time series because previous day value not known

# 1 = price shock, 0 = no price shock
# 1 = +ve direction of shock , 0 = -ve direction of shock

def price_shocker(input):
    for i in range(1,len(input)):
        if input['Close'][i] >= input['Close'][i-1] *1.02:
            input['Close_shock'][i] = 1
            input['Close_shock_direction'][i] = 1
        
        elif input['Close'][i] <= input['Close'][i-1] *0.98:
            input['Close_shock'][i] = 1
            input['Close_shock_direction'][i] = 0
        
        else:
            input['Close_shock'][i] = 0

# 3.3 price shock vs volume shock 

In [44]:
#starting with index 1 instead of zero of close price column for price shocks time series
def price_volume_shock_compare(input):
    for i in range(1,len(input)):
        if (input['Close_shock'][i] == 1) and (input['Vol_shock'][i] == 0):
            input['price_w/o_vol_shock'][i] = 1 #if only price shock ,then 'price_w/o_vol_shock' value = 1 else 0
        else:
            input['price_w/o_vol_shock'][i] = 0

In [45]:
sbin['Vol_shock'] = np.nan
sbin['Vol_shock_direction'] = np.nan
sbin['Close_shock'] = np.nan
sbin['Close_shock_direction'] = np.nan
sbin['price_w/o_vol_shock'] = np.nan
volume_shocker(sbin)
price_shocker(sbin)
price_volume_shock_compare(sbin)
#index 0 value will be nan as no info on previos day's volume.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/s

In [46]:
nifty_next50['Vol_shock'] = np.nan
nifty_next50['Vol_shock_direction'] = np.nan
nifty_next50['Close_shock'] = np.nan
nifty_next50['Close_shock_direction'] = np.nan
nifty_next50['price_w/o_vol_shock'] = np.nan

volume_shocker(nifty_next50)
price_shocker(nifty_next50)
price_volume_shock_compare(nifty_next50)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-doc

In [47]:
sbin.head(5)

Unnamed: 0_level_0,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble,week,Vol_shock,Vol_shock_direction,Close_shock,Close_shock_direction,price_w/o_vol_shock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2015-01-01,SBIN,EQ,311.85,312.45,315.0,310.7,314.0,314.0,313.67,6138488,192548900000000.0,58688,1877677,0.3059,1,,,,,
2015-01-02,SBIN,EQ,314.0,314.35,318.3,314.35,315.6,315.25,316.8,9935094,314738900000000.0,79553,4221685,0.4249,1,1.0,1.0,0.0,,0.0
2015-01-05,SBIN,EQ,315.25,316.25,316.8,312.1,312.8,312.75,313.84,9136716,286743200000000.0,88236,3845173,0.4208,2,0.0,,0.0,,0.0
2015-01-06,SBIN,EQ,312.75,310.0,311.1,298.7,299.9,299.9,305.14,15329257,467760100000000.0,169268,7424847,0.4844,2,1.0,1.0,1.0,0.0,0.0
2015-01-07,SBIN,EQ,299.9,300.0,302.55,295.15,301.4,300.15,299.95,15046745,451324300000000.0,147185,5631400,0.3743,2,0.0,,0.0,,0.0


In [48]:
nifty_next50.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Turnover,week,Vol_shock,Vol_shock_direction,Close_shock,Close_shock_direction,price_w/o_vol_shock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-01-01,18655.65,18752.65,18638.0,18734.85,42634653,11707500000.0,1,,,,,
2015-01-02,18748.3,18964.3,18748.3,18883.15,65127256,23640600000.0,1,1.0,1.0,0.0,,0.0
2015-01-05,18902.4,19053.0,18822.35,18852.1,58256767,20939000000.0,2,1.0,0.0,0.0,,0.0
2015-01-06,18752.9,18752.9,18263.6,18312.35,81096398,27642200000.0,2,1.0,1.0,1.0,0.0,0.0
2015-01-07,18323.4,18424.0,18133.7,18308.85,75084759,27795500000.0,2,0.0,,0.0,,0.0


# Part 2 -Data Visualization

In [52]:
#time series of stocks closing prices
p = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
p.line('Date', 'Close', source=sbin)
output_notebook()
show(p)

In [50]:
#time series of nifty closing prices
p = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
p.line('Date', 'Close', source=nifty_next50,color ='blue')
#output_file("indices_close_price.html")#to view as html
output_notebook()
show(p)