In [3]:
## To import necessary python libraries
from nsepy import get_history
import pandas_datareader.data as web
import datetime
from datetime import date
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
import cufflinks as cf
import plotly.io as pio 

cf.go_offline()
cf.set_config_file(world_readable=True, theme='pearl')
pio.renderers.default = "notebook" # should change by looking into pio.renderers

## To display multiple output from one cell without overwriting the first
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
## NSE data reading using nsepy get_history module
# Function get_history fetches the price history of stocks/indices/derivatives and returns a pandas dataframe
# symbol = ['NIFTY 50', 'NIFTY AUTO', 'NIFTY IT', 'NIFTY BANK', 'NIFTY PHARMA','INDIA VIX', ]
# end = datetime.datetime.now()
# start = datetime.datetime(end.year-22,end.month-7,end.day-27)

# data1={}
# for s in (symbol):  
#     data=get_history(symbol=s,start=start, end=end, index=True) ## to read stocks remove the index=True command.
#     data.to_csv(f'{s}.csv', sep=',')
#     data={}

In [5]:
# creates a dataframe with different stocks data
# end = datetime.datetime.now()
# start = datetime.datetime(end.year-22,end.month-7,end.day-27)
# data = {}
# symbol = ['^N225', '^HSI']
# for s in (symbol):  
#     data = web.DataReader(s,'yahoo', start = start, end = end)
#     data.to_csv(f'{s}.csv', sep=',')
#     data ={}


In [6]:
# Create a separate, date-indexed DataFrame of closing prices for every index.
def _load_close(csv_path, column_name):
    """Read the 'Date' and 'Close' columns of `csv_path` into a date-indexed frame.

    The 'Close' column is renamed to `column_name` so that the per-index frames
    can be merged later without column-name clashes.
    """
    return pd.read_csv(
        csv_path,
        usecols=['Date', 'Close'],
        parse_dates=['Date'],
        index_col=['Date'],
    ).rename(columns={'Close': column_name})


# NOTE(review): the commented-out download cell writes f'{s}.csv' for the symbols
# '^N225' and '^HSI' (i.e. '^N225.csv' / '^HSI.csv'), while 'N225.csv' / 'HSI.csv'
# are read here -- confirm the files are renamed before this cell runs.
NIFTY_50_df = _load_close('NIFTY 50.csv', 'NIFTY50')
NIFTY_AUTO_df = _load_close('NIFTY AUTO.csv', 'NIFTYAUTO')
NIFTY_IT_df = _load_close('NIFTY IT.csv', 'NIFTYIT')
NIFTY_BANK_df = _load_close('NIFTY BANK.csv', 'NIFTYBANK')
NIFTY_PHARMA_df = _load_close('NIFTY PHARMA.csv', 'NIFTYPHARMA')
INDIA_VIX_df = _load_close('INDIA VIX.csv', 'INDIAVIX')
N225_df = _load_close('N225.csv', 'N225')
HSI_df = _load_close('HSI.csv', 'HSI')

# NIFTY_50_df.head(5)
# NIFTY_IT_df.head(5)
# NIFTY_AUTO_df.head(5)
# NIFTY_BANK_df.head(5)
# NIFTY_PHARMA_df.head(5)
# INDIA_VIX_df.head(5)
# N225_df.head(5)
# HSI_df.head(5)

FileNotFoundError: [Errno 2] No such file or directory: 'NIFTY 50.csv'

In [None]:
## Build one wide dataframe holding the CLOSE column of every index,
## outer-joined on Date, for ease of analysis
df = NIFTY_50_df
for other_df in (NIFTY_IT_df, NIFTY_AUTO_df, NIFTY_PHARMA_df, NIFTY_BANK_df,
                 INDIA_VIX_df, N225_df, HSI_df):
    df = df.merge(other_df, on='Date', how='outer')
df.head(2)

In [None]:
## Put the rows in chronological order (earliest Date first)
df = df.sort_values('Date')
df.head(10)

In [None]:
# One interactive panel per index; INDIAVIX is requested on the secondary y-axis.
colors = (
    'slategrey', 'rosybrown', 'firebrick', 'coral', 'tan',
    'olive', 'darkseagreen', 'steelblue', 'plum', 'palevioletred',
)
index_columns = ["NIFTY50", "NIFTYIT", "NIFTYAUTO", "NIFTYPHARMA", "NIFTYBANK", "N225", "HSI"]
df.iplot(
    kind="line",
    y=index_columns,
    secondary_y="INDIAVIX",
    color=colors,
    width=2,
    yTitle="Close",
    subplots=True,
    title="Index closing values 2000 TO 2022",
)

In [None]:
# All indexes drawn in a single interactive chart; INDIAVIX is requested on the secondary y-axis.
colors = (
    'slategrey', 'rosybrown', 'firebrick', 'coral', 'tan',
    'olive', 'darkseagreen', 'steelblue', 'plum', 'palevioletred',
)
index_columns = ["NIFTY50", "NIFTYIT", "NIFTYAUTO", "NIFTYPHARMA", "NIFTYBANK", "N225", "HSI"]
df.iplot(
    kind="line",
    y=index_columns,
    secondary_y="INDIAVIX",
    color=colors,
    width=2,
    yTitle="Close",
    title="Index closing values 2000 TO 2022 All in One plot",
)

In [None]:
## This table shows the correlation between the daily percentage returns of the indexes.
## Values close to +1 / -1 mean a strong positive / negative correlation;
## values close to 0 mean little linear relationship.
from scipy.stats import pearsonr

## from https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corr.html
## DataFrame.corr(method='pearson', min_periods=1)
## computes the pairwise correlation of columns, excluding NA/null values,
## so no dropna() is needed before calling .corr().
# pct_change() used to forward-fill gaps implicitly (fill_method='pad'); that default is
# deprecated in recent pandas. Forward-filling explicitly keeps the same numbers on every
# pandas version and makes the choice visible.
# NOTE(review): forward-filling turns days on which an index has no quote into 0% returns;
# drop the .ffill() if those days should be excluded from the correlation instead.
dfp = df.ffill().pct_change(fill_method=None) * 100
corr = dfp.corr()
corr.style.background_gradient(cmap='coolwarm')


In [None]:
## The p-value of each Pearson correlation is needed to validate the numbers
## in the correlation table.
## A correlation is conventionally treated as significant when its p-value is below 0.05.

def calculate_pvalues(df):
    """Return the matrix of Pearson-correlation p-values for the numeric columns of `df`.

    For every pair of columns only the rows where BOTH columns are non-null are
    used, which is the same pairwise NA handling as `DataFrame.corr`, so each
    p-value refers to the same sample as the matching correlation coefficient.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; non-numeric columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Square float frame indexed and labelled by the numeric column names,
        holding p-values rounded to 4 decimals. A cell stays NaN when the pair
        has fewer than two complete observations.
    """
    # Public equivalent of the private DataFrame._get_numeric_data().
    numeric = df.select_dtypes(include=["number", "bool"])
    columns = numeric.columns
    pvalues = pd.DataFrame(index=columns, columns=columns, dtype=float)
    for i, r in enumerate(columns):
        # The matrix is symmetric, so each pair is computed once and mirrored.
        for c in columns[i:]:
            x, y = numeric[r], numeric[c]
            paired = x.notna() & y.notna()
            if paired.sum() < 2:
                # pearsonr needs at least two observations.
                continue
            p_value = round(pearsonr(x[paired], y[paired])[1], 4)
            # Single-step .loc writes; chained `pvalues[r][c] = ...` does not
            # update the frame under pandas Copy-on-Write.
            pvalues.loc[r, c] = p_value
            pvalues.loc[c, r] = p_value
    return pvalues

calculate_pvalues(dfp)

In [None]:
## Plots of NIFTY50 against other indexes, one comparison per panel.

# The figure size is set once here. Previously figsize=(20,20) and then (20,10) were
# passed to individual Series.plot calls, and the later value silently won.
fig, axes = plt.subplots(nrows=2, ncols=2, sharey=True, figsize=(20, 10))

# (target axis, column to compare, line colour, draw on a secondary y-axis?)
comparisons = [
    (axes[0, 0], "NIFTYIT", "purple", False),
    (axes[0, 1], "NIFTYBANK", "dodgerblue", False),
    (axes[1, 0], "NIFTYAUTO", "green", False),
    (axes[1, 1], "INDIAVIX", "tomato", True),
]
for ax, column, line_color, on_secondary in comparisons:
    # The y-axis is shared between panels, so every primary axis uses the same limits.
    df["NIFTY50"].plot(ax=ax, color="grey", label="NIFTY50", legend=True, ylim=(0, 50000))
    df[column].plot(ax=ax, color=line_color, label=column, legend=True, secondary_y=on_secondary)
    ax.set_title(f"NIFTY50 vs {column}")


In [None]:
# The indexes are treated as separate dataframes as they have different sizes
# (i.e. many indexes are missing a lot of data).
# The r and p values are computed per NIFTY50 pair as a cross-verification of the tables above.
#=========================================
#====== run this for verification only====
#=========================================
# NOTE: later cells reuse df1..df7, so this cell still has to be run before them.
def _nifty_pair(column):
    """Return `(prices, returns)` for NIFTY50 paired with `column`.

    `prices` is the two-column slice of the merged `df`; `returns` is its daily
    percentage change with every row that contains a NaN removed.
    """
    prices = df[['NIFTY50', column]]
    # Explicit forward-fill reproduces the old implicit pct_change(fill_method='pad')
    # default, which is deprecated in recent pandas, on every pandas version.
    returns = (prices.ffill().pct_change(fill_method=None) * 100).dropna(axis=0)
    return prices, returns


df1, df1p = _nifty_pair('NIFTYIT')
df2, df2p = _nifty_pair('NIFTYAUTO')
df3, df3p = _nifty_pair('NIFTYPHARMA')
df4, df4p = _nifty_pair('NIFTYBANK')
df5, df5p = _nifty_pair('INDIAVIX')
df6, df6p = _nifty_pair('N225')
df7, df7p = _nifty_pair('HSI')
#df1p.info()  # uncomment (for any dfNp) to inspect the number of rows used

# pearsonr gives the correlation coefficient r and its p-value for each pair.
res1 = pearsonr(df1p.NIFTY50, df1p.NIFTYIT)
res1
res2 = pearsonr(df2p.NIFTY50, df2p.NIFTYAUTO)
res2
res3 = pearsonr(df3p.NIFTY50, df3p.NIFTYPHARMA)
res3
res4 = pearsonr(df4p.NIFTY50, df4p.NIFTYBANK)
res4
res5 = pearsonr(df5p.NIFTY50, df5p.INDIAVIX)
res5
res6 = pearsonr(df6p.NIFTY50, df6p.N225)
res6
res7 = pearsonr(df7p.NIFTY50, df7p.HSI)
res7

In [None]:
import patchworklib as pw
## patchworklib helps in composing jointplots like subplots, using expressions such as
#(((g0|g1)["g0"]/g3)["g3"]|g2).savefig("seaborn_subplots.png")
sns.set_theme()
pw.overwrite_axisgrid()

# Daily percentage returns of every NIFTY50 pair; NaN rows are not dropped here.
# Explicit forward-fill reproduces the old implicit pct_change(fill_method='pad')
# default, which is deprecated in recent pandas, on every pandas version.
df1p, df2p, df3p, df4p, df5p, df6p, df7p = (
    pair.ffill().pct_change(fill_method=None) * 100
    for pair in (df1, df2, df3, df4, df5, df6, df7)
)

In [None]:
# ## The below plot is from the original individual dataframes to check if data merge and other 
# ##operations are done well without error

# #stock_list = ['NIFTY50', 'NIFTYAUTO', 'NIFTYIT', 'NIFTYBANK', 'NIFTYPHARMA','INDIAVIX','N225', 'HSI']
# colors = ('slategrey','rosybrown','firebrick','coral','tan','olive','darkseagreen','steelblue','plum','palevioletred')

# fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 10))

# NIFTY_50_df['NIFTY50'].plot(ax=axes[0,0]).title.set_text('NIFTY50')
# NIFTY_IT_df['NIFTYIT'].plot(ax=axes[0,1]).title.set_text('NIFTYIT')
# NIFTY_AUTO_df['NIFTYAUTO'].plot(ax=axes[1,0]).title.set_text('NIFTYAUTO')
# NIFTY_PHARMA_df['NIFTYPHARMA'].plot(ax=axes[1,1]).title.set_text('NIFTYPHARMA')
# NIFTY_BANK_df['NIFTYBANK'].plot(ax=axes[2,0]).title.set_text('NIFTYBANK')
# INDIA_VIX_df['INDIAVIX'].plot(ax=axes[2,1]).title.set_text('INDIAVIX')
# N225_df['N225'].plot(ax=axes[3,0]).title.set_text('N225')
# HSI_df['HSI'].plot(ax=axes[3,1]).title.set_text('HSI')

# plt.show();

In [None]:
# # ## plotting pairplot  
# # ## Note that the correlation analysis is performed on the daily percentage change(daily returns) 
# # ## of the stock price and not on the stock price.
# import seaborn as sns
# dfp = df.pct_change()*100 #for pairplot always calculate pct_change

# #dfp.dropna(inplace = True, axis = 0)# not necessary pairplot does this automatically
# #dfp.info()
# #sns.set(style = 'ticks', font_scale = 1.25)
# #sns.pairplot(dfp)


In [None]:
## Scatter plots for a visual display of the correlation between NIFTY50 and the sector indexes.
bricks = []
for label, column, returns in (
    ("g0", "NIFTYIT", df1p),
    ("g1", "NIFTYAUTO", df2p),
    ("g2", "NIFTYPHARMA", df3p),
    ("g3", "NIFTYBANK", df4p),
):
    grid = sns.jointplot(x='NIFTY50', y=column, data=returns, kind='scatter')
    # Convert the seaborn grid into a patchworklib brick so it can be composed with `|`.
    bricks.append(pw.load_seaborngrid(grid, label=label))
g0, g1, g2, g3 = bricks
#(((g0|g1)["g0"]/g3)["g3"]|g2).savefig("seaborn_subplots.png")
# g2 (NIFTYPHARMA) used to be built but left out of the composite; it is included now
# so that every sector comparison ends up in the saved image.
(g0|g1|g2|g3).savefig("nifty50correlationimage1.png") ## Check for the image in the directory

In [None]:
# Joint scatter plots of NIFTY50 daily returns against INDIAVIX, N225 and HSI,
# composed side by side and saved as one image.
bricks = []
for label, column, returns in (
    ("g4", "INDIAVIX", df5p),
    ("g5", "N225", df6p),
    ("g6", "HSI", df7p),
):
    grid = sns.jointplot(x='NIFTY50', y=column, data=returns, kind='scatter')
    bricks.append(pw.load_seaborngrid(grid, label=label))
g4, g5, g6 = bricks
(g4|g5|g6).savefig("nifty50correlationimage2.png")

In [None]:
# #============================================
# ###   ===Do not use this method ======
# #============================================

# ## In this code NA values are dropped for the whole dataframe at once and not pairwise per column pair,
# ## hence the pearsonr r values are slightly different. It is advised to drop NA values pairwise per column pair
# ## unless there are only a few NA values.

# import numpy as np
# import pandas as pd
# from scipy.stats import pearsonr

# # df1 = pd.DataFrame(np.random.rand(10, 5), columns=['Col1', 'Col2', 'Col3', 'Col4', 'Col5'])
# # df2 = pd.DataFrame(np.random.rand(10, 5), columns=['Col1', 'Col2', 'Col3', 'Col4', 'Col5'])

# dfn = df.pct_change()
# df1=dfn.dropna(axis=0);
# df2=dfn.dropna(axis=0);

# coeffmat = np.zeros((df1.shape[1], df2.shape[1]))
# pvalmat = np.zeros((df1.shape[1], df2.shape[1]))

# for i in range(df1.shape[1]):    
#     for j in range(df2.shape[1]):     
#         corrtest = pearsonr(df1[df1.columns[i]], df2[df2.columns[j]])  

#         coeffmat[i,j] = corrtest[0]
#         pvalmat[i,j] = corrtest[1]

# dfcoeff = pd.DataFrame(coeffmat, columns=df2.columns, index=df1.columns)
# print(dfcoeff)
# corr = dfcoeff
# corr.style.background_gradient(cmap='coolwarm')

# dfpvals = pd.DataFrame(pvalmat, columns=df2.columns, index=df1.columns)
# print(dfpvals)
# corr = dfpvals
# corr.style.background_gradient(cmap='coolwarm')