# Basic Imports

In [1]:
import math
import numpy as np
import pandas as pd
from pylab import plt
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import iplot
import cufflinks
cufflinks.set_config_file(offline=True)
pd.set_option('mode.chained_assignment', None)
%config InlineBackend.figure_format = 'svg'

- NIFTY 50 Represent the first 50 companies from the NIFTY 100  

In [2]:
csv = './data/NIFTY 50.csv'
nifty50 = pd.read_csv(csv, parse_dates=True)
nifty50.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Turnover,P/E,P/B,Div Yield
0,2000-01-03,1482.15,1592.9,1482.15,1592.2,25358322,8841500000.0,25.91,4.63,0.95
1,2000-01-04,1594.4,1641.95,1594.4,1638.7,38787872,19736900000.0,26.67,4.76,0.92
2,2000-01-05,1634.55,1635.5,1555.05,1595.8,62153431,30847900000.0,25.97,4.64,0.95
3,2000-01-06,1595.8,1639.0,1595.8,1617.6,51272875,25311800000.0,26.32,4.7,0.94
4,2000-01-07,1616.6,1628.25,1597.2,1613.3,54315945,19146300000.0,26.25,4.69,0.94


In [3]:
nifty50.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5353 entries, 0 to 5352
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       5353 non-null   object 
 1   Open       5353 non-null   float64
 2   High       5353 non-null   float64
 3   Low        5353 non-null   float64
 4   Close      5353 non-null   float64
 5   Volume     5353 non-null   int64  
 6   Turnover   5353 non-null   float64
 7   P/E        5353 non-null   float64
 8   P/B        5353 non-null   float64
 9   Div Yield  5353 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 418.3+ KB


#### Let’s take a look at its various columns for further analysis.

- **The Open and Close** columns indicate the opening and closing price of the stocks on a particular day.
- **The High and Low** columns provide the highest and the lowest price for the stock on a particular day, respectively.
- **The Volume** column tells us the total volume of stocks traded on a particular day.
- **The Turnover** column refers to the total value of stocks traded during a specific period of time. The time period may be    annually, quarterly, monthly or daily
-**P/E** also called as the price-earnings ratio relates a company's share price to its earnings per share.
-**P/B** also called as Price-To-Book ratio measures the market's valuation of a company relative to its book value.
-**Div Yield** or the dividend yield is the amount of money a company pays shareholders (over the course of a year) for owning a share of its stock divided by its current stock price—displayed as a percentage.

# Missing Values

In [4]:
nifty50.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Volume       0
Turnover     0
P/E          0
P/B          0
Div Yield    0
dtype: int64

- No missing values


# Market performance 2019 onwards

In [5]:
nifty50_2019 = nifty50[nifty50['Date'] >= '2019-01-01']
nifty50_2019.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Turnover,P/E,P/B,Div Yield
4729,2019-01-01,10881.7,10923.6,10807.1,10910.1,159404542,86882600000.0,26.28,3.4,1.24
4730,2019-01-02,10868.85,10895.35,10735.05,10792.5,309665939,153522500000.0,26.0,3.36,1.25
4731,2019-01-03,10796.8,10814.05,10661.25,10672.25,286241745,150304500000.0,25.71,3.32,1.26
4732,2019-01-04,10699.7,10741.05,10628.65,10727.35,296596655,145167400000.0,25.84,3.34,1.26
4733,2019-01-07,10804.85,10835.95,10750.15,10771.8,269371080,127312900000.0,25.95,3.36,1.25


In [6]:
df = nifty50_2019
fig = go.Figure(data=[
    go.Candlestick(
    x=df['Date'],
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close']
    )])
fig.show()

 # Major single day falls - 2019 onwards

In [7]:
fig = px.line(nifty50_2019, x='Date', y='Close', title='Time Series with Range Slider and Selectors')

fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)

fig.update_layout(plot_bgcolor='rgb(250, 242, 242)',
    title='NIFTY_50 : Major single day falls -2019 onwards',
    yaxis_title='NIFTY 50 Stock',
    shapes = [dict(x0='2020-03-23', x1='2020-03-23', y0=0, y1=1, xref='x', yref='paper', line_width=2,opacity=0.3,line_color='red',editable=False),
             dict(x0='2019-09-3', x1='2019-09-3', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='red'),
             dict(x0='2020-02-1', x1='2020-02-1', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='red'),
             dict(x0='2020-03-12', x1='2020-03-12', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='red')],
    annotations=[dict(x='2020-03-23', y=0.5, xref='x', yref='paper',
                    showarrow=False, xanchor='left', text='Lockdown Phase-1 announced'),
                dict(x='2019-09-3', y=0.05, xref='x', yref='paper',
                    showarrow=False, xanchor='left', text='Multiple PSU Bank Merger Announcements'),
                dict(x='2020-02-1', y=0.5, xref='x', yref='paper',
                    showarrow=False, xanchor='right', text='Union Budget,coronavirus pandemic'),
                dict(x='2020-03-12', y=0.3, xref='x', yref='paper',
                    showarrow=False, xanchor='right', text='Coronavirus declared Pandemic by WHO')]
)
fig.show()

- The effect can be seen majorly when the first phase of the lockdown started

 # Major single day gains - 2019 onwards

In [8]:
fig = px.line(nifty50_2019, x='Date', y='Close', title='Time Series with Range Slider and Selectors')

fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.update_layout(plot_bgcolor='rgb(250, 242, 242)',
    title='NIFTY_50 : Major single day gains -2019 onwards',
    yaxis_title='NIFTY 50 Stock',
    shapes = [dict(x0='2019-05-20', x1='2019-05-20', y0=0, y1=1, xref='x', yref='paper', line_width=2,opacity=0.3,line_color='green',editable=False),
             dict(x0='2020-05-23', x1='2020-05-23', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='green'),
             dict(x0='2019-09-20', x1='2019-09-20', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='green'),
             dict(x0='2020-04-07', x1='2020-04-07', y0=0, y1=1, xref='x', yref='paper',line_width=3,opacity=0.3,line_color='green')],
    annotations=[dict(x='2019-05-20', y=0.54, xref='x', yref='paper',
                    showarrow=False, xanchor='right', text='Exit-Polls predict majority'),
                 dict(x='2019-05-20', y=0.5, xref='x', yref='paper',
                    showarrow=False, xanchor='right', text='for BJP government'),
                dict(x='2019-09-3', y=0.08, xref='x', yref='paper',
                    showarrow=False, xanchor='left', text='2019 General Elections'),
                 dict(x='2019-09-3', y=0.05, xref='x', yref='paper',
                    showarrow=False, xanchor='left', text='results announced'),
                dict(x='2019-09-20', y=0.54, xref='x', yref='paper',
                    showarrow=False, xanchor='left', text='cut in the corporate tax rate announced'),
                dict(x='2020-04-07', y=0.3, xref='x', yref='paper',
                    showarrow=False, xanchor='right', text='Italy Coronavirus Nos went down')]
)
fig.show()

# Performace of other nifty sectoral indices in 2020

In [9]:
nifty_auto = pd.read_csv('./data/NIFTY AUTO.csv', parse_dates=['Date'])
nifty_bank = pd.read_csv('./data/NIFTY BANK.csv', parse_dates=['Date'])
nifty_fmcg = pd.read_csv('./data/NIFTY FMCG.csv', parse_dates=['Date'])
nifty_IT = pd.read_csv('./data/NIFTY IT.csv', parse_dates=['Date'])
nifty_metal = pd.read_csv('./data/NIFTY METAL.csv', parse_dates=['Date'])
nifty_pharma = pd.read_csv('./data/NIFTY PHARMA.csv', parse_dates=['Date'])

#fill in missing values
nifty_auto.fillna(method='ffill',inplace=True)
nifty_bank.fillna(method='ffill',inplace=True)
nifty_fmcg.fillna(method='ffill',inplace=True)
nifty_IT.fillna(method='ffill',inplace=True)
nifty_metal.fillna(method='ffill',inplace=True)
nifty_pharma.fillna(method='ffill',inplace=True)

# Compare closing price of different indices

In [10]:
nifty_auto_2019 = nifty_auto[nifty_auto['Date'] >= '2019-12-31']
nifty_bank_2019 = nifty_bank[nifty_bank['Date'] >= '2019-12-31']
nifty_fmcg_2019 = nifty_fmcg[nifty_fmcg['Date'] >= '2019-12-31']
nifty_IT_2019 = nifty_IT[nifty_IT['Date'] >= '2019-12-31']
nifty_metal_2019 = nifty_metal[nifty_metal['Date'] >= '2019-12-31']
nifty_pharma_2019 = nifty_pharma[nifty_pharma['Date'] >= '2019-12-31']
nifty_50_2019 = nifty50[nifty50['Date'] >= '2019-12-31']

In [11]:
d = {
    "NIFTY Auto index" : nifty_auto_2019['Close'].values,
    "NIFTY Bank index" : nifty_bank_2019['Close'].values,
    "NIFTY FMCG index": nifty_fmcg_2019['Close'].values,
    "NIFTY IT index": nifty_IT_2019['Close'].values,
    "NIFTY Pharma index": nifty_pharma_2019['Close'].values,
    "NIFTY Metal index": nifty_metal_2019['Close'].values,
    "NIFTY 50 index": nifty_50_2019['Close'].values
    
    }

In [12]:
df = pd.DataFrame(data=d)
df.index = nifty_auto_2019['Date']
df.head()

Unnamed: 0_level_0,NIFTY Auto index,NIFTY Bank index,NIFTY FMCG index,NIFTY IT index,NIFTY Pharma index,NIFTY Metal index,NIFTY 50 index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-12-31,8248.3,32161.65,30121.5,15652.4,8040.15,2801.3,12168.45
2020-01-01,8210.1,32102.9,30234.25,15722.15,8047.1,2796.05,12182.5
2020-01-02,8267.45,32443.85,30266.2,15709.65,8053.95,2869.9,12282.2
2020-01-03,8168.15,32069.25,30109.25,15936.6,8111.95,2848.35,12226.65
2020-01-06,7978.75,31237.15,29799.3,15879.8,7987.35,2765.75,11993.05


In [13]:
df.iplot(kind='box')

In [14]:
fig = df.drop(['NIFTY 50 index'], axis = 1).iplot(asFigure=True, subplots=True, subplot_titles=True, legend=False)
fig.show()

- Due to the shutdown in April 2020, all sectoral indices were hit; however, the FMCG and pharmaceutical sectors have showed a robust resurgence.

# Regime Detection

- Reading Nifty 50 data

In [15]:
raw = nifty50[['Date','Close']]
raw.set_index(['Date'], drop=True, inplace=True)
raw.columns = ['Nifty50']

# Calculate Log Returns

In [16]:
rets = np.log(raw/raw.shift(1)).dropna()

# Using KMeans (unsupervised learning) to discover regime

- Importing KMeans from scikit learn python library 

In [17]:
from sklearn.cluster import KMeans

In [18]:
symbol = 'Nifty50'
data = pd.DataFrame(rets[symbol])
data.head()

Unnamed: 0_level_0,Nifty50
Date,Unnamed: 1_level_1
2000-01-04,0.028787
2000-01-05,-0.026528
2000-01-06,0.013568
2000-01-07,-0.002662
2000-01-10,0.012106


# Adding features - works as a input for KMean

- Momentum
- Volatility

In [19]:
f = ['mom', 'vol']
window = 20
data['mom'] = data[symbol].rolling(window).mean()
data['vol'] = data[symbol].rolling(window).std()
data.dropna(inplace=True)

In [20]:
data

Unnamed: 0_level_0,Nifty50,mom,vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-02-01,0.002132,-0.001359,0.018981
2000-02-02,0.024543,-0.001571,0.018647
2000-02-03,0.006215,0.000066,0.017757
2000-02-04,0.001157,-0.000555,0.017474
2000-02-07,0.022774,0.000717,0.018223
...,...,...,...
2021-07-01,-0.002643,-0.000033,0.004338
2021-07-02,0.002688,0.000165,0.004368
2021-07-05,0.007108,0.000262,0.004504
2021-07-06,-0.001017,0.000248,0.004508


In [21]:
fig = px.scatter(data_frame=data[f], x='mom', y='vol')
fig.show()

# Normalization or Gaussian normalization or z-score normalization
- Scale the dataset to improve the machine learning model's performance

In [22]:
data = (data - data.mean())/data.std()

In [23]:
fig = px.scatter(data_frame=data[f], x='mom', y='vol')
fig.show()

# Clustering

In [24]:
model = KMeans(n_clusters=4)
model.fit(data[f])
cluster = model.predict(data[f]) # give data in labels (0 1 2 3)

In [25]:
data['c'] = cluster

In [26]:
data['c'].value_counts()

1    2788
3    1629
0     637
2     279
Name: c, dtype: int64

In [27]:
fig = px.scatter(data_frame=data, x='mom', y='vol', color='c')
fig.show()

## Regime detected by KMean
- Positive Momentum and low Volatility
- Positive Momentum and high Volatility
- Negative Momentum and high volatility
- Negative Momentum and low volatility