# Nifty 50 March

Code to predict the daily increase or decrease of all the stocks in Nifty 50 using a combination of Random Forest, K-nearest-neighbours and a linear support vector classifier; the predictions of the three models are combined by majority vote in a Voting Classifier. If the closing value increases by more than 2%, the model returns 1; if it decreases by more than 2%, the model returns -1; otherwise the model returns 0. Training data used is from 1/3/2019 to 28/2/2020.

Functions used:

save_symbols: To get and save the symbols of all the 50 companies

get_data_from_yahoo: To obtain the data from yahoo finance for the duration of time.

compile_data: to compile the Adjusted Close values of all the stocks into a single CSV file

visualize_data: to get the correlation matrix of all the companies and obtain a heatmap for its visualization

process_data_for_labels: to compute, from the dataframe obtained in the previous step, the percentage change of a ticker's price over each of the next 1 to 28 trading days

buy_sell_hold: to define the condition for buy, sell, hold

extract_featuresets: to preprocess the data for applying the classifiers

apply_models: to apply the selected model and get its accuracy for the 18 working days of the month of March

In [1]:
%matplotlib ipympl

import bs4 as bs
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import os
import pandas as pd
import pandas_datareader.data as pdr
import pickle
import requests
from collections import Counter
from sklearn import svm, neighbors
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier



style.use('ggplot')


def save_symbols():
    """Scrape the NIFTY 50 ticker symbols from Wikipedia and cache them.

    Downloads the NIFTY 50 Wikipedia page, takes the first table whose class
    is ``wikitable sortable``, reads the second cell of every row after the
    header row as the ticker symbol, pickles the resulting list to
    ``nifty50.pickle`` and returns it.

    Returns:
        list[str]: Ticker symbols in table order, with surrounding
        whitespace removed.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the expected table is not present in the page.
    """
    # A timeout keeps the notebook from hanging forever on a stalled request.
    resp = requests.get('https://en.wikipedia.org/wiki/NIFTY_50', timeout=30)
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError('NIFTY 50 constituents table not found on the page')
    tickers = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Skip rows that do not carry a symbol cell instead of crashing.
        if len(cells) < 2:
            continue
        # Cell text can carry a trailing newline, which would corrupt the
        # file names and lookups built from the ticker later on.
        tickers.append(cells[1].text.strip())
    with open("nifty50.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers


save_symbols()


  from pandas.util.testing import assert_frame_equal


['ADANIPORTS.NS',
 'ASIANPAINT.NS',
 'AXISBANK.NS',
 'BAJAJ-AUTO.NS',
 'BAJFINANCE.NS',
 'BAJAJFINSV.NS',
 'BHARTIARTL.NS',
 'INFRATEL.NS',
 'BPCL.NS',
 'CIPLA.NS',
 'COALINDIA.NS',
 'DRREDDY.NS',
 'EICHERMOT.NS',
 'GAIL.NS',
 'GRASIM.NS',
 'HCLTECH.NS',
 'HDFC.NS',
 'HDFCBANK.NS',
 'HEROMOTOCO.NS',
 'HINDALCO.NS',
 'HINDUNILVR.NS',
 'BRITANNIA.NS',
 'ICICIBANK.NS',
 'INDUSINDBK.NS',
 'INFY.NS',
 'IOC.NS',
 'ITC.NS',
 'JSWSTEEL.NS',
 'KOTAKBANK.NS',
 'LT.NS',
 'M&M.NS',
 'MARUTI.NS',
 'NESTLEIND.NS',
 'NTPC.NS',
 'ONGC.NS',
 'POWERGRID.NS',
 'RELIANCE.NS',
 'SBIN.NS',
 'SUNPHARMA.NS',
 'TCS.NS',
 'TATAMOTORS.NS',
 'TATASTEEL.NS',
 'TECHM.NS',
 'TITAN.NS',
 'ULTRACEMCO.NS',
 'UPL.NS',
 'VEDL.NS',
 'WIPRO.NS',
 'SHREECEM.NS',
 'ZEEL.NS']

In [2]:
def get_data_from_yahoo(reload_nifty50=False):
    """Download price history for each ticker into ``nifty_stock_dfs``.

    The ticker list is read from ``nifty50.pickle`` unless ``reload_nifty50``
    is truthy, in which case it is rebuilt with ``save_symbols()``. For every
    ticker the symbol is printed; if ``nifty_stock_dfs/<ticker>.csv`` does not
    exist yet, data from 2019-03-01 up to the current time is fetched through
    ``pdr.get_data_yahoo`` and written there indexed by ``Date``. Tickers that
    already have a file are reported and skipped.

    Args:
        reload_nifty50: When truthy, refresh the ticker list before
            downloading.
    """
    if not reload_nifty50:
        with open("nifty50.pickle", "rb") as handle:
            tickers = pickle.load(handle)
    else:
        tickers = save_symbols()

    if not os.path.exists('nifty_stock_dfs'):
        os.makedirs('nifty_stock_dfs')

    start = dt.datetime(2019, 3, 1)
    end = dt.datetime.now()

    for ticker in tickers:
        print(ticker)
        csv_path = 'nifty_stock_dfs/{}.csv'.format(ticker)

        # Existing files are never refreshed; only missing ones are fetched.
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue

        df = pdr.get_data_yahoo(ticker, start, end)
        df.reset_index(inplace=True)
        df.set_index("Date", inplace=True)
        df.to_csv(csv_path)


get_data_from_yahoo()

ADANIPORTS.NS
Already have ADANIPORTS.NS
ASIANPAINT.NS
Already have ASIANPAINT.NS
AXISBANK.NS
Already have AXISBANK.NS
BAJAJ-AUTO.NS
Already have BAJAJ-AUTO.NS
BAJFINANCE.NS
Already have BAJFINANCE.NS
BAJAJFINSV.NS
Already have BAJAJFINSV.NS
BHARTIARTL.NS
Already have BHARTIARTL.NS
INFRATEL.NS
Already have INFRATEL.NS
BPCL.NS
Already have BPCL.NS
CIPLA.NS
Already have CIPLA.NS
COALINDIA.NS
Already have COALINDIA.NS
DRREDDY.NS
Already have DRREDDY.NS
EICHERMOT.NS
Already have EICHERMOT.NS
GAIL.NS
Already have GAIL.NS
GRASIM.NS
Already have GRASIM.NS
HCLTECH.NS
Already have HCLTECH.NS
HDFC.NS
Already have HDFC.NS
HDFCBANK.NS
Already have HDFCBANK.NS
HEROMOTOCO.NS
Already have HEROMOTOCO.NS
HINDALCO.NS
Already have HINDALCO.NS
HINDUNILVR.NS
Already have HINDUNILVR.NS
BRITANNIA.NS
Already have BRITANNIA.NS
ICICIBANK.NS
Already have ICICIBANK.NS
INDUSINDBK.NS
Already have INDUSINDBK.NS
INFY.NS
Already have INFY.NS
IOC.NS
Already have IOC.NS
ITC.NS
Already have ITC.NS
JSWSTEEL.NS
Already hav

In [3]:
def compile_data():
    """Join the adjusted close of every ticker into one CSV file.

    Reads the ticker list from ``nifty50.pickle`` and, for each ticker, loads
    ``nifty_stock_dfs/<ticker>.csv``, indexes it by ``Date``, renames the
    ``Adj Close`` column to the ticker symbol and drops the ``Open``,
    ``High``, ``Low``, ``Close`` and ``Volume`` columns. The per-ticker
    frames are outer-joined on the date index, the head of the result is
    printed, and the full frame is written to ``nifty_joined_closes.csv``.
    A progress counter is printed every tenth ticker.
    """
    with open("nifty50.pickle", "rb") as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        df = pd.read_csv('nifty_stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)

        df.rename(columns={'Adj Close': ticker}, inplace=True)
        # Use the ``columns=`` keyword: passing the axis positionally was
        # deprecated in pandas and is rejected by pandas 2.x.
        df.drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'], inplace=True)

        if main_df.empty:
            main_df = df
        else:
            # Outer join keeps dates that are missing for some tickers.
            main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)
    print(main_df.head())
    main_df.to_csv('nifty_joined_closes.csv')


compile_data()

0
10
20
30
40
            ADANIPORTS.NS  ASIANPAINT.NS  AXISBANK.NS  BAJAJ-AUTO.NS  \
Date                                                                   
2019-03-01     329.384583    1376.789062   701.528809    2680.434814   
2019-03-05     335.365295    1388.160522   731.141968    2718.111572   
2019-03-06     333.190430    1378.123779   719.706177    2740.437256   
2019-03-07     336.996368    1379.854248   732.839905    2733.229248   
2019-03-08     338.232086    1366.900757   732.490356    2766.740479   

            BAJFINANCE.NS  BAJAJFINSV.NS  BHARTIARTL.NS  INFRATEL.NS  \
Date                                                                   
2019-03-01    2650.003418    6434.886230     282.477997   277.880035   
2019-03-05    2684.264404    6537.944336     282.661011   280.627991   
2019-03-06    2755.525635    6603.286621     284.084015   286.692566   
2019-03-07    2723.754639    6592.645996     282.477997   289.630066   
2019-03-08    2757.119141    6611.080078     283.

In [4]:
def visualize_data():
    """Compute the correlation matrix of the joined closes and plot it.

    Reads ``nifty_joined_closes.csv``, computes the pairwise correlation of
    its columns, prints the first rows of the matrix, saves the matrix to
    ``niftycorr.csv`` and shows it as a red-yellow-green heatmap whose colour
    scale is fixed to the range [-1, 1].
    """
    # Use the first column (Date) as the index so that only price columns
    # reach ``corr()``; a string Date column makes ``corr()`` fail on
    # pandas 2.x, where non-numeric columns are no longer dropped silently.
    df = pd.read_csv('nifty_joined_closes.csv', index_col=0)
    df_corr = df.corr()
    print(df_corr.head())
    df_corr.to_csv('niftycorr.csv')
    data1 = df_corr.values
    fig1 = plt.figure()
    ax1 = fig1.add_subplot(1, 1, 1)

    heatmap1 = ax1.pcolor(data1, cmap=plt.cm.RdYlGn)
    fig1.colorbar(heatmap1)

    # Put each tick in the middle of its cell rather than on the cell edge.
    ax1.set_xticks(np.arange(data1.shape[1]) + 0.5, minor=False)
    ax1.set_yticks(np.arange(data1.shape[0]) + 0.5, minor=False)
    # Lay the plot out like a table: first row at the top, labels above.
    ax1.invert_yaxis()
    ax1.xaxis.tick_top()
    column_labels = df_corr.columns
    row_labels = df_corr.index
    ax1.set_xticklabels(column_labels)
    ax1.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap1.set_clim(-1, 1)
    plt.tight_layout()
    plt.show()


visualize_data()

               ADANIPORTS.NS  ASIANPAINT.NS  AXISBANK.NS  BAJAJ-AUTO.NS  \
ADANIPORTS.NS       1.000000      -0.207487     0.816694       0.436262   
ASIANPAINT.NS      -0.207487       1.000000    -0.238491       0.453392   
AXISBANK.NS         0.816694      -0.238491     1.000000       0.599242   
BAJAJ-AUTO.NS       0.436262       0.453392     0.599242       1.000000   
BAJFINANCE.NS       0.186603       0.809530     0.199080       0.615234   

               BAJFINANCE.NS  BAJAJFINSV.NS  BHARTIARTL.NS  INFRATEL.NS  \
ADANIPORTS.NS       0.186603       0.381560      -0.286111     0.536781   
ASIANPAINT.NS       0.809530       0.607745       0.784402    -0.505529   
AXISBANK.NS         0.199080       0.494564      -0.168321     0.677056   
BAJAJ-AUTO.NS       0.615234       0.771319       0.302837     0.207523   
BAJFINANCE.NS       1.000000       0.903545       0.801712    -0.329406   

                BPCL.NS  CIPLA.NS  ...  TATAMOTORS.NS  TATASTEEL.NS  TECHM.NS  \
ADANIPORTS.NS  0.

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
def process_data_for_labels(ticker, hm_days=28):
    """Add forward percentage-change columns for one ticker.

    Reads ``nifty_joined_closes.csv`` (first column as index) and, for each
    ``i`` from 1 to ``hm_days``, adds a column ``<ticker>_<i>d`` holding the
    percentage change between the current row's value of ``ticker`` and the
    value ``i`` rows later.

    Missing values are replaced with 0 before and after the computation, so
    the last rows (which have no future value) end up with 0 in the new
    columns. A price that was filled with 0 can produce ``inf`` in the new
    columns; those are not removed here.

    Args:
        ticker: Name of the column to compute the changes for.
        hm_days: Number of look-ahead columns to create. Defaults to 28.

    Returns:
        tuple: ``(tickers, df)`` where ``tickers`` is the list of column
        names originally present in the CSV and ``df`` is the frame with the
        extra ``<ticker>_<i>d`` columns.
    """
    df = pd.read_csv('nifty_joined_closes.csv', index_col=0)
    tickers = df.columns.values.tolist()
    df.fillna(0, inplace=True)
    # Look the price column up once instead of three times per iteration.
    price = df[ticker]
    for i in range(1, hm_days + 1):
        df['{}_{}d'.format(ticker, i)] = ((price.shift(-i) - price) / price) * 100
    df.fillna(0, inplace=True)
    return tickers, df

process_data_for_labels('BAJFINANCE.NS')

(['ADANIPORTS.NS',
  'ASIANPAINT.NS',
  'AXISBANK.NS',
  'BAJAJ-AUTO.NS',
  'BAJFINANCE.NS',
  'BAJAJFINSV.NS',
  'BHARTIARTL.NS',
  'INFRATEL.NS',
  'BPCL.NS',
  'CIPLA.NS',
  'COALINDIA.NS',
  'DRREDDY.NS',
  'EICHERMOT.NS',
  'GAIL.NS',
  'GRASIM.NS',
  'HCLTECH.NS',
  'HDFC.NS',
  'HDFCBANK.NS',
  'HEROMOTOCO.NS',
  'HINDALCO.NS',
  'HINDUNILVR.NS',
  'BRITANNIA.NS',
  'ICICIBANK.NS',
  'INDUSINDBK.NS',
  'INFY.NS',
  'IOC.NS',
  'ITC.NS',
  'JSWSTEEL.NS',
  'KOTAKBANK.NS',
  'LT.NS',
  'M&M.NS',
  'MARUTI.NS',
  'NESTLEIND.NS',
  'NTPC.NS',
  'ONGC.NS',
  'POWERGRID.NS',
  'RELIANCE.NS',
  'SBIN.NS',
  'SUNPHARMA.NS',
  'TCS.NS',
  'TATAMOTORS.NS',
  'TATASTEEL.NS',
  'TECHM.NS',
  'TITAN.NS',
  'ULTRACEMCO.NS',
  'UPL.NS',
  'VEDL.NS',
  'WIPRO.NS',
  'SHREECEM.NS',
  'ZEEL.NS'],
             ADANIPORTS.NS  ASIANPAINT.NS  AXISBANK.NS  BAJAJ-AUTO.NS  \
 Date                                                                   
 2019-03-01     329.384583    1376.789062   701.528809   

In [12]:
def buy_sell_hold(*args):
    """Map a sequence of percentage changes to a buy/sell/hold label.

    The values are scanned in order. The first one that lies outside the
    band [-2, 2] decides the label: 1 when it is above 2, -1 when it is
    below -2. If every value lies inside the band (or no values are given)
    the label is 0.

    Args:
        *args: Percentage changes, in the order they should be examined.

    Returns:
        int: 1 (buy), -1 (sell) or 0 (hold).
    """
    threshold = 2
    signals = (
        1 if change > threshold else -1
        for change in args
        if change > threshold or change < -threshold
    )
    # Only the first out-of-band change matters; fall back to hold.
    return next(signals, 0)


def extract_featuresets(ticker):
    """Build the feature matrix and label vector for one ticker.

    Takes the frame produced by ``process_data_for_labels(ticker)``, labels
    every row by passing its 28 ``<ticker>_<i>d`` values to
    ``buy_sell_hold`` and stores the result in ``<ticker>_target``. The label
    distribution is printed. Rows that contain infinite values are then
    dropped. The features are the row-to-row percentage changes of all the
    original ticker columns, with infinite and missing values set to 0.

    Args:
        ticker: Column name of the stock to build labels for.

    Returns:
        tuple: ``(X, y, df)`` where ``X`` is the feature array, ``y`` the
        label array and ``df`` the cleaned frame both were taken from.
    """
    tickers, df = process_data_for_labels(ticker)
    hm_days = 28

    target_col = '{}_target'.format(ticker)
    future_cols = [df['{}_{}d'.format(ticker, day)] for day in range(1, hm_days + 1)]
    # One label per row, from that row's look-ahead changes in day order.
    df[target_col] = [buy_sell_hold(*changes) for changes in zip(*future_cols)]

    label_counts = Counter(str(label) for label in df[target_col].values.tolist())
    print('Data spread:', label_counts)

    # Turn infinities into NaN so that dropna removes the affected rows.
    df = df.fillna(0).replace([np.inf, -np.inf], np.nan).dropna()

    df_vals = df[list(tickers)].pct_change().replace([np.inf, -np.inf], 0).fillna(0)

    X = df_vals.values
    y = df[target_col].values
    return X, y, df

extract_featuresets('BAJFINANCE.NS')


Data spread: Counter({'1': 153, '-1': 106, '0': 1})


(array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.01815723,  0.00825941,  0.04221232, ..., -0.03220645,
          0.03349841, -0.0039066 ],
        [-0.00648506, -0.00723025, -0.015641  , ...,  0.01741839,
          0.01321441, -0.02796991],
        ...,
        [ 0.11525504,  0.01761822, -0.01781954, ...,  0.03643851,
          0.00121464, -0.02336064],
        [ 0.10291265,  0.04479132,  0.07801417, ..., -0.00963999,
          0.02698464,  0.05329415],
        [-0.02895152,  0.01493891,  0.04467566, ...,  0.04551968,
          0.00664355,  0.02589641]]),
 array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,
         1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,
        -1, -1, -1, -1, -1, -1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1, -1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1, -1,  

In [13]:
def apply_models(ticker, train_size=243):
    """Train a voting classifier for one ticker and report its accuracy.

    Features and labels come from ``extract_featuresets(ticker)``. The first
    ``train_size`` rows are used for training and the remaining rows for
    testing; the split is positional, not shuffled. The classifier is a
    ``VotingClassifier`` over a linear SVC, a k-nearest-neighbours classifier
    and a random forest. The accuracy on the test rows and the distribution
    of the predicted labels are printed.

    Args:
        ticker: Column name of the stock to model.
        train_size: Number of leading rows used for training. Defaults to
            243, the value this notebook was written with.

    Returns:
        float: Fraction of test rows whose label was predicted correctly.

    Raises:
        ValueError: If no rows are left for testing after the split.
    """
    X, y, _ = extract_featuresets(ticker)

    if len(X) <= train_size:
        raise ValueError(
            'Need more than {} rows to leave a test set, got {}'.format(
                train_size, len(X)))

    X_train, X_test = X[:train_size, :], X[train_size:, :]
    y_train, y_test = y[:train_size], y[train_size:]

    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    clf.fit(X_train, y_train)

    # Predict once and derive the accuracy from those predictions instead of
    # running the ensemble a second time through ``score``.
    predictions = clf.predict(X_test)
    confidence = float(np.mean(predictions == y_test))
    print('Accuracy: ', confidence)

    print('Predicted Spread: ', Counter(predictions))

    return confidence

# Load the ticker list that save_symbols() cached in nifty50.pickle.
with open('nifty50.pickle', 'rb') as f:
    tickers = pickle.load(f)
    
# Train and evaluate one model per ticker, printing a header before each run.
for ticker in tickers:
    print('\nFor {}\n'.format(ticker))
    apply_models(ticker)
    



For ADANIPORTS.NS

Data spread: Counter({'-1': 146, '1': 113, '0': 1})
Accuracy:  0.6470588235294118
Predicted Spread:  Counter({1: 9, -1: 8})

For ASIANPAINT.NS

Data spread: Counter({'1': 132, '-1': 126, '0': 2})
Accuracy:  0.4117647058823529
Predicted Spread:  Counter({1: 16, -1: 1})

For AXISBANK.NS

Data spread: Counter({'-1': 152, '1': 107, '0': 1})
Accuracy:  0.4117647058823529
Predicted Spread:  Counter({-1: 11, 1: 6})

For BAJAJ-AUTO.NS

Data spread: Counter({'-1': 135, '1': 121, '0': 4})
Accuracy:  0.29411764705882354
Predicted Spread:  Counter({1: 11, -1: 6})

For BAJFINANCE.NS

Data spread: Counter({'1': 153, '-1': 106, '0': 1})
Accuracy:  0.23529411764705882
Predicted Spread:  Counter({1: 12, -1: 5})

For BAJAJFINSV.NS

Data spread: Counter({'1': 149, '-1': 110, '0': 1})
Accuracy:  0.29411764705882354
Predicted Spread:  Counter({1: 11, -1: 6})

For BHARTIARTL.NS

Data spread: Counter({'1': 132, '-1': 127, '0': 1})
Accuracy:  0.35294117647058826
Predicted Spread:  Counter(