### Import Dependencies 

In [1]:
import math
import requests
import datetime
import json
import urllib
import pandas as pd
import concurrent
from concurrent.futures import ALL_COMPLETED

### Define URL Constants

In [2]:
# Site root; requested first so the server hands out session cookies.
BASE_URL = "https://www.niftyindices.com/"
# Endpoint that receives the historical-data POST; it lives under BASE_URL.
HISTORICAL_DATA_URL = BASE_URL + "Backpage.aspx/getHistoricaldatatabletoString"

### Define Helper Functions 

In [3]:
def get_adjusted_headers():
    """Build the browser-like HTTP headers attached to each request.

    Returns:
        dict: A newly created mapping on every call, containing the
        Accept, Content-Type, Origin, Referer and User-Agent headers.
    """
    header_pairs = [
        ("Accept", "application/json, text/javascript, */*; q=0.01"),
        ("Content-Type", "application/json; charset=UTF-8"),
        ("Origin", "https://www.niftyindices.com"),
        ("Referer", "https://www.niftyindices.com/reports/historical-data"),
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"),
    ]
    # dict() keeps the pair order, so the header order is stable.
    return dict(header_pairs)

def fetch_cookies():
    """Request the site's base page and return the cookies it sets.

    Returns:
        dict: Cookie names mapped to values, taken from the response to
        a GET on ``BASE_URL``.

    Raises:
        ValueError: If the response status is anything other than OK.
    """
    landing_page = requests.get(
        BASE_URL,
        headers=get_adjusted_headers(),
        timeout=30,
    )
    if landing_page.status_code == requests.codes.ok:
        return landing_page.cookies.get_dict()
    # Any non-OK answer is reported to the caller as a retryable failure.
    raise ValueError("Please try again in a minute.")

def scrape_data(start_date, end_date, name, input_type='index'):
    """
    Fetch historical data for one index and return it as a DataFrame.

    Issues a single cookie-fetching GET followed by a single POST to
    ``HISTORICAL_DATA_URL``; the rows found in the response's ``d`` field
    are loaded into a DataFrame as-is (no type conversion or sorting).

    Args:
        start_date (str | datetime.date): first day of the range. Strings
            must be in ``DD-MM-YYYY`` form, e.g. ``'01-01-2000'``;
            ``datetime.date`` / ``datetime.datetime`` objects are used as-is.
        end_date (str | datetime.date): last day of the range, same
            formats as ``start_date``.
        name (str): index name sent to the server, e.g. ``'NIFTY 50'``.
        input_type (str, optional): currently unused by this function; kept
            so callers that pass it keep working. Defaults to 'index'.
    Returns:
        pandas.DataFrame | None: one row per record returned by the server,
        or None when the server answers with a non-OK status.
    Raises:
        ValueError: if a date string does not match ``DD-MM-YYYY``, or if
            the cookie request fails (raised by ``fetch_cookies``).
    """
    def _as_date(value):
        # date/datetime objects pass through; anything else is parsed as DD-MM-YYYY.
        if isinstance(value, datetime.date):
            return value
        return datetime.datetime.strptime(value, "%d-%m-%Y")

    # Validate the dates before any network traffic so bad input fails fast.
    start_date = _as_date(start_date)
    end_date = _as_date(end_date)

    cookies = fetch_cookies()

    # The request declares Content-Type application/json, so send real JSON.
    # str(dict) would emit a Python repr (single quotes, no JSON escaping)
    # that breaks as soon as a value contains a quote character.
    # NOTE: %b yields the month abbreviation for the current locale.
    payload = json.dumps({
        'name': name,
        'startDate': start_date.strftime('%d-%b-%Y'),
        'endDate': end_date.strftime('%d-%b-%Y'),
    })

    response = requests.post(
        HISTORICAL_DATA_URL,
        data=payload,
        timeout=30,
        headers=get_adjusted_headers(),
        cookies=cookies,
    )
    if response.status_code != requests.codes.ok:
        # Unchanged contract: a non-OK answer yields None rather than raising.
        return None

    # 'd' carries the rows as a JSON-encoded string. Decode it with
    # json.loads rather than eval: server-supplied text must never be
    # executed, and eval also fails on JSON literals (null/true/false).
    rows = json.loads(json.loads(response.text)['d'])
    return pd.DataFrame(rows)

### Scrape Directly to DataFrame 

In [4]:
# Dates are DD-MM-YYYY strings; the returned frame is the cell's displayed output.
scrape_data(start_date='01-01-2000', end_date='20-05-2023', name='NIFTY 50')

Unnamed: 0,Index Name,INDEX_NAME,HistoricalDate,OPEN,HIGH,LOW,CLOSE
0,Nifty 50,NIFTY 50,19 May 2023,18186.15,18218.10,18060.40,18203.40
1,Nifty 50,NIFTY 50,18 May 2023,18287.50,18297.20,18104.85,18129.95
2,Nifty 50,NIFTY 50,17 May 2023,18300.45,18309.00,18115.35,18181.75
3,Nifty 50,NIFTY 50,16 May 2023,18432.35,18432.35,18264.35,18286.50
4,Nifty 50,NIFTY 50,15 May 2023,18339.30,18458.90,18287.90,18398.85
...,...,...,...,...,...,...,...
5810,Nifty 50,Nifty 50,07 Jan 2000,1616.6,1628.25,1597.2,1613.30
5811,Nifty 50,Nifty 50,06 Jan 2000,1595.8,1639,1595.8,1617.60
5812,Nifty 50,Nifty 50,05 Jan 2000,1634.55,1635.5,1555.05,1595.80
5813,Nifty 50,Nifty 50,04 Jan 2000,1594.4,1641.95,1594.4,1638.70
