In [2]:
import zlib
import urllib
import requests
from datetime import datetime
import gzip
import pandas
import zipfile
import io


def getNseDerivativeMarketActivityReport(date: datetime) -> dict[str, pandas.DataFrame]:
    """Download the NSE F&O market-activity archive for ``date`` and parse its CSVs.

    The archive URL is built from the date formatted as ``ddmmYYYY``.

    Args:
        date: Day whose archive should be fetched.

    Returns:
        A dict mapping each ``.csv`` member name in the archive to the
        DataFrame produced by ``pandas.read_csv``; ``None`` when the server
        answers with any status other than 200.

    Raises:
        requests.RequestException: On network failure or timeout.
        zipfile.BadZipFile: If a 200 response body is not a valid zip archive.
    """
    url = "https://www1.nseindia.com/archives/fo/mkt/fo" + \
        date.strftime("%d%m%Y") + ".zip"
    # The whole body is needed in memory anyway, so no streaming: with
    # stream=True the connection stays checked out until the body is consumed,
    # which never happened on the non-200 early return below.
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        return None

    dfs = {}
    # Context managers close the archive and each member handle deterministically.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        for member_name in archive.namelist():
            if not member_name.endswith('.csv'):
                continue
            with archive.open(member_name) as member:
                dfs[member_name] = pandas.read_csv(member)
    return dfs


def extractStrikeReport(data, date: datetime) -> "tuple[pandas.DataFrame | None, str | None]":
    """Pick the strike report (``op<ddmmYYYY>.csv``) out of a parsed archive.

    Args:
        data: Mapping of archive member name to DataFrame, or ``None`` when no
            archive was available for the day.
        date: Day the archive belongs to; used to build the member name.

    Returns:
        Always a ``(frame, error)`` pair so callers can unpack unconditionally:
        ``(frame, None)`` on success, ``(None, message)`` when ``data`` is
        ``None`` or the expected member is missing.
    """
    # Built before any lookup so the error message can always reference it.
    filename = "op" + \
        date.strftime("%d%m%Y") + ".csv"

    if data is None:
        return None, f"Error in date - {date.isoformat()}, no report data available"

    try:
        return data[filename], None
    except (LookupError, TypeError):
        message = f"Error in date - {date.isoformat()}, key {filename} not found"
        print(message)
        return None, message


def extractLotSizeReport(data, date: datetime) -> "tuple[pandas.DataFrame | None, str | None]":
    """Pick the lot-size report (``optidx<ddmmYYYY>.csv``) out of a parsed archive.

    Args:
        data: Mapping of archive member name to DataFrame, or ``None`` when no
            archive was available for the day.
        date: Day the archive belongs to; used to build the member name.

    Returns:
        Always a ``(frame, error)`` pair so callers can unpack unconditionally:
        ``(frame, None)`` on success, ``(None, message)`` when ``data`` is
        ``None`` or the expected member is missing.
    """
    # Built before any lookup so the error message can always reference it.
    filename = "optidx" + \
        date.strftime("%d%m%Y") + ".csv"

    if data is None:
        return None, f"Error in date - {date.isoformat()}, no report data available"

    try:
        return data[filename], None
    except (LookupError, TypeError):
        message = f"Error in date - {date.isoformat()}, key {filename} not found"
        print(message)
        return None, message



# date = datetime(2022, 12, 7)
# output = getNseDerivativeMarketActivityReport(date)


# strikeDf, err = extractStrikeReport(output, date)
# lotSizeDf, err = extractLotSizeReport(output, date)



In [3]:
from time import sleep


def progress_bar(current, total, bar_length=20):
    """Print a one-line text progress bar for ``current`` out of ``total``.

    The bar is a run of ``-`` ending in ``>``, padded with spaces to
    ``bar_length`` characters and followed by the integer percentage. The line
    ends with a carriage return so the next call overwrites it, except when
    ``current == total``, where a newline finishes the bar.
    """
    ratio = current / total

    dash_count = int(ratio * bar_length - 1)
    bar = '-' * dash_count + '>'
    gap = ' ' * int(bar_length - len(bar))
    percent = int(ratio * 100)

    if current == total:
        line_end = '\n'
    else:
        line_end = '\r'

    print(f'Progress: [{bar}{gap}] {percent}%', end=line_end)


# for index in range(0, 100):
#     progress_bar(index, 100)
#     sleep(0.05)


In [4]:

from datetime import timedelta


def getLotSize(df: pandas.DataFrame) -> dict[str, int]:
    """Build a ticker -> lot-size mapping from the lot-size report frame.

    The values are read from each row's *label* (the first item yielded by
    ``iterrows``), not from the row's cells: element 0 is the ticker and the
    lot size is element 2 divided by element 1, truncated to an int.
    NOTE(review): this presumably relies on the CSV having been parsed with
    its leading fields as a tuple index - confirm against the source file.

    The first row is always skipped. ``BANKNIFTY`` is renamed to ``CNXBAN``.
    """
    lot_sizes = {}
    for position, (label, _) in enumerate(df.iterrows()):
        if position == 0:
            # First entry is not a data row.
            continue
        symbol = label[0].strip().upper().replace("BANKNIFTY", "CNXBAN")
        numerator = int(label[2].strip())
        denominator = int(label[1].strip())
        lot_sizes[symbol] = int(numerator / denominator)
    return lot_sizes


def getStrikeDiffWrtExpiry(df: pandas.DataFrame) -> dict[str, dict[str, int]]:
    """Compute the smallest strike spacing per ticker and expiry.

    For every row after the first, the ticker is taken from position 1, the
    expiry (``dd/mm/YYYY``) from position 2 and the strike from position 3.
    ``BANKNIFTY`` is renamed to ``CNXBAN``. Rows that cannot be parsed are
    skipped.

    Args:
        df: Strike report frame.

    Returns:
        ``{ticker: {expiry_iso_string: min_strike_diff}}``. Expiries for which
        ``__getMinStrikeDiff`` returns ``None`` are omitted, and a ticker with
        no remaining expiries is omitted entirely.
    """
    strikesByExpiry: dict[str, dict[str, list[int]]] = {}
    for position, (_, row) in enumerate(df.iterrows()):
        if position == 0:
            # First row is not a data row.
            continue
        try:
            # .iloc makes the positional access explicit; integer keys through
            # plain [] on a labelled Series are a deprecated fallback.
            ticker = row.iloc[1].strip().upper().replace("BANKNIFTY", "CNXBAN")
            expiryParts = row.iloc[2].strip().lower().split("/")
            strike = int(row.iloc[3])
            expiry = datetime(int(expiryParts[2]), int(expiryParts[1]),
                              int(expiryParts[0])).isoformat()
        except (AttributeError, IndexError, TypeError, ValueError, OverflowError):
            # Malformed row (non-string cell, missing field, bad number or date).
            continue
        strikesByExpiry.setdefault(ticker, {}).setdefault(expiry, []).append(strike)

    output: dict[str, dict[str, int]] = {}
    for ticker, expiries in strikesByExpiry.items():
        for expiry, strikes in expiries.items():
            strikeDiff = __getMinStrikeDiff(strikes)
            if strikeDiff is not None:
                output.setdefault(ticker, {})[expiry] = strikeDiff

    return output

def __getMinStrikeDiff(strikes: list[int]) -> "int | None":
    """Return the smallest non-zero gap between any two strikes.

    Args:
        strikes: Strike prices, in any order, possibly with duplicates. The
            list is not modified.

    Returns:
        The minimum difference between adjacent distinct strikes, or ``None``
        when there are fewer than two distinct strikes (no gap exists).
    """
    # Deduplicating up front removes zero gaps and avoids a magic sentinel
    # that could leak out as a bogus result.
    uniqueStrikes = sorted(set(strikes))
    if len(uniqueStrikes) < 2:
        return None
    return min(upper - lower
               for lower, upper in zip(uniqueStrikes, uniqueStrikes[1:]))


# lotSize = getLotSize(lotSizeDf)
# expiry = getStrikeDiffWrtExpiry(strikeDf)

# print(lotSize)
# print(expiry)


In [6]:
# scrape data
import json
# Inclusive date range to scrape.
START_DATE = datetime(2011, 3, 1)
END_DATE = datetime(2022, 12, 27)
# Only these index tickers are kept from the strike report.
INDEX_TICKERS = ("NIFTY", "CNXBAN", "FINNIFTY")

holidayListOutput = []      # ISO dates for which no archive was returned
lotSizeOutput = []          # [{"date": iso, "data": {ticker: lot size}}]
strikeAndExpiryOutput = []  # [{"date": iso, "data": {ticker: {expiry: diff}}}]

total_days = (END_DATE - START_DATE).days
current_date = START_DATE
while END_DATE >= current_date:
    try:
        output = getNseDerivativeMarketActivityReport(current_date)
        if output is None:
            # Any non-200 response is recorded as a day without data.
            holidayListOutput.append(current_date.isoformat())
        else:
            lotSizeDf, err = extractLotSizeReport(output, current_date)
            if err is None:
                lotSize = getLotSize(lotSizeDf)
                lotSizeOutput.append(
                    {"date": current_date.isoformat(), "data": lotSize})

            strikeDf, err = extractStrikeReport(output, current_date)
            if err is None:
                strikeAndExpiry = getStrikeDiffWrtExpiry(strikeDf)
                tempStrikeAndExpiry = {
                    ticker: strikeAndExpiry[ticker]
                    for ticker in INDEX_TICKERS
                    if ticker in strikeAndExpiry
                }
                strikeAndExpiryOutput.append(
                    {"date": current_date.isoformat(), "data": tempStrikeAndExpiry})
    except Exception as e:
        # Best effort: keep scraping, but leave a trace of the day that failed
        # instead of dropping it silently.
        print(f"Error in date - {current_date.isoformat()}, "
              f"{type(e).__name__}: {e}")

    # Single advance point, so every path (holiday, success, failure) moves on
    # and refreshes the progress bar.
    current_date = current_date + timedelta(days=1)
    lapse_days = max((current_date - START_DATE).days, 1)
    progress_bar(lapse_days*100.0/(total_days + 1), 100)


with open('lotSizeOutput.json', 'w', encoding='utf-8') as f:
    json.dump(lotSizeOutput, f, ensure_ascii=False, indent=4)

with open('strikeAndExpiryOutput.json', 'w', encoding='utf-8') as f:
    json.dump(strikeAndExpiryOutput, f, ensure_ascii=False, indent=4)

with open('holidayListOutput.json', 'w', encoding='utf-8') as f:
    json.dump(holidayListOutput, f, ensure_ascii=False, indent=4)




Error in date - 2011-11-18T00:00:00, key optidx18112011.csv not found
Error in date - 2011-11-18T00:00:00, key op18112011.csv not found
Error in date - 2012-11-22T00:00:00, key optidx22112012.csv not found
Error in date - 2012-11-22T00:00:00, key op22112012.csv not found
Error in date - 2013-05-07T00:00:00, key optidx07052013.csv not found
Error in date - 2013-05-07T00:00:00, key op07052013.csv not found
Error in date - 2014-05-12T00:00:00, key optidx12052014.csv not found
Error in date - 2014-05-12T00:00:00, key op12052014.csv not found
Error in date - 2019-12-13T00:00:00, key optidx13122019.csv not found
Error in date - 2022-10-24T00:00:00, key optidx24102022.csv not found
Error in date - 2022-10-28T00:00:00, key optidx28102022.csv not found
Error in date - 2022-11-29T00:00:00, key optidx29112022.csv not found
Progress: [------------------->] 100%
