In [1]:
import pandas as pd
from zipfile import ZipFile
import numpy as np
from datetime import datetime, timedelta,date
from urllib.request import urlopen
from io import BytesIO
from pymongo import MongoClient

In [2]:
# Root under which download_data() looks for the daily
# sec_bhavdata_full_<DDMMYYYY>.csv files.
sec_base_url = "https://archives.nseindia.com/products/content"

# Root of the derivatives archive; download_data() appends
# <YYYY>/<MON>/fo<DDMONYYYY>bhav.csv.zip to it.
fno_base_url = "https://archives.nseindia.com/content/historical/DERIVATIVES"

In [3]:
def date_range(start,end):
    """Return the days from ``start`` (inclusive) up to ``end`` (exclusive).

    Both bounds are passed to ``np.datetime64``; with day-precision inputs
    such as ``'2021-01-01'`` the result is an object array of
    ``datetime.date`` values, one per calendar day.
    """
    first_day = np.datetime64(start)
    stop_day = np.datetime64(end)  # half-open range: this day is not produced
    days = np.arange(first_day, stop_day)
    return days.astype(datetime)
    

In [4]:
def _fetch_bytes(url, timeout):
    """Return the body served at ``url`` as bytes, or ``None`` if it cannot be fetched."""
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url, timeout=timeout) as resp:
            return resp.read()
    except OSError as exc:
        # URLError/HTTPError and socket timeouts are all OSError subclasses.
        # Anything else is a genuine bug and is allowed to propagate rather
        # than being silently swallowed.
        print(f" Could not fetch {url}: {exc}")
        return None


def _read_last_csv_member(zip_bytes):
    """Parse the last member of an in-memory zip as CSV (empty frame if the zip has no members)."""
    with ZipFile(BytesIO(zip_bytes)) as archive:
        members = archive.namelist()
        if not members:
            return pd.DataFrame()
        with archive.open(members[-1]) as csv_member:
            return pd.read_csv(csv_member, skipinitialspace=True)


def _finish_frame(frame, label, currentdate):
    """Log completion for a non-empty frame and strip whitespace from its column names."""
    if frame.empty:
        return frame
    print (f"Download of {label} for {currentdate} Completed")
    return frame.rename(str.strip, axis='columns')


def download_data(currentdate, timeout=2):
    """Download the SEC and FNO bhavcopy files for one day.

    Each file is fetched exactly once and parsed from memory; nothing is
    extracted or written to the working directory.

    Parameters
    ----------
    currentdate : datetime.date or datetime.datetime
        Day to download; only ``strftime`` is used on it.
    timeout : float, optional
        Per-request timeout in seconds handed to ``urlopen`` (default 2).

    Returns
    -------
    (df_sec, df_fno) : tuple of pandas.DataFrame
        Parsed files with whitespace stripped from the column names.  A file
        that cannot be fetched yields an empty ``DataFrame``.

    Raises
    ------
    zipfile.BadZipFile, pandas.errors.ParserError
        If a file is fetched but its content is malformed.
    """
    print (f"Called Download data for date {currentdate}")
    sec_date = currentdate.strftime('%d%m%Y')
    fno_year = currentdate.strftime("%Y")
    # NOTE: %b is locale dependent; the URLs below need English month
    # abbreviations (JAN, FEB, ...).
    fno_month = currentdate.strftime("%b").upper()
    fno_date = currentdate.strftime('%d%b%Y').upper()
    sec_url = f"{sec_base_url}/sec_bhavdata_full_{sec_date}.csv"
    fno_url = f"{fno_base_url}/{fno_year}/{fno_month}/fo{fno_date}bhav.csv.zip"

    # Derivatives file: a zip archive whose last member is read as the CSV.
    print (f"Download of FNO for {currentdate} Started from {fno_url}")
    fno_payload = _fetch_bytes(fno_url, timeout)
    if fno_payload is None:
        print(f" FNO File is not present for {currentdate}")
        df_fno = pd.DataFrame()
    else:
        df_fno = _finish_frame(_read_last_csv_member(fno_payload), "FNO", currentdate)

    # Securities file: a plain CSV, parsed from the bytes already fetched
    # instead of being downloaded a second time by pandas.
    print (f"Download of SEC for {currentdate} Started from {sec_url}")
    sec_payload = _fetch_bytes(sec_url, timeout)
    if sec_payload is None:
        print(f" Sec File is not present for {currentdate}")
        df_sec = pd.DataFrame()
    else:
        df_sec = _finish_frame(
            pd.read_csv(BytesIO(sec_payload), skipinitialspace=True),
            "SEC",
            currentdate,
        )

    return df_sec, df_fno
    

In [5]:
def load_to_db(startdate,enddate):
    """Download every day in ``[startdate, enddate)`` and insert it into a staging MongoDB.

    The database is named ``'STAGINGDB' + MM + YYYY`` from ``startdate``.
    Securities rows go to the ``SECMASTER`` collection and derivatives rows
    to ``FNOMASTER``.  Rows are appended with ``insert_many``; no
    de-duplication is performed, so re-running a range inserts it again.

    Parameters
    ----------
    startdate : str
        First day to load, formatted ``'YYYY-MM-DD'``.
    enddate : str
        Day after the last day to load (exclusive), same format.
    """
    date_parts = startdate.split('-')
    dbname = 'STAGINGDB' + date_parts[1] + date_parts[0]
    print(f'staging db is being created with name {dbname}')
    m_client = MongoClient()
    try:
        db = m_client[dbname]
        for currentdate in date_range(startdate, enddate):
            print(currentdate)
            df_sec, df_fno = download_data(currentdate)
            # Each file gets its own label so the log says which one is loading.
            targets = (
                ("Security", 'SECMASTER', df_sec),
                ("FNO", 'FNOMASTER', df_fno),
            )
            for label, collection, frame in targets:
                if frame.empty:
                    continue
                print(f" {label} File loading for date {currentdate} started")
                db[collection].insert_many(frame.to_dict(orient='records'))
                print(f" {label} File loading for date {currentdate} Ended")
    finally:
        # Release the connection even when a download or insert fails.
        m_client.close()
            

In [6]:
#startdate='2019-10-01'
# # The end date is exclusive: to load 1 Jan through 31 Jan, give the start date as 2021-01-01 and the end date as 2021-02-01 (one day extra)
# startdate='2021-05-12'
# enddate='2021-11-19'



In [7]:
#load_to_db(startdate,enddate)

In [8]:
#This method reads data from MongoDB
def getdata(stock,db):
    """Load one MongoDB collection into a DataFrame indexed by ``Datetime``.

    Parameters
    ----------
    stock : object
        Collection name; converted with ``str()`` before the lookup.
    db : str
        Name of the MongoDB database holding the collection.

    Returns
    -------
    pandas.DataFrame
        All documents of the collection without the ``_id`` column, indexed
        by ``Datetime``.  The stored values are localized as UTC and then
        converted to ``Asia/Kolkata``.

    Raises
    ------
    KeyError
        If the collection holds no documents, or the documents lack the
        ``_id`` or ``Datetime`` field.
    """
    stock = str(stock)
    mongo_client = MongoClient()
    try:
        # Materialize the cursor before the connection is closed.
        documents = list(mongo_client[db][stock].find())
    finally:
        mongo_client.close()

    if not documents:
        raise KeyError(f"no documents found in collection {stock!r} of database {db!r}")

    df = pd.DataFrame(documents).drop(columns="_id")
    # Convert from UTC to get the correct local values.
    df['Datetime'] = (
        pd.to_datetime(df['Datetime'])
        .dt.tz_localize('UTC')
        .dt.tz_convert('Asia/Kolkata')
    )
    return df.set_index('Datetime')

In [9]:
# Load window: start the day after the last row returned for ACC and run
# through today (the end date is exclusive, hence the extra day).
enddate = str(date.today() + timedelta(days=1))
testdf = getdata('ACC', 'NSEEQ')
startdate = str(testdf.index[-1] + timedelta(days=1)).split(' ')[0]
startdate



'2021-12-01'

In [10]:
# 

In [11]:
# Staging database name: 'STAGINGDB' followed by the month and then the year
# of the start date ('YYYY-MM-DD' -> 'STAGINGDBMMYYYY').
dbname = 'STAGINGDB{1}{0}'.format(*startdate.split('-'))
dbname

'STAGINGDB122021'

In [12]:
# Download and stage every day in [startdate, enddate); this hits the network
# and inserts into the local MongoDB staging database.
load_to_db(startdate,enddate)

staging db is being created with name STAGINGDB122021
2021-12-01
Called Download data for date 2021-12-01
Download of FNO for 2021-12-01 Started from https://archives.nseindia.com/content/historical/DERIVATIVES/2021/DEC/fo01DEC2021bhav.csv.zip
Download of FNO for 2021-12-01 Completed
Download of SEC for 2021-12-01 Started from https://archives.nseindia.com/products/content/sec_bhavdata_full_01122021.csv
Download of SEC for 2021-12-01 Completed
 Security File loading for date 2021-12-01 started
 Security File loading for date 2021-12-01 Ended
 Security File loading for date 2021-12-01 started
 Security File loading for date 2021-12-01 Ended
2021-12-02
Called Download data for date 2021-12-02
Download of FNO for 2021-12-02 Started from https://archives.nseindia.com/content/historical/DERIVATIVES/2021/DEC/fo02DEC2021bhav.csv.zip
Download of FNO for 2021-12-02 Completed
Download of SEC for 2021-12-02 Started from https://archives.nseindia.com/products/content/sec_bhavdata_full_02122021.csv