### Extracting Nifty 50 Stock Data by Running This Python API Script Daily

In [6]:
import pandas as pd
from bsedata.bse import BSE
import time
# Widen pandas' display limits so wide frames are shown without truncation.
for display_option, display_limit in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(display_option, display_limit)

# Equity master workbook hosted on GitHub; read with the openpyxl engine.
github_excel_url = "https://raw.githubusercontent.com/jangid6/Stock-ETL-Project/main/Equity.xlsx"
engine = 'openpyxl'
# Security codes are identifiers, not quantities, so they are kept as strings.
Equity = pd.read_excel(github_excel_url, engine=engine).astype({'Security Code': str})
Equity.head(n=2)  # Preview the first two rows of the equity master list

Unnamed: 0,Security Code,Issuer Name,Security Id,Security Name,Status,Group,Face Value,ISIN No,Industry,Instrument,Sector Name,Industry New Name,Igroup Name,ISubgroup Name
0,500002,ABB India Limited,ABB,ABB India Limited,Active,A,2.0,INE117A01022,Heavy Electrical Equipment,Equity,Industrials,Capital Goods,Electrical Equipment,Heavy Electrical Equipment
1,500003,Aegis Logistics Ltd.,AEGISLOG,AEGIS LOGISTICS LTD.,Active,A,1.0,INE208C01025,Trading - Gas,Equity,Energy,"Oil, Gas & Consumable Fuels",Gas,Trading - Gas


##### Calling the BSE API to fetch stock data, e.g. price, code, updated date, open price, close price, market cap

In [7]:
# Security Ids of the Nifty 50 constituents used to filter the equity master list.
nifty50_stock_symbols = [
    "ADANIENT", "ADANIPORTS", "APOLLOHOSP", "ASIANPAINT", "AXISBANK",
    "BAJAJ-AUTO", "BAJFINANCE", "BAJAJFINSV", "BPCL", "BHARTIARTL",
    "BRITANNIA", "CIPLA", "COALINDIA", "DIVISLAB", "DRREDDY",
    "EICHERMOT", "GRASIM", "HCLTECH", "HDFCBANK", "HDFCLIFE",
    "HEROMOTOCO", "HINDALCO", "HINDUNILVR", "ICICIBANK", "ITC",
    "INDUSINDBK", "INFY", "JSWSTEEL", "KOTAKBANK", "LTIM",
    "LT", "M&M", "MARUTI", "NTPC", "NESTLEIND",
    "ONGC", "POWERGRID", "RELIANCE", "SBILIFE", "SBIN",
    "SUNPHARMA", "TCS", "TATACONSUM", "TATAMOTORS", "TATASTEEL",
    "TECHM", "TITAN", "UPL", "ULTRACEMCO", "WIPRO",
]

# Keep only the listed securities, renumber the rows, and rename 'Group'.
is_nifty50 = Equity['Security Id'].isin(nifty50_stock_symbols)
nifty50_SqDF = (
    Equity.loc[is_nifty50]
    .reset_index(drop=True)
    .rename(columns={'Group': 'CompanyGroup'})
)
# Strip spaces from the headers ('Security Code' -> 'SecurityCode', ...).
nifty50_SqDF.columns = nifty50_SqDF.columns.str.replace(' ', '')

# Pause between consecutive quote requests so the BSE endpoint is not hammered.
QUOTE_DELAY_SECONDS = 0.5

b = BSE(update_codes=True)
sqcode_ListNifty50 = nifty50_SqDF['SecurityCode'].values

# Fetch each quote exactly once. The frames are collected in a list and
# concatenated a single time afterwards, instead of re-querying the API and
# growing a DataFrame row by row.
result_dfs = []
for sqCode in sqcode_ListNifty50:
    try:
        stock_data = b.getQuote(sqCode)
    except IndexError:
        # Same handling as before: report the security and move on.
        print(f"IndexError for {sqCode}: Data not available")
        continue
    result_dfs.append(pd.DataFrame([stock_data]))
    time.sleep(QUOTE_DELAY_SECONDS)

if not result_dfs:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise ValueError("No quotes could be fetched from BSE; nothing to build the daily table from.")

# Full quote payload, one row per security.
nifty50_OverviewTable_SF = pd.concat(result_dfs, ignore_index=True)

# Daily table: every quote without its last two fields.
# NOTE(review): the slice is positional, as in the original; presumably the
# two trailing fields are non-tabular - confirm against the bsedata response.
nifty50DailyTable = pd.concat(
    [quote_df.iloc[:, :-2] for quote_df in result_dfs],
    ignore_index=True,
)
nifty50DailyTable.head(n=2)

Unnamed: 0,companyName,currentValue,change,pChange,updatedOn,securityID,scripCode,group,faceValue,industry,previousClose,previousOpen,dayHigh,dayLow,52weekHigh,52weekLow,weightedAvgPrice,totalTradedValue,totalTradedQuantity,2WeekAvgQuantity,marketCapFull,marketCapFreeFloat
0,Bajaj Finance Limited,7126.15,104.3,1.49,28 Nov 23 | 04:01 PM,BAJFINANCE,500034,A / S&P BSE SENSEX,2.0,Financial Services,7021.85,7022.2,7133.0,7022.2,8190.0,5487.25,7081.82,24.93 Cr.,0.35 Lakh,0.47 Lakh,"4,40,412.52 Cr.","1,93,781.51 Cr."
1,CIPLA LTD.,1192.05,-6.8,-0.57,28 Nov 23 | 04:01 PM,CIPLA,500087,A / S&P BSE 100,2.0,Healthcare,1198.85,1198.85,1202.9,1188.35,1283.0,852.0,1195.81,7.52 Cr.,0.63 Lakh,0.68 Lakh,"96,237.23 Cr.","63,516.57 Cr."


##### Data Transformation & Cleaning, So the Data Can Be Written to a SQL Server Database

In [20]:
# Rename columns whose names are awkward as SQL identifiers: GROUP is a
# reserved SQL keyword and unquoted identifiers cannot start with a digit.
nifty50DailyTable = nifty50DailyTable.rename(columns={
    'group': 'sharegroup',
    '52weekHigh': 'fiftytwoweekHigh',
    '52weekLow': 'fiftytwoweekLow',
    '2WeekAvgQuantity': 'twoWeekAvgQuantity',
})

# Parse 'updatedOn' (e.g. "28 Nov 23 | 04:01 PM"); unparseable values become NaT.
updated_on = pd.to_datetime(nifty50DailyTable['updatedOn'], format='%d %b %y | %I:%M %p', errors='coerce')
if updated_on.isna().any():
    print("There are invalid or missing date values in the 'updatedOn' column.")
# Always truncate to midnight, even when some rows failed to parse (NaT stays
# NaT). Leaving the time of day in place would make the loader's
# "newer than the stored date" comparison true for the same day.
nifty50DailyTable['updatedOn'] = updated_on.dt.normalize()

# Text columns that carry a unit suffix -> (suffix to strip, numeric column name).
UNIT_SUFFIX_COLUMNS = {
    'totalTradedValue': (' Cr.', 'totalTradedValueCr'),
    'totalTradedQuantity': (' Lakh', 'totalTradedQuantityLakh'),
    'twoWeekAvgQuantity': (' Lakh', 'twoWeekAvgQuantityLakh'),
    'marketCapFull': (' Cr.', 'marketCapFullCr'),
    'marketCapFreeFloat': (' Cr.', 'marketCapFreeFloatCr'),
}

# Guard keeps the cell re-runnable: the source columns are dropped on the first run.
if 'totalTradedValueCr' not in nifty50DailyTable.columns:
    for source_column, (unit_suffix, numeric_column) in UNIT_SUFFIX_COLUMNS.items():
        # Literal (non-regex) replacements: the '.' in ' Cr.' must not act as
        # a regex wildcard.
        stripped = (
            nifty50DailyTable[source_column]
            .str.replace(',', '', regex=False)
            .str.replace(unit_suffix, '', regex=False)
        )
        # Anything that still is not a number becomes NaN.
        nifty50DailyTable[numeric_column] = pd.to_numeric(stripped, errors='coerce')

    # Drop original columns
    nifty50DailyTable = nifty50DailyTable.drop(columns=list(UNIT_SUFFIX_COLUMNS))

nifty50DailyTable.head(n=2)

Unnamed: 0,companyName,currentValue,change,pChange,updatedOn,securityID,scripCode,sharegroup,faceValue,industry,previousClose,previousOpen,dayHigh,dayLow,fiftytwoweekHigh,fiftytwoweekLow,weightedAvgPrice,totalTradedValueCr,totalTradedQuantityLakh,twoWeekAvgQuantityLakh,marketCapFullCr,marketCapFreeFloatCr
0,Bajaj Finance Limited,7126.15,104.3,1.49,2023-11-28,BAJFINANCE,500034,A / S&P BSE SENSEX,2.0,Financial Services,7021.85,7022.2,7133.0,7022.2,8190.0,5487.25,7081.82,24.93,0.35,0.47,440412.52,193781.51
1,CIPLA LTD.,1192.05,-6.8,-0.57,2023-11-28,CIPLA,500087,A / S&P BSE 100,2.0,Healthcare,1198.85,1198.85,1202.9,1188.35,1283.0,852.0,1195.81,7.52,0.63,0.68,96237.23,63516.57


#### Loading Data into a Microsoft SQL Server Database Using a pyodbc Connection and a SQLAlchemy Engine

In [21]:
import pyodbc ##for SQL Queries
from sqlalchemy import create_engine, inspect ##prebuilt toolkit to work with SQL database
from sqlalchemy.exc import SQLAlchemyError
import sqlalchemy as sa

import os
from getpass import getpass

# Connection settings for the SQL Server instance. Each value can be
# overridden with an environment variable; the defaults match the local setup.
server = os.environ.get('NIFTY50_DB_SERVER', 'localhost')
database = os.environ.get('NIFTY50_DB_NAME', 'nifty50')
username = os.environ.get('NIFTY50_DB_USER', 'sa')
# The password is never stored in the notebook: it is read from the
# environment, or prompted for interactively when the variable is not set.
password = os.environ.get('NIFTY50_DB_PASSWORD') or getpass(f"SQL Server password for '{username}': ")
driver = 'ODBC Driver 17 for SQL Server'

# ODBC connection string for the raw pyodbc connection (see create_connection)
conn_str = f'DRIVER={driver};SERVER={server};DATABASE={database};UID={username};PWD={password}'

# Create an SQLAlchemy engine. URL.create escapes every component, so
# credentials containing characters such as '@', '/' or ':' cannot corrupt the URL.
engine = create_engine(
    sa.engine.URL.create(
        "mssql+pyodbc",
        username=username,
        password=password,
        host=server,
        database=database,
        query={"driver": driver},
    )
)

def create_connection(conn_str):
    """Open a pyodbc connection and a cursor on it.

    Args:
        conn_str: ODBC connection string passed to ``pyodbc.connect``.

    Returns:
        A ``(connection, cursor)`` tuple; closing the connection is left to
        the caller.
    """
    db_connection = pyodbc.connect(conn_str)
    return db_connection, db_connection.cursor()

def _run_ddl(ddl_statement):
    """Execute one DDL statement over a short-lived pyodbc connection.

    The statement is committed on success and the connection is always
    closed, including when execution fails.
    """
    ddl_conn, ddl_cursor = create_connection(conn_str)
    try:
        ddl_cursor.execute(ddl_statement)
        ddl_conn.commit()
    finally:
        ddl_conn.close()


try:
    # Fail fast if the server cannot be reached through the engine.
    with engine.connect():
        print("Connection successful!")

    inspector = inspect(engine)
    nifty50_table_name = 'nifty50_dailydata'
    if not inspector.has_table(nifty50_table_name):
        nifty50_table_schema = f'''
        CREATE TABLE {nifty50_table_name} (
            companyName NVARCHAR(MAX),
            currentValue FLOAT,
            change FLOAT,
            pChange FLOAT,
            updatedOn DATE,
            securityID NVARCHAR(MAX),
            scripCode NVARCHAR(MAX),
            sharegroup NVARCHAR(MAX),
            faceValue FLOAT,
            industry NVARCHAR(MAX),
            previousClose FLOAT,
            previousOpen FLOAT,
            dayHigh FLOAT,
            dayLow FLOAT,
            fiftytwoweekHigh FLOAT,
            fiftytwoweekLow FLOAT,
            weightedAvgPrice FLOAT,
            totalTradedQuantityLakh FLOAT,
            totalTradedValueCr FLOAT,
            twoWeekAvgQuantityLakh FLOAT,
            marketCapFullCr FLOAT,
            marketCapFreeFloatCr FLOAT
        );
        '''
        # Execute the schema to create the table
        _run_ddl(nifty50_table_schema)

    # The freshness check and the append share one transaction, which is
    # committed when the block exits without an error.
    with engine.begin() as engineConn:
        sql_max_updatedOn = pd.read_sql_query(sa.text(f'SELECT MAX(updatedOn) FROM {nifty50_table_name}'), engineConn).iloc[0, 0]
        df_max_updatedOn = nifty50DailyTable['updatedOn'].max()
        table_is_empty = pd.isnull(sql_max_updatedOn)

        if pd.isnull(df_max_updatedOn):
            # The fetched frame holds no usable date, so there is nothing to compare.
            print("No new data to append.")
        elif table_is_empty or df_max_updatedOn > pd.Timestamp(sql_max_updatedOn):
            nifty50DailyTable.to_sql(nifty50_table_name, engineConn, index=False, if_exists='append', method='multi')
            if table_is_empty:
                print("Daily Data didn't exist, but now inserted successfully.")
            else:
                print("Data appended successfully.")
        else:
            print("No new data to append.")

    company_table_name = 'nifty50_companydata'
    if not inspector.has_table(company_table_name):
        # Table schema for the company master data held in nifty50_SqDF
        company_table_schema = f'''
        CREATE TABLE {company_table_name} (
            securityCode NVARCHAR(MAX),
            issuerName NVARCHAR(MAX),
            securityId NVARCHAR(MAX),
            securityName NVARCHAR(MAX),
            status NVARCHAR(MAX),
            CompanyGroup NVARCHAR(MAX),
            faceValue FLOAT,
            isinNo NVARCHAR(MAX),
            industry NVARCHAR(MAX),
            instrument NVARCHAR(MAX),
            sectorName NVARCHAR(MAX),
            industryNewName NVARCHAR(MAX),
            igroupName NVARCHAR(MAX),
            iSubgroupName NVARCHAR(MAX)
        );
        '''

        # Create the 'company' table, then load the company rows once
        _run_ddl(company_table_schema)
        nifty50_SqDF.to_sql(company_table_name, engine, index=False, if_exists='append', method='multi')
    else:
        print("company Table already exist, hence skipping")

except (SQLAlchemyError, pyodbc.Error) as e:
    # pyodbc.Error covers the raw DDL path, which SQLAlchemy does not wrap.
    # The message no longer claims a connection failure for every error.
    print(f"Error while loading data into SQL Server: {e}")


Connection successful!
Data appended successfully.
company Table already exist, hence skipping
