In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import datetime
import ssl
import certifi
import urllib

In [11]:
import urllib.request


data = [{'symbol': 'MIDCAPIETF'}, {'symbol': '0P0000XW8F'}]

symbols = [(i['symbol'], i['symbol'] + '.NS') for i in data]

symbol_dfs = []
start_time = int(datetime.datetime(2024,1,1,0,0,0).timestamp())
end_time = int(datetime.datetime.now().timestamp())

# Create an SSL context using certifi
context = ssl.create_default_context(cafile=certifi.where())

for symbol, name in symbols:
    url = f'https://query1.finance.yahoo.com/v7/finance/download/{name}?period1={start_time}&period2={end_time}&interval=1d&events=history&includeAdjustedClose=true'
    
    print(url)

    # Open the URL with the SSL context
    with urllib.request.urlopen(url, context=context) as response:
        d = pd.read_csv(response, usecols=['Date', 'Adj Close'], parse_dates=['Date'], index_col='Date')
        d.index = d.index.date
        symbol_dfs.append((symbol, d))

nifty50_df = pd.DataFrame()

for df in symbol_dfs:
    nifty50_df[df[0]] = df[1]['Adj Close']

nifty50_df.index = pd.to_datetime(nifty50_df.index)

https://query1.finance.yahoo.com/v7/finance/download/MIDCAPIETF.NS?period1=1704047400&period2=1727379504&interval=1d&events=history&includeAdjustedClose=true


HTTPError: HTTP Error 401: Unauthorized

In the next step we are going to get the sector and the industry of the companies. This will get used at a later stage to filter the master dataset.

In [156]:
import yfinance as yf

company_sector_industry = pd.DataFrame(columns=['Symbol', 'Sector', 'Industry'])

for symbol, yfin_name in symbols:
    stock_info = yf.Ticker(yfin_name).info

    temp = pd.DataFrame({
        'Symbol': symbol,
        'yfin_name': yfin_name,
        'Sector': stock_info.get('sector', 'N/A'),
        'Industry': stock_info.get('industry', 'N/A')
    }, index=[0])

    company_sector_industry = pd.concat([company_sector_industry, temp], ignore_index=True)

company_sector_industry.set_index('Symbol', inplace=True)
    

In [157]:
company_sector_industry

Unnamed: 0_level_0,Sector,Industry,yfin_name
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BAJAJ-AUTO,Consumer Cyclical,Auto Manufacturers,BAJAJ-AUTO.NS
COALINDIA,Energy,Thermal Coal,COALINDIA.NS
BHARTIARTL,Communication Services,Telecom Services,BHARTIARTL.NS
TATAMOTORS,Consumer Cyclical,Auto Manufacturers,TATAMOTORS.NS
SUNPHARMA,Healthcare,Drug Manufacturers - Specialty & Generic,SUNPHARMA.NS
ICICIBANK,Financial Services,Banks - Regional,ICICIBANK.NS
BAJAJFINSV,Financial Services,Financial Conglomerates,BAJAJFINSV.NS
M&M,Consumer Cyclical,Auto Manufacturers,M&M.NS
HEROMOTOCO,Consumer Cyclical,Auto Manufacturers,HEROMOTOCO.NS
BPCL,Energy,Oil & Gas Refining & Marketing,BPCL.NS


In [158]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled_nifty50_data = scaler.fit_transform(nifty50_df)
scaled_df = pd.DataFrame(scaled_nifty50_data, index=nifty50_df.index, columns=nifty50_df.columns)

In [159]:
scaled_df

Unnamed: 0,BAJAJ-AUTO,COALINDIA,BHARTIARTL,TATAMOTORS,SUNPHARMA,ICICIBANK,BAJAJFINSV,M&M,HEROMOTOCO,BPCL,...,INFY,HCLTECH,TATACONSUM,BRITANNIA,DIVISLAB,TITAN,ASIANPAINT,ONGC,WIPRO,LTIM
2024-01-01,0.009422,0.037098,0.000000,0.023829,0.000000,0.072630,0.854140,0.079096,0.090297,0.002902,...,0.306218,0.376987,0.064644,0.510400,0.331341,0.729333,1.000000,0.000000,0.287593,0.991855
2024-01-02,0.000000,0.099023,0.017228,0.007683,0.067666,0.009240,0.896182,0.044633,0.074327,0.018135,...,0.271580,0.341886,0.155628,0.486153,0.400324,0.759471,0.993076,0.012022,0.229369,0.921762
2024-01-03,0.084018,0.052793,0.044115,0.000000,0.072183,0.015894,0.859502,0.043938,0.022574,0.035544,...,0.180845,0.263279,0.089645,0.458694,0.400687,0.752044,0.967201,0.020509,0.131962,0.812996
2024-01-04,0.067199,0.053935,0.069409,0.037241,0.099193,0.027352,0.963749,0.034024,0.000000,0.005078,...,0.227234,0.219279,0.298934,0.542912,0.407670,0.790108,0.976458,0.065770,0.106328,0.756725
2024-01-05,0.081927,0.051081,0.066820,0.024741,0.075195,0.051562,0.996139,0.034243,0.022811,0.006710,...,0.267799,0.251043,0.285530,0.473227,0.391104,0.779896,0.940889,0.078500,0.137088,0.805030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-19,0.832470,0.901749,0.925698,0.806298,0.946924,0.762421,0.320800,0.870854,0.688391,0.937115,...,0.968249,0.979014,0.608060,0.892174,0.804525,0.442294,0.577208,0.953718,0.607706,0.673692
2024-08-20,0.834854,0.916909,0.884642,0.804208,0.982150,0.775655,0.539728,0.875387,0.717969,0.979038,...,0.983457,1.000000,0.575879,0.918601,0.836917,0.456695,0.616800,0.916405,0.643670,0.692706
2024-08-21,0.853905,0.961225,0.913281,0.799767,0.978922,0.758507,0.620646,0.873987,0.739446,0.992514,...,0.984484,0.975834,0.608839,0.975385,0.944980,0.579217,0.687963,0.899184,0.656147,0.696095
2024-08-22,0.870294,0.941691,0.959144,0.756011,0.952336,0.819087,0.641036,0.847121,0.763561,0.984279,...,1.000000,0.973036,0.755468,0.974948,0.951458,0.642268,0.739551,0.875146,0.602202,0.690667


In [160]:
cols = [ column for column in nifty50_df.columns if company_sector_industry.loc[column].Sector == 'Consumer Defensive' ]