In [33]:
# Package imports

from pathlib import Path

import pandas as pd
import yfinance as yf

In [34]:
# Ticker universe used by the download step.
# The NIFTY50 constituent list (from 2024) is kept below, commented out, for reference.
# The active list further down currently holds only the two crypto pairs.

# nifty_50_tickers = [
#     'ADANIENT', 'ADANIPORTS', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK',
#     'BAJAJ-AUTO', 'BAJFINANCE', 'BAJAJFINSV', 'BEL', 'BPCL',
#     'BHARTIARTL', 'BRITANNIA', 'CIPLA', 'COALINDIA', 'DRREDDY',
#     'EICHERMOT', 'GRASIM', 'HCLTECH', 'HDFCBANK', 'HDFCLIFE',
#     'HEROMOTOCO', 'HINDALCO', 'HINDUNILVR', 'ICICIBANK', 'INDUSINDBK',
#     'INFY', 'JSWSTEEL', 'KOTAKBANK', 'LT', 'M&M',
#     'MARUTI', 'NTPC', 'NESTLEIND', 'ONGC', 'POWERGRID',
#     'RELIANCE', 'SBILIFE', 'SHRIRAMFIN', 'SBIN', 'SUNPHARMA',
#     'TCS', 'TATACONSUM', 'TATAMOTORS', 'TATASTEEL', 'TECHM',
#     'TITAN', 'TRENT', 'ULTRACEMCO', 'WIPRO', 'BTC-USD', 'ETH-USD'
# ]

# Active selection (the variable name is kept because later cells refer to it)
nifty_50_tickers = [
    'BTC-USD',
    'ETH-USD',
]

In [35]:
# Build an empty frame indexed by business dates; one closing-price column per
# ticker is added to it later.

business_days = pd.bdate_range(start='2017-01-10', end='2023-12-31')
ALL_CLOSING_PRICES = pd.DataFrame({'Date': business_days}).set_index('Date')

ALL_CLOSING_PRICES

2017-01-10
2017-01-11
2017-01-12
2017-01-13
2017-01-16
...
2023-12-25
2023-12-26
2023-12-27
2023-12-28
2023-12-29


In [36]:
# Download the 'Close' series for every ticker, requesting 2017-01-01 through 2023-12-31,
# and store it as a column aligned on the business-date index.
# Symbols that do not end in '-USD' get ".NS" appended before the request
# (presumably Yahoo Finance's NSE suffix - confirm if the universe changes).

for symbol in nifty_50_tickers:
    ticker = symbol if symbol.endswith('-USD') else symbol + ".NS"
    price_history = yf.download(ticker, start='2017-01-01', end='2023-12-31', progress=False)
    closing_prices = price_history['Close']
    ALL_CLOSING_PRICES[ticker] = closing_prices

In [37]:
# Summarise the frame: index range plus non-null count and dtype for each ticker column
ALL_CLOSING_PRICES.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1819 entries, 2017-01-10 to 2023-12-29
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   BTC-USD  1819 non-null   float64
 1   ETH-USD  1602 non-null   float64
dtypes: float64(2)
memory usage: 42.6 KB


In [38]:
# Count the non-null closing prices per column once and derive both summaries from it

non_null_counts = ALL_CLOSING_PRICES.count(axis=0)

# Mapping of ticker -> number of non-null entries
nonnull_values = non_null_counts.to_dict()
print(nonnull_values)

# Median of the per-ticker non-null counts
median_num_of_nonnull_entries = non_null_counts.median()
print(median_num_of_nonnull_entries)

{'BTC-USD': 1819, 'ETH-USD': 1602}
1710.5


In [39]:
# Getting a list of all stocks whose non-null entry count differs from the median count

# stocks_to_be_excluded = [ticker for ticker in nonnull_values if nonnull_values[ticker] !=  median_num_of_nonnull_entries]
# print (stocks_to_be_excluded)

In [40]:
# ALL_CLOSING_PRICES = ALL_CLOSING_PRICES.drop(columns=stocks_to_be_excluded)
# ALL_CLOSING_PRICES

In [41]:
# Keep only the dates on which every ticker has a closing price: a row is removed
# as soon as any of its columns is NaN.
# NOTE(review): for exchange-listed tickers such gaps presumably come from trading
# holidays; a ticker with a shorter price history also trims the common date range.

ALL_CLOSING_PRICES = ALL_CLOSING_PRICES.dropna(axis=0, how='any')
ALL_CLOSING_PRICES

Unnamed: 0_level_0,BTC-USD,ETH-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-09,7143.580078,320.884003
2017-11-10,6618.140137,299.252991
2017-11-13,6559.490234,316.716003
2017-11-14,6635.750000,337.631012
2017-11-15,7315.540039,333.356995
...,...,...
2023-12-25,43613.140625,2272.561768
2023-12-26,42520.402344,2231.465332
2023-12-27,43442.855469,2378.739990
2023-12-28,42627.855469,2347.566162


In [42]:
# Save to file
# to_csv does not create missing folders, so make sure the target directory exists
# first; otherwise a fresh checkout without ./data fails at this final step.

output_path = Path('./data/closing_prices.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
ALL_CLOSING_PRICES.to_csv(output_path)

In [43]:
# Display the closing-price frame that was written to disk in the previous cell
ALL_CLOSING_PRICES

Unnamed: 0_level_0,BTC-USD,ETH-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-09,7143.580078,320.884003
2017-11-10,6618.140137,299.252991
2017-11-13,6559.490234,316.716003
2017-11-14,6635.750000,337.631012
2017-11-15,7315.540039,333.356995
...,...,...
2023-12-25,43613.140625,2272.561768
2023-12-26,42520.402344,2231.465332
2023-12-27,43442.855469,2378.739990
2023-12-28,42627.855469,2347.566162
