## Configs

In [7]:
# Slice of the symbol list to download: start at index `offset` and fetch at
# most `limit` symbols (a falsy `limit` means "through the end of the list").
offset, limit = 0, 3000

# History window handed to yf.download.
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
period = '2y'

## Download all NASDAQ traded symbols

In [2]:
import pandas as pd

# Fetch the pipe-delimited NASDAQ "traded symbols" directory.
# pandas' default NA handling would parse the literal string "NA" as NaN, so a
# ticker spelled "NA" would end up as a float NaN in `symbols`. Restrict NA
# parsing to truly empty fields so every symbol stays a string.
data = pd.read_csv(
    "http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt",
    sep='|',
    keep_default_na=False,
    na_values=[''],
)
# Keep only rows explicitly flagged as non-test issues; rows whose
# 'Test Issue' field is empty (NaN) are dropped by the same comparison.
data_clean = data[data['Test Issue'] == 'N']
symbols = data_clean['NASDAQ Symbol'].tolist()
print('total number of symbols traded = {}'.format(len(symbols)))

total number of symbols traded = 8867


## OR Download SP500 Symbols

In [8]:
# Load the S&P 500 constituent list from a local CSV file.
# The file is a list of SP500 companies sourced from
# https://github.com/datasets/s-and-p-500-companies/blob/main/data/constituents
import pandas as pd

top_500 = pd.read_csv('constituents.csv')
top_500_symbols = top_500['Symbol'].to_list()
n_top_500 = len(top_500_symbols)
print('Total number of symbols in the provided list = {}'.format(n_top_500))


Total number of symbols in the provided list = 503


## Download historical data

In [2]:
! pip install yfinance > /dev/null 2>&1
! mkdir hist

mkdir: hist: File exists


In [10]:
import yfinance as yf
import os, contextlib

## All

In [5]:
%%time

limit = limit if limit else len(symbols)
end = min(offset + limit, len(symbols))
is_valid = [False] * len(symbols)
# force silencing of verbose API
with open(os.devnull, 'w') as devnull:
    with contextlib.redirect_stdout(devnull):
        for i in range(offset, end):
            s = symbols[i]
            data = yf.download(s, period=period)
            if len(data.index) == 0:
                continue
        
            is_valid[i] = True
            data.to_csv('hist/{}.csv'.format(s))

print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))

Total number of valid symbols downloaded = 2733
CPU times: user 9min 34s, sys: 10.8 s, total: 9min 44s
Wall time: 20min 3s


## SP500

In [11]:
%%time

limit = limit if limit else len(top_500_symbols)
end = min(offset + limit, len(top_500_symbols))
is_valid = [False] * len(top_500_symbols)
# force silencing of verbose API
with open(os.devnull, 'w') as devnull:
    with contextlib.redirect_stdout(devnull):
        for i in range(offset, end):
            s = top_500_symbols[i]
            data = yf.download(s, period=period)
            if len(data.index) == 0:
                continue

            is_valid[i] = True
            data.to_csv('hist/{}.csv'.format(s))

print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Total number of valid symbols downloaded = 498
CPU times: user 6.28 s, sys: 945 ms, total: 7.22 s
Wall time: 58.3 s





In [6]:
# `is_valid` is used as a positional boolean mask over `data_clean`, so it must
# have been built against the NASDAQ symbol list. The SP500 download cell
# reassigns the same name with a different length; fail early with a clear
# message instead of pandas' generic "Item wrong length" error.
if len(is_valid) != len(data_clean):
    raise ValueError(
        'is_valid has {} entries but data_clean has {} rows; re-run the '
        '"All" download cell before this one'.format(len(is_valid), len(data_clean))
    )
valid_data = data_clean[is_valid]
# Persist the directory metadata of the symbols that were downloaded.
valid_data.to_csv('symbols_valid_meta.csv', index=False)

## Separating ETFs and Stocks

In [7]:
!mkdir stocks
!mkdir etfs

In [8]:
# Partition the downloaded symbols by the directory's 'ETF' flag column.
etf_flag = valid_data['ETF']
symbol_column = valid_data['NASDAQ Symbol']
etfs = symbol_column[etf_flag == 'Y'].tolist()
stocks = symbol_column[etf_flag == 'N'].tolist()

In [9]:
import shutil
from os.path import isfile, join

def move_symbols(symbols, dest):
    """Move each symbol's CSV file out of ``hist/`` into ``dest``.

    Args:
        symbols: iterable of ticker symbols; each maps to ``<symbol>.csv``.
        dest: path of the directory that receives the files.
    """
    csv_names = ('{}.csv'.format(symbol) for symbol in symbols)
    for csv_name in csv_names:
        source_path = join('hist', csv_name)
        target_path = join(dest, csv_name)
        shutil.move(source_path, target_path)
        
# File each downloaded CSV under the directory matching its category.
for category_symbols, category_dir in ((etfs, "etfs"), (stocks, "stocks")):
    move_symbols(category_symbols, category_dir)

In [10]:
# Remove the now-unneeded staging directory. rmdir only deletes an empty
# directory, so this reports an error if any CSV was left behind in hist/.
! rmdir hist