## Download Symbols
This file downloads all symbols it finds from Yahoo! Finance.  
Requires `download-and-sanitize` to be run beforehand, so that `cnsfails-symbol.csv` exists.  
Code based on [Kaggle](https://www.kaggle.com/jacksoncrow/download-nasdaq-historical-data) by Oleh Onyshchak

In [1]:
import os
import shutil
import datetime as dt
import pandas as pd
from pathlib import Path

def store_csv(df, filename, folder):
    print(f"Storing {filename} to output folder")
    fullpath = os.path.join(folder, filename)
    df.to_csv(fullpath, index=False, sep =',', decimal='.')

def get_or_create_folder(baseDir, folderName):
    path = os.path.join(baseDir, folderName)
    if(not os.path.exists(path)):
        os.mkdir(path)
    return path

# create folders if they don't exist
current_dir = os.getcwd()
raw_folder = get_or_create_folder(current_dir, 'raw')
cns_folder_raw = get_or_create_folder(raw_folder, 'cnsfails')

sanitized_folder = get_or_create_folder(current_dir, 'sanitized')
cns_folder_sanitized = get_or_create_folder(sanitized_folder, 'cnsfails-txt')
csv_yearly = get_or_create_folder(sanitized_folder, 'cnsfails-yearly')

In [4]:
%%time
import yfinance as yf
import contextlib

historic_prices_folder = get_or_create_folder(raw_folder, 'historical-prices')
df_symbols = pd.read_csv(os.path.join(sanitized_folder, f"cnsfails-symbol.csv"), index_col=None, header=0, sep =',', decimal='.')
df_historic_metadata = df_symbols

historic_metadata_path = os.path.join(historic_prices_folder, f"historic-prices-metadata.csv")
if(os.path.exists(historic_metadata_path)):
    old_df_historic_metadata = pd.read_csv(historic_metadata_path, index_col=None, header=0, sep =',', decimal='.')
    old_df_historic_metadata = old_df_historic_metadata[['SYMBOL', 'PRICE DATA']]
    df_historic_metadata = df_historic_metadata.set_index('SYMBOL').join(old_df_historic_metadata.set_index('SYMBOL'), how='left').reset_index()
else:
    df_historic_metadata['PRICE DATA'] = ["Not Processed"] * len(df_historic_metadata.index)



offset = 0
limit = 10000
period = 'max' # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

end = min(offset + limit, len(df_historic_metadata.index))
# force silencing of verbose API
#with open(os.devnull, 'w') as devnull:
#    with contextlib.redirect_stdout(devnull):
for i in range(offset, end):
    s = df_historic_metadata['SYMBOL'][i]
    r = df_historic_metadata['PRICE DATA'][i]
    if(r == 'No Data Found'):
        continue
    try:
        file_path = os.path.join(historic_prices_folder, f'SYMBOL-{s}.csv')
        if(os.path.exists(file_path)):
                df_historic_metadata['PRICE DATA'][i] = "Downloaded"
                continue
        data = yf.download(s, period=period)
        if len(data.index) == 0:
            df_historic_metadata['PRICE DATA'][i] = "No Data Found"
            continue
    
        df_historic_metadata['PRICE DATA'][i] = "Downloaded"
        data.to_csv(file_path)
    except Exception as e:
        print(f"Exception for symbol {s}, entry #{i}: {e}")
        df_historic_metadata['PRICE DATA'][i] = f"Exception: {e}"


store_csv(df_historic_metadata, f"historic-prices-metadata.csv", historic_prices_folder)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Storing historic-prices-metadata.csv to output folder
Wall time: 1.82 s


In [None]:
df_symbols_metadata = pd.read_csv("http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt", sep='|')
df_symbols_metadata = df_symbols_metadata.rename(columns={"Symbol": "SYMBOL"})
df_symbols_metadata = df_symbols_metadata.rename(columns={"Security Name": "DESCRIPTION"})
df_symbols_metadata = df_symbols_metadata[['SYMBOL', 'DESCRIPTION', 'ETF']]
df_symbols_metadata.head()