In [5]:
# Replace the following with your own User Agent
User_Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"

# File containing Name, Symbol data for all stocks you would like to fetch.
stock_symbols_csv_file = "500_Stocks.csv"

# File where raw fetched data for stocks will be stored.
raw_directory = "30y_stock_csvs"

# File where cleaned stocks data will be stored.
clean_directory = "clean_30y_stock_csvs"

# Data for training/testing file:
mother_file = "mother_file.csv"

# Labels for training/testing file:
labels_file = "best_stock_per_day_labels.csv"

# Training window size
days_to_train_on = 30


In [6]:

"""
Retrieve intraday stock data from Yahoo Finance.
"""

import requests
import pandas as pd
import arrow
import datetime
import argparse
import shutil

failed_fetch_file = "failed_fetch.csv"
retry_fetch_file = "retry_fetch.csv"

## Using a data range of 30 years gets a sufficient amount of data per stock for neural network training.
## Function based on: https://gist.github.com/lebedov/f09030b865c4cb142af1#gistcomment-2674318
def get_historical_quote_data(symbol, data_range='30y', data_interval='1d'):
    url = 'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?range={data_range}&interval={data_interval}'.format(**locals())
    #print(url) # Uncomment this if you would like to check the url used.
    headers = {'User-Agent': User_Agent}
    res = requests.get(url,
                       headers = headers)
    data = res.json()
    body = data['chart']['result'][0]
    dt = datetime.datetime
    #print(body) # Uncomment this if you would like to inspect the response body
    # to check the data was successfully retrieved.
    dt = pd.Series(map(lambda x: arrow.get(x).to('Asia/Calcutta').datetime.replace(tzinfo=None), body['timestamp']), name='Datetime')
    df = pd.DataFrame(body['indicators']['quote'][0], index=dt)
    dg = pd.DataFrame(body['timestamp']) #TODO: Check if this is necessary.
    df = df.loc[:, ('open', 'high', 'low', 'close', 'volume')]
    df.dropna(inplace=True) # Remove NaN rows.
    df.columns = ['OPEN','HIGH','LOW','CLOSE','VOLUME'] # Rename columns in pandas dataframe.
    
    return df

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--retry', default = False)
    args = parser.parse_args()
    if args.retry:
        shutil.copyfile(failed_fetch_file, retry_fetch_file)
        csv_file = retry_fetch_file
    else:
        csv_file = stock_symbols_csv_file
    stocks_to_fetch = pd.read_csv(csv_file)
    not_finished_file = open(failed_fetch_file, "w")
    headers = ",".join(stocks_to_fetch.columns.values)
    #print(headers) # Uncomment this to see what the headers will look like.
    not_finished_file.write(headers + "\n")
    for index, row in stocks_to_fetch.iterrows():
        print(index)
        symbol = row["Symbol"]
        try:
            data = get_historical_quote_data(symbol)
            #print(data) # Uncomment this to see what the stored data will look like.
            data.to_csv(raw_directory + '/' + symbol + '.csv')
        except:
            row_string = ",".join(row.values)
            print("Could not fetch " + row_string)
            not_finished_file.write(row_string + "\n")
    not_finished_file.close()

main()


usage: ipykernel_launcher.py [-h] [--retry RETRY]
ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9015 --control=9007 --hb=9006 --Session.signature_scheme="hmac-sha256" --Session.key=b"542d2b27-643d-46cd-bd72-611d62ccb741" --shell=9008 --transport="tcp" --iopub=9016 --f=C:\Users\rohan\AppData\Local\Temp\tmp-20988az222RrXF3qs.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
