# Download stock list

In [8]:
pip install datapackage yfinance lxml pandas_datareader html5lib

Collecting html5lib
[?25l  Downloading https://files.pythonhosted.org/packages/a5/62/bbd2be0e7943ec8504b517e62bab011b4946e1258842bc159e5dfde15b96/html5lib-1.0.1-py2.py3-none-any.whl (117kB)
[K     |████████████████████████████████| 122kB 478kB/s eta 0:00:01
Installing collected packages: html5lib
Successfully installed html5lib-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
from datapackage import Package
# Descriptor URL of the datahub.io "nyse-other-listings" data package.
descriptor_url = 'https://datahub.io/core/nyse-other-listings/datapackage.json'

# Build the package from its descriptor; later cells select resources from it by name.
package = Package(descriptor_url)

# Trailing expression so the notebook displays the available resource names.
package.resource_names

['validation_report',
 'nyse-listed_csv',
 'other-listed_csv',
 'nyse-listed_json',
 'other-listed_json',
 'nyse-other-listings_zip',
 'nyse-listed_csv_preview',
 'other-listed_csv_preview',
 'nyse-listed',
 'other-listed']

In [9]:
import pandas as pd
from os import path
import yfinance as yf

import os

file_name = 'data/nyse.csv'

# If we don't have the listing cached locally, download it from the remote source.
if not path.exists(file_name):
    # Select the NYSE CSV resource by name and fail with an explicit message when the
    # package does not expose it, instead of indexing into an empty filter result.
    resource = next(
        (candidate for candidate in package.resources if candidate.name == 'nyse-listed_csv'),
        None,
    )
    if resource is None:
        raise LookupError("Resource 'nyse-listed_csv' not found in the data package")

    # NOTE(review): assumes resource.read() yields two-field rows (symbol, description) -- confirm.
    df = pd.DataFrame(resource.read(), columns=["Symbol", "Description"])

    # to_csv does not create missing directories, so make sure the cache folder exists first.
    cache_dir = path.dirname(file_name)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    df.to_csv(file_name, index=False)

# Always load from the local cache so a fresh download and a cached run behave identically.
stocks_df = pd.read_csv(file_name)

# Index rows by ticker symbol so later cells can address them with .loc[symbol].
stocks_df.index = stocks_df['Symbol'].values
print(stocks_df.head())
print(stocks_df.tail())
print(stocks_df.shape)


     Symbol                                        Description
A         A            Agilent Technologies, Inc. Common Stock
AA       AA                            Alcoa Inc. Common Stock
AA$B   AA$B  Alcoa Inc. Depository Shares Representing 1/10...
AAC     AAC                    AAC Holdings, Inc. Common Stock
AAN     AAN                         Aaron's, Inc. Common Stock
     Symbol                                        Description
ZPIN   ZPIN  Zhaopin Limited American Depositary Shares, ea...
ZQK     ZQK                      Quiksilver, Inc. Common Stock
ZTR     ZTR   Zweig Total Return Fund, Inc. (The) Common Stock
ZTS     ZTS                   Zoetis Inc. Class A Common Stock
ZX       ZX  China Zenix Auto International Limited America...
(3298, 2)


In [14]:
import yfinance as yf
import numpy as np
import sys

file_name_with_vol = file_name.replace(".csv", "-volume.csv")

# If we don't have the volume data cached locally, download it from the remote source.
if not path.exists(file_name_with_vol):

    # Number of symbols requested from yfinance per batch.
    batch_size = 100
    symbols = list(stocks_df['Symbol'].values)

    # Work on an independent copy so the new column cannot leak into stocks_df, and
    # create the column up front so it exists even if every single lookup fails.
    stocks_vol_df = stocks_df.copy()
    stocks_vol_df['Last Volume'] = np.nan

    # Slice the symbol list into consecutive batches. Slicing (rather than flushing an
    # accumulator before appending the current symbol) guarantees that the trailing
    # partial batch is processed as well.
    for batch_counter, batch_start in enumerate(range(0, len(symbols), batch_size), start=1):
        tickers = symbols[batch_start:batch_start + batch_size]

        # Download information
        tickers_data = yf.Tickers(' '.join(tickers)).tickers
        print('Batch ', batch_counter, 'with', len(tickers_data), ' tickers')

        # Store the volume figure reported by yfinance. The column is named
        # 'Last Volume' but it holds the 'averageVolume' field.
        for ticker in tickers:
            try:
                # NOTE(review): `.tickers` is accessed by attribute in the original code;
                # a dict keyed by symbol is also supported here -- confirm against the
                # installed yfinance version.
                if isinstance(tickers_data, dict):
                    ticker_obj = tickers_data[ticker]
                else:
                    ticker_obj = getattr(tickers_data, ticker)
                print('Processing yF:', ticker_obj, ', Ticker name:', ticker)
                stocks_vol_df.loc[ticker, 'Last Volume'] = ticker_obj.info['averageVolume']
            except Exception:
                # Some tickers do not have the information we are looking for and may
                # raise while processing. Catch Exception only, so that an interrupt
                # (KeyboardInterrupt) still stops the cell.
                print('Filed to process', ticker, sys.exc_info())

        print('Downloaded volume info for', tickers)

    stocks_vol_df.to_csv(file_name_with_vol, index=False)

# Reload from the cache and drop rows with missing values (e.g. no volume was obtained).
stocks_vol_df = pd.read_csv(file_name_with_vol).dropna()
stocks_vol_df.index = stocks_vol_df['Symbol'].values
print(stocks_vol_df.head())
print(stocks_vol_df.shape)


    Symbol                                        Description  Last Volume
A        A            Agilent Technologies, Inc. Common Stock    1712146.0
AA      AA                            Alcoa Inc. Common Stock    4695503.0
AAP    AAP  Advance Auto Parts Inc Advance Auto Parts Inc W/I    1065904.0
AAT    AAT           American Assets Trust, Inc. Common Stock     275667.0
ABB    ABB                               ABB Ltd Common Stock    1777283.0
(1511, 3)


In [16]:
# Keep only the stocks whose recorded volume is at least 1,000,000.
high_volume_mask = stocks_vol_df['Last Volume'].ge(1000000)
stocks_vol_df = stocks_vol_df.loc[high_volume_mask]

# Preview the filtered frame and report how many stocks remain.
print(stocks_vol_df.head())
print(stocks_vol_df.shape)

     Symbol                                        Description  Last Volume
A         A            Agilent Technologies, Inc. Common Stock    1712146.0
AA       AA                            Alcoa Inc. Common Stock    4695503.0
AAP     AAP  Advance Auto Parts Inc Advance Auto Parts Inc W/I    1065904.0
ABB     ABB                               ABB Ltd Common Stock    1777283.0
ABBV   ABBV                           AbbVie Inc. Common Stock    8004485.0
(455, 3)


In [25]:
import os

# Date range of the price history to download, and the folder used as a per-ticker cache.
start_date = '2010-11-01'
end_date = '2019-12-31'
data_folder = 'data/'

# to_csv does not create missing directories, so make sure the cache folder exists.
os.makedirs(data_folder, exist_ok=True)

for ticker in stocks_vol_df.index.values:
    f_name = data_folder + ticker + '.csv'

    # A file on disk is treated as "already downloaded".
    if path.exists(f_name):
        print('Data exists for:', ticker)
        continue

    data = yf.download(ticker, start=start_date, end=end_date)

    # Do not cache an empty result: an empty file would make the existence check
    # above skip this ticker on every later run.
    if data.empty:
        print('No data downloaded for:', ticker)
        continue

    # Write to a temporary name and rename afterwards, so interrupting the cell
    # mid-write cannot leave a truncated file that looks like a finished download.
    tmp_name = f_name + '.tmp'
    data.to_csv(tmp_name)
    os.replace(tmp_name, f_name)

Data exists for: A
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[**************

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

KeyboardInterrupt: 