## Python for Finance 2 - Download every stock

#### Imports

In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

#### Holds stocks not downloaded

In [2]:
stocks_not_downloaded = []
missing_stocks = []


#### Saves stock data to CSV

In [3]:
# Function that gets a dataframe by providing a ticker and starting date
def save_to_csv_from_yahoo(folder, ticker, syear, smonth, sday, eyear, emonth, eday):
    # Defines the time periods to use
    start = dt.datetime(syear, smonth, sday)
    end = dt.datetime(eyear, emonth, eday)
    
    try:
        print("Get Data for : ", ticker)
        # Reads data into a dataframe
        df = web.DataReader(ticker, 'yahoo', start, end)['Adj Close']
    
        # Wait 10 seconds
        time.sleep(10)
    
        # Save data to a CSV file
        df.to_csv(folder + ticker + '.csv')
    except Exception as ex:
        stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)

#### Returns a stock dataframe from a CSV

In [4]:
# Reads a dataframe from the CSV file, changes index to date and returns it
def get_stock_df_from_csv(folder, ticker):
    
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(folder + ticker + '.csv')
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

#### Returns a named columns data from a CSV

In [5]:
def get_column_from_csv(file, col_name):
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df[col_name]

#### Test receiving stock tickers

In [6]:
tickers = get_column_from_csv('../data/Wilshire-5000-Stocks.csv', 'Ticker')
tickers

# for x in tickers:
#     print(x, end=", ")

0          A
1         AA
2        AAL
3       AAME
4        AAN
        ... 
3476    ZUMZ
3477     ZUO
3478    ZYNE
3479    ZYXI
3480    ZNGA
Name: Ticker, Length: 3481, dtype: object

#### Get 5 years of data for the 1st 20 stocks

In [8]:
# Folder used to store stock data
folder = "../data/stock-list/"

for x in range(20):
    save_to_csv_from_yahoo(folder, tickers[x], 2017, 1, 1, 2021, 12, 31)
print("Finished")

Get Data for :  A
Get Data for :  AA
Get Data for :  AAL
Get Data for :  AAME
Get Data for :  AAN
Get Data for :  AAOI
Get Data for :  AAON
Get Data for :  AAP
Get Data for :  AAPL
Get Data for :  AAT
Get Data for :  AAWW
Get Data for :  AAXN
Couldn't Get Data for : AAXN
Get Data for :  ABBV
Get Data for :  ABC
Get Data for :  ABCB
Get Data for :  ABEO
Get Data for :  ABG
Get Data for :  ABIO
Get Data for :  ABM
Get Data for :  ABMD
Finished


#### Download stocks in blocks

In [9]:
for x in range(20, 500):
    save_to_csv_from_yahoo(folder, tickers[x], 2017, 1, 1, 2021, 12, 31)
print("Finished")
stocks_not_downloaded

Get Data for :  ABR
Get Data for :  ABT
Get Data for :  ABTX
Get Data for :  AC
Get Data for :  ACA
Get Data for :  ACAD
Get Data for :  ACBI
Get Data for :  ACC
Get Data for :  ACCO
Get Data for :  ACER
Get Data for :  ACGL
Get Data for :  ACHC
Get Data for :  ACHV
Get Data for :  ACIA
Couldn't Get Data for : ACIA
Get Data for :  ACIW
Get Data for :  ACLS
Get Data for :  ACM
Get Data for :  ACMR
Get Data for :  ACN
Get Data for :  ACNB
Get Data for :  ACOR
Get Data for :  ACRE
Get Data for :  ACRS
Get Data for :  ACRX
Get Data for :  ACTG
Get Data for :  ACU
Get Data for :  ACY
Couldn't Get Data for : ACY
Get Data for :  ADBE
Get Data for :  ADC
Get Data for :  ADES
Get Data for :  ADI
Get Data for :  ADM
Get Data for :  ADMA
Get Data for :  ADMP
Get Data for :  ADMS
Couldn't Get Data for : ADMS
Get Data for :  ADNT
Get Data for :  ADP
Get Data for :  ADRO
Couldn't Get Data for : ADRO
Get Data for :  ADS
Couldn't Get Data for : ADS
Get Data for :  ADSK
Get Data for :  ADSW
Couldn't Ge

['AAXN',
 'ACIA',
 'ACY',
 'ADMS',
 'ADRO',
 'ADS',
 'ADSW',
 'AEGN',
 'AFH',
 'AFIN',
 'AHC',
 'AIMT',
 'AKCA',
 'AKER',
 'ALSK',
 'ALTM',
 'ALXN',
 'AMAG',
 'AMRB',
 'ANH',
 'ARA',
 'ARPO',
 'ASFI',
 'ASNA',
 'ATH',
 'AWSM',
 'AXE',
 'BASI',
 'BCEI',
 'BDGE',
 'BMCH',
 'BMTC',
 'BOCH',
 'BOMN',
 'BPFH',
 'BREW',
 'BRKS',
 'BSTC',
 'BXS',
 'CAI']

In [10]:
for x in missing_stocks:
    save_to_csv_from_yahoo(folder, x, 2017, 1, 1, 2021, 12, 31)
print("Finished")
stocks_not_downloaded

Finished


['AAXN',
 'ACIA',
 'ACY',
 'ADMS',
 'ADRO',
 'ADS',
 'ADSW',
 'AEGN',
 'AFH',
 'AFIN',
 'AHC',
 'AIMT',
 'AKCA',
 'AKER',
 'ALSK',
 'ALTM',
 'ALXN',
 'AMAG',
 'AMRB',
 'ANH',
 'ARA',
 'ARPO',
 'ASFI',
 'ASNA',
 'ATH',
 'AWSM',
 'AXE',
 'BASI',
 'BCEI',
 'BDGE',
 'BMCH',
 'BMTC',
 'BOCH',
 'BOMN',
 'BPFH',
 'BREW',
 'BRKS',
 'BSTC',
 'BXS',
 'CAI']

In [11]:
stocks_not_downloaded

['AAXN',
 'ACIA',
 'ACY',
 'ADMS',
 'ADRO',
 'ADS',
 'ADSW',
 'AEGN',
 'AFH',
 'AFIN',
 'AHC',
 'AIMT',
 'AKCA',
 'AKER',
 'ALSK',
 'ALTM',
 'ALXN',
 'AMAG',
 'AMRB',
 'ANH',
 'ARA',
 'ARPO',
 'ASFI',
 'ASNA',
 'ATH',
 'AWSM',
 'AXE',
 'BASI',
 'BCEI',
 'BDGE',
 'BMCH',
 'BMTC',
 'BOCH',
 'BOMN',
 'BPFH',
 'BREW',
 'BRKS',
 'BSTC',
 'BXS',
 'CAI']