## Python for Finance 6 - Best Stocks to Buy Now

In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

#Statsmodels is a great library we can use to run regressions.
import statsmodels.api as sm
# Seaborn extends the capabilities of Matplotlib
import seaborn as sns
# Used for calculating regressions
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

#### Dates & Other Constants

In [2]:
# Folder that holds one CSV file per ticker
PATH = "../data/stock-list/"

# Default start date, kept as separate parts, as a datetime and as a string
ST_YEAR, ST_MONTH, ST_DAY = 2017, 1, 3
ST_DATE_DATETIME = dt.datetime(ST_YEAR, ST_MONTH, ST_DAY)
# Parts are joined without zero padding, e.g. "2017-1-3"
ST_DATE_STR = "-".join(str(part) for part in (ST_YEAR, ST_MONTH, ST_DAY))

# Default end date, same three representations
EN_YEAR, EN_MONTH, EN_DAY = 2021, 8, 19
EN_DATE_DATETIME = dt.datetime(EN_YEAR, EN_MONTH, EN_DAY)
EN_DATE_STR = "-".join(str(part) for part in (EN_YEAR, EN_MONTH, EN_DAY))

#### Track Stocks That Could Not Be Downloaded

In [3]:
# Tickers whose download raised an error (appended to by save_to_csv_from_yahoo)
# and a separate, initially empty, list of tickers to retry
stocks_not_downloaded, missing_stocks = [], []

#### Get Stock File Names in a List

In [4]:
# Every regular file directly inside PATH (sub-directories are skipped)
files = [entry for entry in listdir(PATH) if isfile(join(PATH, entry))]

# Ticker symbol = file name without its extension, in alphabetical order
tickers = sorted(os.path.splitext(entry)[0] for entry in files)

len(tickers)

2888

#### Save Stock Data to CSV

In [5]:
# Downloads the adjusted close for one ticker and date range and stores it as a CSV
def save_to_csv_from_yahoo(ticker, syear, smonth, sday, eyear, emonth, eday,
                           folder='../data/update/'):
    """Download 'Adj Close' prices for `ticker` and write them to `<folder><ticker>.csv`.

    Parameters
    ----------
    ticker : str
        Symbol passed to the data reader; also used as the CSV file name.
    syear, smonth, sday : int
        Start date of the requested range.
    eyear, emonth, eday : int
        End date of the requested range.
    folder : str, optional
        Destination folder (must end with a path separator).
        Defaults to '../data/update/'.

    Returns
    -------
    None
        On any failure while downloading or saving, the ticker is appended to
        the module-level `stocks_not_downloaded` list and the error is printed;
        the exception is not re-raised.

    Raises
    ------
    ValueError
        If the year/month/day arguments do not form a valid date (the dates
        are built before the try block).
    """
    # Defines the time period to request
    start = dt.datetime(syear, smonth, sday)
    end = dt.datetime(eyear, emonth, eday)

    try:
        print("Get Data for : ", ticker)
        # Keep only the adjusted close column of the downloaded data
        df = web.DataReader(ticker, 'yahoo', start, end)['Adj Close']

        # Pause 5 seconds after each successful request
        # (presumably to throttle calls to the data provider)
        time.sleep(5)

        # Save data to a CSV file
        df.to_csv(folder + ticker + '.csv')

    except Exception as ex:
        # Best effort: remember the ticker so it can be retried later, and
        # show why it failed instead of discarding the exception
        stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)
        print("  Reason :", repr(ex))

In [15]:
# First batch: tickers[0] .. tickers[499]
for idx in range(500):
    symbol = tickers[idx]
    save_to_csv_from_yahoo(symbol, 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Get Data for :  A
Get Data for :  AA
Get Data for :  AAL
Get Data for :  AAME
Get Data for :  AAOI
Get Data for :  AAON
Get Data for :  AAP
Get Data for :  AAPL
Get Data for :  AAT
Get Data for :  AAWW
Get Data for :  ABBV
Get Data for :  ABC
Get Data for :  ABCB
Get Data for :  ABEO
Get Data for :  ABG
Get Data for :  ABIO
Get Data for :  ABM
Get Data for :  ABMD
Get Data for :  ABR
Get Data for :  ABT
Get Data for :  ABTX
Get Data for :  AC
Get Data for :  ACA
Get Data for :  ACAD
Get Data for :  ACBI
Get Data for :  ACC
Get Data for :  ACCO
Get Data for :  ACER
Get Data for :  ACGL
Get Data for :  ACHC
Get Data for :  ACHV
Get Data for :  ACIW
Get Data for :  ACLS
Get Data for :  ACM
Get Data for :  ACMR
Get Data for :  ACN
Get Data for :  ACNB
Get Data for :  ACOR
Get Data for :  ACRE
Get Data for :  ACRS
Get Data for :  ACRX
Get Data for :  ACTG
Get Data for :  ACU
Get Data for :  ACY
Get Data for :  ADBE
Get Data for :  ADC
Get Data for :  ADES
Get Data for :  ADI
Get Data for : 

['A',
 'AA',
 'AAL',
 'AAME',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAT',
 'AAWW',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR',
 'ABT',
 'ABTX',
 'AC',
 'ACA',
 'ACAD',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACGL',
 'ACHC',
 'ACHV',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACNB',
 'ACOR']

In [16]:
# Second batch: tickers[500] .. tickers[999].
# range() excludes its end value, so the batch must start at 500 (not 501);
# otherwise tickers[500] is never downloaded.
for x in range(500, 1000):
    save_to_csv_from_yahoo(tickers[x], 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Get Data for :  CDNS
Get Data for :  CDR
Get Data for :  CDTX
Get Data for :  CDW
Get Data for :  CDXC
Get Data for :  CDXS
Get Data for :  CDZI
Get Data for :  CE
Get Data for :  CECE
Get Data for :  CEI
Get Data for :  CEIX
Get Data for :  CELC
Get Data for :  CELH
Get Data for :  CEMI
Get Data for :  CENT
Get Data for :  CENTA
Get Data for :  CENX
Get Data for :  CERN
Get Data for :  CERS
Get Data for :  CETX
Get Data for :  CEVA
Get Data for :  CF
Get Data for :  CFBK
Get Data for :  CFFI
Get Data for :  CFFN
Get Data for :  CFG
Get Data for :  CFMS
Get Data for :  CFR
Get Data for :  CGNX
Get Data for :  CHCI
Get Data for :  CHCO
Get Data for :  CHCT
Get Data for :  CHD
Get Data for :  CHDN
Get Data for :  CHE
Get Data for :  CHEF
Get Data for :  CHGG
Get Data for :  CHH
Get Data for :  CHMA
Couldn't Get Data for : CHMA
Get Data for :  CHMG
Get Data for :  CHMI
Get Data for :  CHRS
Get Data for :  CHRW
Get Data for :  CHS
Get Data for :  CHTR
Get Data for :  CHUY
Get Data for :  C

['A',
 'AA',
 'AAL',
 'AAME',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAT',
 'AAWW',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR',
 'ABT',
 'ABTX',
 'AC',
 'ACA',
 'ACAD',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACGL',
 'ACHC',
 'ACHV',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACNB',
 'ACOR',
 'CHMA']

In [18]:
# Third batch: tickers[1000] .. tickers[1499].
# range() excludes its end value, so the batch must start at 1000 (not 1001);
# otherwise tickers[1000] is never downloaded.
for x in range(1000, 1500):
    save_to_csv_from_yahoo(tickers[x], 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Get Data for :  FIXX
Get Data for :  FIZZ
Get Data for :  FL
Get Data for :  FLIC
Get Data for :  FLL
Get Data for :  FLMN
Get Data for :  FLNT
Get Data for :  FLO
Get Data for :  FLR
Get Data for :  FLS
Get Data for :  FLT
Get Data for :  FLWS
Get Data for :  FLXS
Get Data for :  FMAO
Get Data for :  FMBH
Get Data for :  FMC
Get Data for :  FMNB
Get Data for :  FN
Get Data for :  FNB
Get Data for :  FNCB
Get Data for :  FND
Get Data for :  FNF
Get Data for :  FNHC
Get Data for :  FNKO
Get Data for :  FNLC
Get Data for :  FNWB
Get Data for :  FOCS
Get Data for :  FOLD
Get Data for :  FONR
Get Data for :  FOR
Get Data for :  FORD
Get Data for :  FORM
Get Data for :  FORR
Get Data for :  FOSL
Get Data for :  FOX
Get Data for :  FOXA
Get Data for :  FOXF
Get Data for :  FPI
Get Data for :  FR
Get Data for :  FRBA
Get Data for :  FRBK
Get Data for :  FRC
Get Data for :  FRD
Get Data for :  FRGI
Get Data for :  FRME
Get Data for :  FRPH
Get Data for :  FRPT
Get Data for :  FRT
Get Data for 

['A',
 'AA',
 'AAL',
 'AAME',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAT',
 'AAWW',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR',
 'ABT',
 'ABTX',
 'AC',
 'ACA',
 'ACAD',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACGL',
 'ACHC',
 'ACHV',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACNB',
 'ACOR',
 'CHMA',
 'GLPI']

In [6]:
# Fourth batch: tickers[1500] .. tickers[1999].
# range() excludes its end value, so the batch must start at 1500 (not 1501);
# otherwise tickers[1500] is never downloaded.
for x in range(1500, 2000):
    save_to_csv_from_yahoo(tickers[x], 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Get Data for :  LCI
Get Data for :  LCII
Get Data for :  LCNB
Get Data for :  LCUT
Get Data for :  LDOS
Get Data for :  LE
Get Data for :  LEA
Get Data for :  LECO
Get Data for :  LEE
Get Data for :  LEG
Get Data for :  LEGH
Get Data for :  LEN
Get Data for :  LEU
Get Data for :  LEVI
Get Data for :  LFUS
Get Data for :  LFVN
Get Data for :  LGIH
Get Data for :  LGL
Get Data for :  LGND
Get Data for :  LH
Get Data for :  LHCG
Get Data for :  LIFE
Get Data for :  LII
Get Data for :  LILA
Get Data for :  LILAK
Get Data for :  LIN
Get Data for :  LINC
Get Data for :  LIND
Get Data for :  LIQT
Get Data for :  LITE
Get Data for :  LIVE
Get Data for :  LIVN
Get Data for :  LJPC
Get Data for :  LKFN
Get Data for :  LKQ
Get Data for :  LL
Get Data for :  LLNW
Get Data for :  LLY
Get Data for :  LMAT
Get Data for :  LMFA
Get Data for :  LMNR
Get Data for :  LMT
Get Data for :  LNC
Get Data for :  LNDC
Get Data for :  LNG
Get Data for :  LNN
Get Data for :  LNT
Get Data for :  LNTH
Get Data for 

['LSCC', 'LSTR', 'MOBL', 'MPC', 'MRCY', 'NURO', 'NUS', 'NUVA', 'PEG']

In [7]:
# Final batch: tickers[2000] up to the end of the list.
# Starts at 2000 (not 2001) so tickers[2000] is not skipped, and uses
# len(tickers) instead of a hard-coded count so it stays correct if the
# number of ticker files changes.
for x in range(2000, len(tickers)):
    save_to_csv_from_yahoo(tickers[x], 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Get Data for :  PFE
Get Data for :  PFG
Get Data for :  PFGC
Get Data for :  PFIE
Get Data for :  PFIN
Get Data for :  PFIS
Get Data for :  PFMT
Get Data for :  PFS
Get Data for :  PFSW
Get Data for :  PG
Get Data for :  PGC
Get Data for :  PGR
Get Data for :  PGRE
Get Data for :  PGTI
Get Data for :  PH
Get Data for :  PHAS
Get Data for :  PHIO
Get Data for :  PHM
Get Data for :  PHX
Get Data for :  PI
Get Data for :  PII
Get Data for :  PINC
Get Data for :  PINS
Get Data for :  PJT
Get Data for :  PK
Get Data for :  PKBK
Get Data for :  PKE
Get Data for :  PKG
Get Data for :  PKI
Get Data for :  PKOH
Get Data for :  PLAB
Get Data for :  PLAN
Get Data for :  PLAY
Get Data for :  PLBC
Get Data for :  PLCE
Get Data for :  PLD
Get Data for :  PLNT
Get Data for :  PLOW
Get Data for :  PLPC
Get Data for :  PLSE
Get Data for :  PLUG
Get Data for :  PLUS
Get Data for :  PLXP
Get Data for :  PLXS
Get Data for :  PLYA
Get Data for :  PLYM
Get Data for :  PM
Get Data for :  PMD
Get Data for :  

['LSCC',
 'LSTR',
 'MOBL',
 'MPC',
 'MRCY',
 'NURO',
 'NUS',
 'NUVA',
 'PEG',
 'TCI',
 'TCON',
 'TCS',
 'TDS',
 'TDW',
 'UONE',
 'UONEK',
 'WTI']

In [8]:
# Retry pass over `missing_stocks`.
# NOTE(review): `missing_stocks` is created empty and nothing in the visible
# notebook fills it, so this loop does nothing unless the list is populated
# by hand first - confirm whether `stocks_not_downloaded` was intended.
for symbol in missing_stocks:
    save_to_csv_from_yahoo(symbol, 2021, 8, 20, 2021, 9, 10)
print("Finished")
stocks_not_downloaded

Finished


['LSCC',
 'LSTR',
 'MOBL',
 'MPC',
 'MRCY',
 'NURO',
 'NUS',
 'NUVA',
 'PEG',
 'TCI',
 'TCON',
 'TCS',
 'TDS',
 'TDW',
 'UONE',
 'UONEK',
 'WTI']

#### Function that Returns a Dataframe from a CSV

In [6]:
def get_df_from_csv(folder, ticker):
    """Load `<folder><ticker>.csv` into a dataframe indexed by its 'Date' column.

    The 'Date' column is parsed as dates and used as the index.

    Returns the dataframe, or None (after printing a message) when the file
    does not exist.
    """
    try:
        return pd.read_csv(folder + ticker + '.csv', index_col='Date',
                           parse_dates=True)
    except FileNotFoundError:
        print("File Doesn't Exist")
    return None

#### Merge New Data

In [7]:
def update_stock_data(ticker):
    """Return the stored price history of `ticker` with the downloaded update appended.

    Reads `<PATH><ticker>.csv` (original data) and `../data/update/<ticker>.csv`
    (new data), removes helper columns from the original and concatenates the
    two frames. Nothing is written to disk here.

    Parameters
    ----------
    ticker : str
        Symbol whose CSV files are merged.

    Returns
    -------
    pandas.DataFrame
        Original rows followed by the update rows. When a date occurs more
        than once (e.g. the merge was already run), only the last occurrence
        is kept. If no update file exists, the cleaned original is returned.

    Raises
    ------
    FileNotFoundError
        If the original CSV for `ticker` does not exist.
    """
    # Folder with the freshly downloaded data
    up_folder = '../data/update/'

    # Folder with the existing per-ticker history
    stock_folder = PATH

    update_df = get_df_from_csv(up_folder, ticker)

    # Get original dataframe
    orig_df = get_df_from_csv(stock_folder, ticker)
    if orig_df is None:
        # get_df_from_csv returns None for a missing file; fail with a clear
        # message instead of an AttributeError further down
        raise FileNotFoundError(stock_folder + ticker + '.csv')

    # Drop columns whose name contains 'unnamed' (case-insensitive)
    unnamed_cols = orig_df.columns[orig_df.columns.str.contains('unnamed', case=False)]
    orig_df = orig_df.drop(columns=unnamed_cols)

    # Drop the daily_return column; keyword form is required by pandas >= 2.0,
    # and errors='ignore' tolerates files that never had the column
    orig_df = orig_df.drop(columns='daily_return', errors='ignore')

    if update_df is None:
        # No update was downloaded for this ticker: nothing to append
        return orig_df

    join_df = pd.concat([orig_df, update_df])

    # Keep one row per date so that re-running the merge does not duplicate data
    join_df = join_df[~join_df.index.duplicated(keep='last')]
    return join_df

#### Save Dataframe to CSV

In [8]:
def save_dataframe_to_csv(df, folder, ticker):
    """Write `df` to the file `<folder><ticker>.csv` (index included)."""
    csv_path = ''.join((folder, ticker, '.csv'))
    df.to_csv(csv_path)

#### Update Files with New Data

In [9]:
def update_stocks(stock_folder):
    """Merge new data into every ticker's history and save it under `stock_folder`.

    Iterates over the module-level `tickers` list. Any error for a single
    ticker is printed and the loop moves on to the next one.
    """
    for symbol in tickers:
        try:
            print("Working on :", symbol)
            merged_df = update_stock_data(symbol)
            save_dataframe_to_csv(merged_df, stock_folder, symbol)
        except Exception as err:
            print(err)

In [None]:
# Merge the downloaded update CSVs into every ticker's file and save the
# result back into the stock folder (PATH)
update_stocks(PATH)

#### Add Daily Return to Dataframe

In [11]:
# We calculate a percentage rate of return for each day to compare investments.
# Simple Rate of Return = (End Price - Beginning Price) / Beginning Price OR (EP / BP) - 1

# Shift provides the value from the previous day
# NaN is displayed because there was no previous day price for the 1st calculation
def add_daily_return_to_df(df, ticker):
    """Add a 'daily_return' column to `df`, save it to `<PATH><ticker>.csv` and return it.

    daily_return = (Adj Close / previous row's Adj Close) - 1, so the first
    row is NaN. The column is added to the passed-in dataframe itself.
    """
    adj_close = df['Adj Close']
    previous_close = adj_close.shift(1)
    df['daily_return'] = (adj_close / previous_close) - 1

    # Persist the updated frame next to the other ticker files
    csv_path = PATH + ticker + '.csv'
    df.to_csv(csv_path)

    return df

#### Update Daily Returns

In [12]:
# Recompute the daily_return column for every ticker file under PATH
for x in tickers:
    try:
        print("Working on :", x)
        new_df = get_df_from_csv(PATH, x)
        if new_df is None:
            # File is missing (get_df_from_csv already printed a message)
            continue
        # add_daily_return_to_df writes the result to PATH + x + '.csv' itself,
        # so no second save is needed here
        new_df = add_daily_return_to_df(new_df, x)
    except Exception as ex:
        # Best effort: report the problem and continue with the next ticker
        print(ex)

Working on : A
Working on : AA
Working on : AAL
Working on : AAME
Working on : AAOI
Working on : AAON
Working on : AAP
Working on : AAPL
Working on : AAT
Working on : AAWW
Working on : ABBV
Working on : ABC
Working on : ABCB
Working on : ABEO
Working on : ABG
Working on : ABIO
Working on : ABM
Working on : ABMD
Working on : ABR
Working on : ABT
Working on : ABTX
Working on : AC
Working on : ACA
Working on : ACAD
Working on : ACBI
Working on : ACC
Working on : ACCO
Working on : ACER
Working on : ACGL
Working on : ACHC
Working on : ACHV
Working on : ACIW
Working on : ACLS
Working on : ACM
Working on : ACMR
Working on : ACN
Working on : ACNB
Working on : ACOR
Working on : ACRE
Working on : ACRS
Working on : ACRX
Working on : ACTG
Working on : ACU
Working on : ACY
Working on : ADBE
Working on : ADC
Working on : ADES
Working on : ADI
Working on : ADM
Working on : ADMA
Working on : ADMP
Working on : ADNT
Working on : ADP
Working on : ADS
Working on : ADSK
Working on : ADT
Working on : ADTN
W

#### Regression Time Series Example with AutoReg

In [13]:
def calc_projected_roi(ticker, lags=500, forecast_days=60):
    """Return the relative price change over an AutoReg forecast window for `ticker`.

    The ticker's CSV under PATH is loaded, re-indexed to calendar-day frequency
    with gaps forward-filled, and an AutoReg model is fitted to 'Adj Close'.
    The result is (last forecast price - first forecast price) / first
    forecast price.

    Parameters
    ----------
    ticker : str
        Symbol whose CSV is read from PATH.
    lags : int, optional
        Number of autoregressive lags used for the model (default 500).
    forecast_days : int, optional
        Length of the forecast window in days beyond its start index
        (default 60).

    Returns
    -------
    float
        Projected return over the forecast window.

    Raises
    ------
    ValueError
        If no CSV exists for `ticker`. Other errors raised by pandas or
        statsmodels (for example when the series is too short for `lags`)
        propagate to the caller.
    """
    a_df = get_df_from_csv(PATH, ticker)
    if a_df is None:
        # get_df_from_csv returns None for a missing file
        raise ValueError("No price data found for " + ticker)

    # Change frequency to calendar days and fill the gaps this creates
    # with the previous available value
    a_df = a_df.asfreq('d').ffill()

    # Delete columns whose name contains 'unnamed' (case-insensitive)
    unnamed_cols = a_df.columns[a_df.columns.str.contains('unnamed', case=False)]
    a_df = a_df.drop(columns=unnamed_cols)

    # Delete daily return column
    a_df = a_df.drop(['daily_return'], axis=1)

    # Row counts of the two slices determine where the forecast starts
    print("Length :", len(a_df))
    # NOTE(review): the split positions are hard-coded (the original comment
    # mentions 1712 observations) - confirm they suit every ticker's length.
    train_df = a_df.iloc[50:1369]
    test_df = a_df.iloc[1369:]

    # Fit the model with White's (HC0) covariance estimator.
    # NOTE(review): the model is fitted on the full series, not on train_df -
    # confirm this is intended.
    train_model = AutoReg(a_df['Adj Close'], lags).fit(cov_type="HC0")

    # NOTE(review): because train_df skips the first 50 rows, this index lies
    # 51 rows before the end of the data (for series with at least 50 rows),
    # so most of the window below overlaps observed dates rather than the
    # future - confirm.
    end = len(train_df) + len(test_df) - 1

    # Dynamic prediction over `forecast_days` days starting at `end`
    forecast = train_model.predict(start=end, end=end + forecast_days, dynamic=True)

    # First and last price of the forecast window
    s_price = forecast.iloc[0]
    e_price = forecast.iloc[-1]

    # Return over the forecast window
    return (e_price - s_price) / s_price

In [14]:
def get_proj_rois():
    """Compute the projected ROI for every ticker in the module-level `tickers` list.

    Tickers for which `calc_projected_roi` raises are reported and left out of
    the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'Ticker' and 'ROI', one row per ticker that was processed
        successfully.
    """
    # Will hold all tickers & stock rois
    ticker = []
    roi = []

    for x in tickers:
        print("Working on :", x)
        try:
            the_roi = calc_projected_roi(x)
        except Exception as ex:
            print("Stock Data Corrupted")
            # Show the real cause as well; not every failure is bad data
            print("  Reason :", repr(ex))
        else:
            ticker.append(x)
            print("ROI :", the_roi)
            roi.append(the_roi)

    return pd.DataFrame({'Ticker': ticker, 'ROI': roi})

In [None]:
# Projected ROI per ticker (tickers whose calculation failed are left out)
proj_roi_df = get_proj_rois()
proj_roi_df

In [None]:
# The 20 rows with the highest projected ROI
proj_roi_df.sort_values(by='ROI', ascending=False).head(20)