## Python for Finance 2 - Download every stock

#### Imports

In [7]:
import numpy as np
import pandas as pd
from pandas_datareader import data as web # Reads stock data
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

import datetime as dt
import mplfinance as mpf # Matplotlib finance

import time

import os
from os import listdir
from os.path import isfile, join

#### Default values

In [8]:
# Directory the stock CSV files are listed from, read from and written to
path = "../data/stock-list/"

# Default start of the analysis window (year, month, day)
ST_YEAR, ST_MONTH, ST_DAY = 2017, 1, 3
ST_DATE_DATETIME = dt.datetime(ST_YEAR, ST_MONTH, ST_DAY)
# String form is deliberately built from the raw ints, so it is not zero-padded
ST_DATE_STR = "-".join(str(part) for part in (ST_YEAR, ST_MONTH, ST_DAY))

# Default end of the analysis window (year, month, day)
EN_YEAR, EN_MONTH, EN_DAY = 2021, 8, 19
EN_DATE_DATETIME = dt.datetime(EN_YEAR, EN_MONTH, EN_DAY)
EN_DATE_STR = "-".join(str(part) for part in (EN_YEAR, EN_MONTH, EN_DAY))

#### Get stock file names in a list

In [9]:
# listdir returns every entry in the directory; keep only regular files that
# end in '.csv', because the loaders below rebuild the file name as
# path + ticker + '.csv' and any other file would produce a bogus ticker.
# listdir gives no ordering guarantee, so sort to make the run reproducible.
files = sorted(
    x for x in listdir(path)
    if isfile(join(path, x)) and os.path.splitext(x)[1] == '.csv'
)

# Remove extension from file names
# splitext splits the file name into 2 parts, the name and the extension;
# we keep just the name and store it in our list named tickers
tickers = [os.path.splitext(x)[0] for x in files]
tickers

['A',
 'AA',
 'AAL',
 'AAME',
 'AAN',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAT',
 'AAWW',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR',
 'ABT',
 'ABTX',
 'AC',
 'ACA',
 'ACAD',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACGL',
 'ACHC',
 'ACHV',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACNB',
 'ACOR',
 'ACRE',
 'ACRS',
 'ACRX',
 'ACTG',
 'ACU',
 'ACY',
 'ADBE',
 'ADC',
 'ADES',
 'ADI',
 'ADM',
 'ADMA',
 'ADMP',
 'ADNT',
 'ADP',
 'ADS',
 'ADSK',
 'ADT',
 'ADTN',
 'ADUS',
 'ADVM',
 'ADXS',
 'AE',
 'AEE',
 'AEHR',
 'AEIS',
 'AEL',
 'AEMD',
 'AEO',
 'AEP',
 'AERI',
 'AES',
 'AEY',
 'AFG',
 'AFI',
 'AFL',
 'AGCO',
 'AGE',
 'AGEN',
 'AGFS',
 'AGIO',
 'AGLE',
 'AGM',
 'AGNC',
 'AGO',
 'AGR',
 'AGRX',
 'AGS',
 'AGTC',
 'AGX',
 'AGYS',
 'AHH',
 'AHT',
 'AIG',
 'AIMC',
 'AIN',
 'AINC',
 'AIR',
 'AIRG',
 'AIRI',
 'AIRT',
 'AIT',
 'AIV',
 'AIZ',
 'AJG',
 'AJRD',
 'AJX',
 'AKAM',
 'AKBA',
 'AKR',
 'AKTS',
 'AL',
 'ALB',
 'ALBO',
 'ALCO',
 'ALDX',
 'ALE',
 'ALEC',
 'ALEX'

#### Create a dataframe from our list

In [10]:
# One row per ticker symbol, held in a single 'Ticker' column
stock_df = pd.DataFrame({'Ticker': tickers})
stock_df.head()

Unnamed: 0,Ticker
0,A
1,AA
2,AAL
3,AAME
4,AAN


#### Function that returns a dataframe from a CSV

In [11]:
# Reads the CSV file stored for a ticker into a dataframe and returns it
def getDF_fromCSV(ticker):
    """Load the CSV stored for ``ticker`` into a dataframe.

    The file name is built as ``path + ticker + '.csv'`` and read with
    ``pd.read_csv`` defaults, so no column is promoted to the index.

    Parameters
    ----------
    ticker : str
        File name without the '.csv' extension.

    Returns
    -------
    pandas.DataFrame or None
        The file's contents, or None (after printing a message) when the
        file does not exist. Callers must check for None before using it.
    """
    file_name = path + ticker + '.csv'
    try:
        return pd.read_csv(file_name)
    except FileNotFoundError:
        # Name the missing file: when called in a loop over many tickers a
        # bare "File doesn't exist" gives no clue which one failed
        print("File doesn't exist:", file_name)
        return None

#### Function that saves dataframe to CSV

In [12]:
def saveDF_toCSV(df, ticker):
    """Write ``df`` to the CSV file ``path + ticker + '.csv'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to persist.
    ticker : str
        File name without the '.csv' extension.

    Notes
    -----
    pandas' automatic 0..n-1 row index is not written: it carries no
    information and comes back as an 'Unnamed: 0' column the next time the
    file is read with ``pd.read_csv``. Any other index (named, or with
    non-default labels) is still written so no data is lost.
    """
    has_default_index = (
        df.index.name is None
        and df.index.equals(pd.RangeIndex(len(df)))
    )
    df.to_csv(path + ticker + '.csv', index=not has_default_index)

#### Delete unnamed columns in CSV files

In [13]:
def delete_unnamed_cols(df):
    """Return ``df`` without the columns whose name starts with 'Unnamed'.

    The input dataframe is not modified; a column selection of it is returned.
    """
    is_unnamed = df.columns.str.contains('^Unnamed')
    named_only = df.loc[:, ~is_unnamed]
    return named_only

#### Add daily return to dataframe

In [14]:
# A daily percentage rate of return lets us compare investments.
# Simple Rate of Return = (End Price - Beginning Price) / Beginning Price
#                       = (End Price / Beginning Price) - 1

def add_dailyReturn_toDF(df, ticker):
    """Add a 'daily_return' column to ``df`` and write ``df`` to the ticker's CSV.

    ``df`` is modified in place and also returned. The first row's return is
    NaN because shift(1) has no previous-day price to supply for it.
    The CSV is written to ``path + ticker + '.csv'``.
    """
    adj_close = df['Adj Close']
    previous_close = adj_close.shift(1)  # price from the previous row
    df['daily_return'] = adj_close / previous_close - 1

    csv_file = path + ticker + '.csv'
    df.to_csv(csv_file)
    return df

#### Returns ROI over time

In [15]:
# Return on Investment (ROI) is the return you received from your investment
# This amount does not include your initial investment
# If you invest 100 and have 200 after 5 years
# ROI = (End Value (200) - Initial Value (100)) / Initial Value (100) = 1
# Your new total is Initial Investment + ROI * Initial Investment = 200

def get_roi_defined_time(df):
    """Return the ROI of 'Adj Close' between ST_DATE_STR and EN_DATE_STR.

    Prints the initial and final price as a side effect, and converts
    ``df['Date']`` to datetime in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date' and 'Adj Close' columns.

    Returns
    -------
    pandas.Series
        One-element Series (labelled by the end-date row) holding
        (end price - start price) / start price.

    Raises
    ------
    KeyError
        If no row is dated ST_DATE_STR.
    ValueError
        If the rows dated EN_DATE_STR do not number exactly one
        (raised by ``Series.item()``).
    """
    df['Date'] = pd.to_datetime(df['Date']) # Set as a datetime

    start_rows = df.loc[df['Date'] == ST_DATE_STR, 'Adj Close']
    if start_rows.empty:
        raise KeyError(f"No row dated {ST_DATE_STR} in dataframe")
    # Positional access: the start row is not guaranteed to carry index
    # label 0 (e.g. a filtered or re-indexed frame), so [0] could fail
    start_value = start_rows.iloc[0]
    print("Initial Price:", start_value)

    end_value = df.loc[df['Date'] == EN_DATE_STR, 'Adj Close']
    print("Final Price:", end_value.item())

    # Calculate return on investment between the 2 dates
    roi = (end_value - start_value) / start_value
    return roi

#### Get coefficient of variation

In [16]:
# Coefficient of variation = standard deviation / mean of the 'Adj Close' column
def get_cov(stock_df):
    """Return the coefficient of variation of ``stock_df['Adj Close']``.

    Uses pandas' default ``Series.std()`` and ``Series.mean()``.
    """
    prices = stock_df['Adj Close']
    return prices.std() / prices.mean()

#### Test functions

In [17]:
# Show the first ticker symbol; the test cells below all operate on it
tickers[0]

'A'

In [18]:
# Load the first ticker's CSV (getDF_fromCSV gives None if the file is missing)
stock_a = getDF_fromCSV(tickers[0])
stock_a.head()

Unnamed: 0,Date,Adj Close
0,2017-01-03,44.659351
1,2017-01-04,45.245331
2,2017-01-05,44.707394
3,2017-01-06,46.100292
4,2017-01-09,46.244377


In [19]:
# Adds the 'daily_return' column to stock_a in place (so the return value is
# not needed here) and, as a side effect, writes the ticker's CSV file
add_dailyReturn_toDF(stock_a, tickers[0])
stock_a.head()

Unnamed: 0,Date,Adj Close,daily_return
0,2017-01-03,44.659351,
1,2017-01-04,45.245331,0.013121
2,2017-01-05,44.707394,-0.011889
3,2017-01-06,46.100292,0.031156
4,2017-01-09,46.244377,0.003125


In [20]:
# Drop every column whose name starts with 'Unnamed'
stock_a = delete_unnamed_cols(stock_a)
stock_a.head()

Unnamed: 0,Date,Adj Close,daily_return
0,2017-01-03,44.659351,
1,2017-01-04,45.245331,0.013121
2,2017-01-05,44.707394,-0.011889
3,2017-01-06,46.100292,0.031156
4,2017-01-09,46.244377,0.003125


In [21]:
# Write the cleaned dataframe back to the first ticker's CSV file
saveDF_toCSV(stock_a, tickers[0])

#### Add daily returns & clean up all files

In [22]:
# Add the daily return column to every stock file and strip unnamed columns
# NOTE: this rewrites the CSV files in place - no backup of the originals is
# made here, so create one beforehand if the raw data must be kept

# Cycle through all tickers
for ticker in tickers:
    print("Working on :", ticker)

    # Get a dataframe for that ticker (None when its CSV file is missing)
    # A dedicated name keeps the ticker-list dataframe stock_df intact
    ticker_df = getDF_fromCSV(ticker)
    if ticker_df is None:
        # Nothing to process; move on instead of crashing mid-run
        continue

    # Add daily return to this dataframe
    ticker_df = add_dailyReturn_toDF(ticker_df, ticker)

    # Delete unnamed columns in dataframe
    ticker_df = delete_unnamed_cols(ticker_df)

    # Save cleaned dataframe to csv
    saveDF_toCSV(ticker_df, ticker)

Working on : A
Working on : AA
Working on : AAL
Working on : AAME
Working on : AAN
Working on : AAOI
Working on : AAON
Working on : AAP
Working on : AAPL
Working on : AAT
Working on : AAWW
Working on : ABBV
Working on : ABC
Working on : ABCB
Working on : ABEO
Working on : ABG
Working on : ABIO
Working on : ABM
Working on : ABMD
Working on : ABR
Working on : ABT
Working on : ABTX
Working on : AC
Working on : ACA
Working on : ACAD
Working on : ACBI
Working on : ACC
Working on : ACCO
Working on : ACER
Working on : ACGL
Working on : ACHC
Working on : ACHV
Working on : ACIW
Working on : ACLS
Working on : ACM
Working on : ACMR
Working on : ACN
Working on : ACNB
Working on : ACOR
Working on : ACRE
Working on : ACRS
Working on : ACRX
Working on : ACTG
Working on : ACU
Working on : ACY
Working on : ADBE
Working on : ADC
Working on : ADES
Working on : ADI
Working on : ADM
Working on : ADMA
Working on : ADMP
Working on : ADNT
Working on : ADP
Working on : ADS
Working on : ADSK
Working on : ADT
Wo

#### Get stock return over time period & coefficient of variation

In [23]:
# Get total return since 2017
# Final Price 167.24 ≈ (44.66 * 2.745) + 44.66
get_roi_defined_time(stock_a)

# Get coefficient of variation 
# This is higher than normal because I'm using many years instead of one
# get_cov(stock_a)

Initial Price: 44.65935134887695
167.24119567871094
Final Price: 167.24119567871094


1165    2.744819
Name: Adj Close, dtype: float64