**Downloads all Stock Data from a Given Index for 5 Years**

In [7]:
#!pip install cufflinks

In [6]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

# pip install numpy
# conda install -c anaconda pandas
# conda install -c conda-forge matplotlib

import datetime as dt # For defining dates

import time

# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

import warnings
warnings.simplefilter("ignore")

**Constants**

In [8]:
PATH = "../raw_data/yahoo_stocks/"
# file with tickers
file = '../raw_data/stocks_ticker/VL_5000_stocks.csv'

# Start end date defaults
S_DATE = "2017-01-03"
E_DATE = "2022-04-05"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)

**Holds Stocks Not Downloaded**

In [13]:
stocks_not_downloaded = []
missing_stocks = []

**Function that Returns a Stock Dataframe from a CSV**

In [14]:
# Reads a dataframe from the CSV file, changes index to date and returns it
def get_stock_df_from_csv(ticker):
    
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(PATH + ticker + '.csv', index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

**Returns a Named Columns Data from a CSV**

In [15]:
def get_column_from_csv(file, col_name):
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df[col_name]

**Get Stock Tickers**

In [19]:
# Total of 3482 tickers
tickers = get_column_from_csv(file, "Ticker")
tickers

# for x in tickers:
#      print(x, end=", ")

len(tickers)

3481

**Function that Saves Stock Data to CSV**

In [20]:
# Function that gets a dataframe by providing a ticker and starting date
def save_to_csv_from_yahoo(folder, ticker):
    
    stock = yf.Ticker(ticker)
    
    try:
        print("Get Data for : ", ticker)
        # Get historical closing price data
        df = stock.history(period="5y")
    
        # Wait 2 seconds
        time.sleep(2)
        
        if df.empty:
            stocks_not_downloaded.append(ticker)
        
        # Remove the period for saving the file name
        # Save data to a CSV file
        # File to save to 
        the_file = folder + ticker.replace(".", "_") + '.csv'
        print(the_file, " Saved")
        df.to_csv(the_file)
    except Exception as ex:
        stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)

**Get 5 Years of Data for the 1st 20 Stocks**

In [23]:
# # Folder used to store stock data
folder = "../raw_data/data_stocks/"

for x in range(20):
   save_to_csv_from_yahoo(folder, tickers[x])
print("Finished")

Get Data for :  A
../raw_data/data_stocks/A.csv  Saved
Get Data for :  AA
../raw_data/data_stocks/AA.csv  Saved
Get Data for :  AAL
../raw_data/data_stocks/AAL.csv  Saved
Get Data for :  AAME
../raw_data/data_stocks/AAME.csv  Saved
Get Data for :  AAN
../raw_data/data_stocks/AAN.csv  Saved
Get Data for :  AAOI
../raw_data/data_stocks/AAOI.csv  Saved
Get Data for :  AAON
../raw_data/data_stocks/AAON.csv  Saved
Get Data for :  AAP
../raw_data/data_stocks/AAP.csv  Saved
Get Data for :  AAPL
../raw_data/data_stocks/AAPL.csv  Saved
Get Data for :  AAT
../raw_data/data_stocks/AAT.csv  Saved
Get Data for :  AAWW
../raw_data/data_stocks/AAWW.csv  Saved
Get Data for :  AAXN
- AAXN: No data found, symbol may be delisted
../raw_data/data_stocks/AAXN.csv  Saved
Get Data for :  ABBV
../raw_data/data_stocks/ABBV.csv  Saved
Get Data for :  ABC
../raw_data/data_stocks/ABC.csv  Saved
Get Data for :  ABCB
../raw_data/data_stocks/ABCB.csv  Saved
Get Data for :  ABEO
../raw_data/data_stocks/ABEO.csv  Save

**Stocks Not Downloaded**

In [24]:
stocks_not_downloaded

['AAXN']

**Get Next 80 Stocks**

In [None]:
# for x in range(20, 100):
#   save_to_csv_from_yahoo(folder, tickers[x])
# print("Finished")
# stocks_not_downloaded

**Get Rest of Stocks**

In [None]:
# for x in range(3001, 3481):
#   save_to_csv_from_yahoo(folder, tickers[x])
# print("Finished")
# stocks_not_downloaded

**Delete Empty Files**

In [25]:
try:
  for x in missing_stocks:
    os.remove(folder + x + ".csv")
except FileNotFoundError:
  print("Couldn't Find " + x)

print("Finished")

Finished


**Are FAANG Stocks Worth Investing in?**

In [26]:
# Download just those stocks were are working with to make them up to date
# Facebook, Amazon, Apple, Netflix, Google
tickers = ["FB", "AMZN", "AAPL", "NFLX", "GOOG"]

for i in tickers:
    save_to_csv_from_yahoo(folder, i)
    print("Finished " + i)

Get Data for :  FB
../raw_data/data_stocks/FB.csv  Saved
Finished FB
Get Data for :  AMZN
../raw_data/data_stocks/AMZN.csv  Saved
Finished AMZN
Get Data for :  AAPL
../raw_data/data_stocks/AAPL.csv  Saved
Finished AAPL
Get Data for :  NFLX
../raw_data/data_stocks/NFLX.csv  Saved
Finished NFLX
Get Data for :  GOOG
../raw_data/data_stocks/GOOG.csv  Saved
Finished GOOG


In [27]:
fig = go.Figure()

# Get the dataframe with all FB's data
fb_df = get_stock_df_from_csv("FB")

# fb_df.head()

# Get closing price for the rest
amzn_df = get_stock_df_from_csv("AMZN")
aapl_df = get_stock_df_from_csv("AAPL")
nflx_df = get_stock_df_from_csv("NFLX")
goog_df = get_stock_df_from_csv("GOOG")

amzn_df

fb_plot = go.Scatter(x=fb_df.index, y=fb_df['Close'], name="Facebook")
amzn_plot = go.Scatter(x=amzn_df.index, y=amzn_df['Close'], name="Amazon")
aapl_plot = go.Scatter(x=aapl_df.index, y=aapl_df['Close'], name="Apple")
nflx_plot = go.Scatter(x=nflx_df.index, y=nflx_df['Close'], name="Netflix")
goog_plot = go.Scatter(x=goog_df.index, y=goog_df['Close'], name="Google")

# Plot price changes
fig.add_trace(fb_plot)
fig.add_trace(amzn_plot)
fig.add_trace(aapl_plot)
fig.add_trace(nflx_plot)
fig.add_trace(goog_plot)

fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=1200, width=1800, 
                  showlegend=True)
fig.show()

# This data isn't useful for our purposes because the scales are different
# We must calculate the daily returns for these stocks to get data we can
# work with

File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist


AttributeError: 'NoneType' object has no attribute 'index'

**Daily Returns**

For single stocks to find the daily return we subtract opening price from the closing price. Then you could multiply by the number of shares owned.

We calculate a percentage rate of return for each day to compare investments. Simple Rate of Return = (End Price - Beginning Price) / Beginning Price OR (EP / BP) - 1



In [28]:
# Shift provides the value from the previous day
# NaN is displayed because there was no previous day price for the 1st calculation
def add_daily_return_to_df(df, ticker):
    df['daily_return'] = (df['Close'] / df['Close'].shift(1)) - 1
    # Save data to a CSV file
    df.to_csv(PATH + ticker + '.csv')
    return df  

In [29]:
add_daily_return_to_df(fb_df, "FB")
add_daily_return_to_df(amzn_df, "AMZN")
add_daily_return_to_df(aapl_df, "AAPL")
add_daily_return_to_df(nflx_df, "NFLX")
add_daily_return_to_df(goog_df, "GOOG")
goog_df.head()

TypeError: 'NoneType' object is not subscriptable

**Get Cumulative Return**

In [None]:
def add_cum_return_to_df(df, ticker):
    df['cum_return'] = (1 + df['daily_return']).cumprod()
    df.to_csv(PATH + ticker + '.csv')
    return df

In [None]:
add_cum_return_to_df(fb_df, "FB")
add_cum_return_to_df(amzn_df, "AMZN")
add_cum_return_to_df(aapl_df, "AAPL")
add_cum_return_to_df(nflx_df, "NFLX")
add_cum_return_to_df(goog_df, "GOOG")

**Merge Multiple Stocks in One Dataframe by Column Name**

In [None]:
def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    # Will hold data for all dataframes with the same column name
    mult_df = pd.DataFrame()
    
    for x in tickers:
        df = get_stock_df_from_csv(x)
        
        # NEW Check if your dataframe has duplicate indexes
        if not df.index.is_unique:
            # Delete duplicates 
            df = df.loc[~df.index.duplicated(), :]
        
        mask = (df.index >= sdate) & (df.index <= edate)
        mult_df[x] = df.loc[mask][col_name]
        
    return mult_df

In [None]:
mult_df = merge_df_by_column_name('cum_return',  S_DATE, 
                                  E_DATE, *tickers)
mult_df

In [None]:
# Plot out cumulative returns on $1 in each stock since beginning of 2017
fig = px.line(mult_df, x=mult_df.index, y=mult_df.columns)
fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=1200, width=1800, 
                  showlegend=True)
fig.show()

**Create a Price / Volume Chart**

In [None]:
fig = go.Figure()
nflx_plot = go.Scatter(x=nflx_df.index, y=nflx_df['Close'], name="Netflix")

# Plot price changes
fig.add_trace(nflx_plot)

# Plot volume as bar graph
fig.add_trace(go.Bar(x=nflx_df.index, y=nflx_df['Volume']/200000, name='Volume Traded'))

fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=1200, width=1800, 
                  showlegend=True)
fig.show()

**Adding Bollinger Bands**

Bollinger Bands plot 2 lines using a moving average and the standard deviation defines how far apart the lines are. They also are used to define if prices are to high or low. When bands tighten it is believed a sharp price move in some direction. Prices tend to bounce off of the bands which provides potential market actions.

A strong trend should be noted if the price moves outside the band. If prices go over the resistance line it is in overbought territory and if it breaks through support it is a sign of an oversold position.

You normally use 20 sessions when using them.

In [None]:
# Here we will add a middle band (20 days), upper band (20 days + 1.96 std),
# and lower band (20 days - 1.96 std)

In [None]:
# Here we will add a middle band (20 days), upper band (20 days + 1.96 std),
# and lower band (20 days - 1.96 std)
def add_bollinger_bands(df):
    df['middle_band'] = df['Close'].rolling(window=20).mean()
    df['upper_band'] = df['middle_band'] + 1.96 * df['Close'].rolling(window=20).std()
    df['lower_band'] = df['middle_band'] - 1.96 * df['Close'].rolling(window=20).std()

In [None]:
stk_dfs = [fb_df, amzn_df, aapl_df, nflx_df, goog_df]

In [None]:
for x in stk_dfs:
    add_bollinger_bands(x)

In [None]:
fb_df

**Plot with Bollinger Bands**

In [None]:
def plot_with_boll_bands(ticker):
    save_to_csv_from_yahoo("D:/Python for Finance/Wilshire_Stocks/", ticker)

    df = get_stock_df_from_csv(ticker)

    add_bollinger_bands(df)
    
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
    high=df['High'], low=df['Low'],
    close=df['Close'], name="Candlestick")

    upper_line = go.Scatter(x=df.index, y=df['upper_band'], 
    line=dict(color='rgba(250, 0, 0, 0.75)', 
    width=1), name="Upper Band")

    mid_line = go.Scatter(x=df.index, y=df['middle_band'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=0.7), name="Middle Band")

    lower_line = go.Scatter(x=df.index, y=df['lower_band'], 
    line=dict(color='rgba(0, 250, 0, 0.75)', 
    width=1), name="Lower Band")

    fig.add_trace(candle)
    fig.add_trace(upper_line)
    fig.add_trace(mid_line)
    fig.add_trace(lower_line)

    fig.update_xaxes(title="Date", rangeslider_visible=True)
    fig.update_yaxes(title="Price")
    fig.update_layout(title=ticker + " Bollinger Bands", 
    height=1200, width=1800, showlegend=True)
    fig.show()

In [None]:
plot_with_boll_bands("AMD")

**Support & Resistance**

We normally use multiple moving averages to develop our support and resistance lines. 50 day for medium, 100 for long and 200 for very long terms are commonly used.

**Download S&P Data for 5 years**

In [None]:
save_to_csv_from_yahoo(PATH, "^GSPC")
gspc_df = get_stock_df_from_csv("^GSPC")
gspc_df

**Calculate Moving Averages**

Moving averages are used to mitigate short term flucuations in a stock price. We create them by calculating the mean of a set of prices over a specified number of time periods. The Simple moving average (SMA) is just a simple mean. An Exponential Moving Average (EMA) is a weighted average that put more emphasis on more recent data.

In [None]:
# SMA
gspc_df['MA50'] = gspc_df['Close'].rolling(50).mean()
gspc_df['MA100'] = gspc_df['Close'].rolling(100).mean()

# EMA If we set adjust to False the weighted function is calculated recursively
gspc_df['EMA20'] = gspc_df['Close'].ewm(span=20, adjust=False).mean()
gspc_df['EMA50'] = gspc_df['Close'].ewm(span=50, adjust=False).mean()
gspc_df['EMA100'] = gspc_df['Close'].ewm(span=100, adjust=False).mean()

gspc_df

**Plotting Moving Averages**

In [None]:
fig = go.Figure()

candle = go.Candlestick(x=gspc_df.index, open=gspc_df['Open'],
    high=gspc_df['High'], low=gspc_df['Low'],
    close=gspc_df['Close'], name="Candlestick")

ema50_line = go.Scatter(x=gspc_df.index, y=gspc_df['EMA50'], 
    line=dict(color='rgba(250, 0, 0, 0.75)', 
    width=1), name="EMA50")

ema100_line = go.Scatter(x=gspc_df.index, y=gspc_df['EMA100'], 
    line=dict(color='rgba(0, 250, 0, 0.75)', 
    width=1), name="EMA100")

fig.add_trace(candle)
# 1st support line
fig.add_trace(ema50_line)
# 2nd support line
fig.add_trace(ema100_line)

fig.show()

# What we see with the support line is that if we hit it the market bounces back. When candles are green that means we have more buyers than sellers and vice versa.
# Support levels are good indicators and the market normally only breaks support when an event occurs that is external to the market.

**Analyzing Bitcoin**

In [None]:
btc_df = yf.download(tickers='BTC-USD', period='3d', interval='15m')
btc_df

In [None]:
# We'll create a short term moving average using 5 periods
# and another using 20 periods
# When the longterm MA goes above the short term that is a sell sign
# and vice versa
btc_df['MA5'] = btc_df['Close'].rolling(5).mean()
btc_df['MA20'] = btc_df['Close'].rolling(20).mean()

**Plot**

In [None]:
fig = go.Figure()

candle = go.Candlestick(x=btc_df.index, open=btc_df['Open'],
    high=btc_df['High'], low=btc_df['Low'],
    close=btc_df['Close'], name="Candlestick")

ma5_line = go.Scatter(x=btc_df.index, y=btc_df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

ma20_line = go.Scatter(x=btc_df.index, y=btc_df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

fig.add_trace(candle)
fig.add_trace(ma5_line)
fig.add_trace(ma20_line)

fig.show()

# When the MA20 (Blue) crosses above the MA5 (Orange) that is a Death Cross (Sell)
# When the MA5 (Orange) crosses above the MA20 (Blue)that is a Golden Cross (Buy)

**Longterm Bitcoin**

In [None]:
btcl_df = yf.download(tickers='BTC-USD', period='3mo', interval='1d')
btcl_df

In [None]:
btcl_df['MA5'] = btcl_df['Close'].rolling(5).mean()
btcl_df['MA20'] = btcl_df['Close'].rolling(20).mean()

In [None]:
fig = go.Figure()

candle = go.Candlestick(x=btcl_df.index, open=btcl_df['Open'],
    high=btcl_df['High'], low=btcl_df['Low'],
    close=btcl_df['Close'], name="Candlestick")

ma5_line = go.Scatter(x=btcl_df.index, y=btcl_df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

ma20_line = go.Scatter(x=btcl_df.index, y=btcl_df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

fig.add_trace(candle)
fig.add_trace(ma5_line)
fig.add_trace(ma20_line)

fig.show()

# We see here that we are in a down trend long term

**Short Term Plot Function**

In [None]:
def get_short_ma(ticker):
    df = yf.download(tickers=ticker, period='3d', interval='15m')
    df['MA5'] = df['Close'].rolling(5).mean()
    df['MA20'] = df['Close'].rolling(20).mean()
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
        high=df['High'], low=df['Low'],
        close=df['Close'], name="Candlestick")

    ma5_line = go.Scatter(x=df.index, y=df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

    ma20_line = go.Scatter(x=df.index, y=df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

    fig.add_trace(candle)
    fig.add_trace(ma5_line)
    fig.add_trace(ma20_line)

    fig.show()

**Long Term Plot Function**

In [None]:
def get_long_ma(ticker):
    df = yf.download(tickers=ticker, period='3mo', interval='1d')
    df['MA5'] = df['Close'].rolling(5).mean()
    df['MA20'] = df['Close'].rolling(20).mean()
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
        high=df['High'], low=df['Low'],
        close=df['Close'], name="Candlestick")

    ma5_line = go.Scatter(x=df.index, y=df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

    ma20_line = go.Scatter(x=df.index, y=df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

    fig.add_trace(candle)
    fig.add_trace(ma5_line)
    fig.add_trace(ma20_line)

    fig.show()

**Long Term Ethereum Plot**

In [None]:
get_long_ma('ETH-USD')

**Short Term Ethereum Plot**

In [None]:
get_short_ma('ETH-USD')

**Long Term Litecoin**

In [None]:
get_long_ma('LTC-USD')

**Long Term Cardano**

In [None]:
get_long_ma('ADA-USD')