In [3]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

# pip install numpy
# conda install -c anaconda pandas
# conda install -c conda-forge matplotlib

import datetime as dt # For defining dates

import time

# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

# New Imports
# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

import warnings
# Silence every warning category for the rest of the session so the long
# per-ticker loops below do not flood the output.
# NOTE(review): this also hides deprecation and data-quality warnings from
# pandas and other libraries -- re-enable while debugging.
warnings.simplefilter("ignore")

### Constants

In [4]:
# Change this to your location.
# `variables` is presumably a local, machine-specific module holding file
# locations (it is read for `.stocks` here and `.wilshire` further down).
import variables
# Folder holding one CSV per ticker. It is combined with file names by plain
# string concatenation (e.g. PATH + ticker + ".csv"), so the value has to end
# with a path separator.
PATH = variables.stocks

In [5]:
# Default analysis window, kept both as ISO date strings ...
S_DATE, E_DATE = "2017-02-01", "2022-12-06"
# ... and as pandas timestamps parsed from those strings.
S_DATE_DT, E_DATE_DT = (pd.to_datetime(day) for day in (S_DATE, E_DATE))

### Get Column Data From CSVs

In [6]:
def get_column_from_csv(file, col_name):
    """Read a CSV and return a single column from it.

    Args:
        file: Path (or file-like object) handed to ``pd.read_csv``.
        col_name: Name of the column to extract.

    Returns:
        The requested column as a pandas Series, or ``None`` (after printing
        a message) when the file cannot be found.
    """
    try:
        frame = pd.read_csv(file)
    except FileNotFoundError:
        print("File does not exist")
        return None
    return frame[col_name]


### Get Stock Tickers

In [7]:
# Load the ticker symbols from the "Ticker" column of the Wilshire CSV
# and report how many there are.
tickers = get_column_from_csv(file=variables.wilshire, col_name="Ticker")
print(len(tickers))

3481


### Save Stock Data to CSV

In [8]:
def save_to_csv_from_yahoo(folder, ticker, delay=2):
    """Download five years of price history for one ticker and save it as CSV.

    The file is written to ``folder + <ticker> + '.csv'``, with any "." in the
    ticker replaced by "_" so the symbol is safe to use as a file name.
    ``folder`` is concatenated as-is, so it must end with a path separator.

    Args:
        folder: Destination folder prefix for the CSV file.
        ticker: Ticker symbol to download.
        delay: Seconds to pause after the request before continuing, to
            space out consecutive downloads. Defaults to 2.

    Returns:
        None. Progress and failures are reported with ``print``; a failure for
        one ticker never raises, so a bulk download loop keeps going.
    """
    try:
        print("Get Data for: ", ticker)
        # Built inside the try so a malformed ticker is reported like any
        # other failed download instead of aborting the caller's loop.
        stock = yf.Ticker(ticker)

        # Get historical price data
        df = stock.history(period="5y")

        # Pause between requests
        time.sleep(delay)

        # Nothing came back: do not leave an empty CSV behind, because every
        # file in the folder is later treated as a valid ticker.
        if df.empty:
            print("Couldn't Get Data :", ticker, "(no rows returned)")
            return

        # Remove the period for saving the file name
        the_file = folder + ticker.replace(".", "_") + '.csv'
        df.to_csv(the_file)
        # Report success only once the file has actually been written.
        print(the_file, " Saved")

    except Exception as ex:
        # Best-effort download: report the reason and move on.
        print("Couldn't Get Data :", ticker, "-", ex)
         

### Download All Stocks (commented out so it doesn't run again)

In [9]:
# for x in range(0, 3481):
#     save_to_csv_from_yahoo(PATH, tickers[x])
#     print(f"{tickers[x]} downloaded")

### Get Dataframe from CSV

In [10]:
def get_stock_df_from_csv(ticker):
    """Load the saved CSV for ``ticker`` from ``PATH``.

    The first CSV column is used as the DataFrame index.

    Args:
        ticker: Ticker symbol; the file read is ``PATH + ticker + ".csv"``.

    Returns:
        The DataFrame, or ``None`` (after printing the expected location)
        when the file does not exist.
    """
    csv_path = PATH + ticker + ".csv"
    try:
        return pd.read_csv(csv_path, index_col=0)
    except FileNotFoundError:
        print("File is not here" )
        print('Expected file: ' + csv_path)
        return None
         

### Get all stocks downloaded in list

In [11]:
# Rebuild the ticker list from the CSV files that were actually downloaded.
# Only regular files ending in ".csv" count: any other directory entry
# (e.g. macOS's .DS_Store) is not stock data and would otherwise turn into a
# bogus ticker that later fails to load.
files = [
    x for x in os.listdir(PATH)
    if x.endswith('.csv') and os.path.isfile(os.path.join(PATH, x))
]
# Strip the extension to recover the ticker symbol used in the file name.
tickers = [os.path.splitext(x)[0] for x in files]

tickers.sort()
len(tickers)

3261

### Add Daily Returns

In [38]:
def add_daily_return_to_df(df):
    """Add a ``daily_return`` column: today's close relative to the prior row.

    The first row is NaN because it has no previous close to compare with.

    Args:
        df: DataFrame with a ``Close`` column; modified in place.

    Returns:
        The same DataFrame, with ``daily_return`` added.
    """
    close = df['Close']
    # shift(1) lines each row up with the previous row's close
    previous_close = close.shift(1)
    df['daily_return'] = (close / previous_close) - 1
    return df

### Add Cumulative Return

In [37]:
def add_cum_return_to_df(df):
    """Add a ``cum_return`` column: the running product of (1 + daily_return).

    Args:
        df: DataFrame that already has a ``daily_return`` column; modified
            in place.

    Returns:
        The same DataFrame, with ``cum_return`` added.
    """
    growth_factor = 1 + df['daily_return']
    df['cum_return'] = growth_factor.cumprod()
    return df

### Add Bollinger Bands

In [36]:
def add_bollinger_bands(df, window=20, num_std=1.96):
    """Add Bollinger Band columns computed from ``Close``.

    Adds three columns:
        * ``middle_band`` - rolling mean of ``Close`` over ``window`` rows
        * ``upper_band``  - middle band + ``num_std`` rolling standard deviations
        * ``lower_band``  - middle band - ``num_std`` rolling standard deviations

    The first ``window - 1`` rows are NaN because the rolling window is not
    yet full.

    Args:
        df: DataFrame with a ``Close`` column; modified in place.
        window: Number of rows in the rolling window. Defaults to 20.
        num_std: Band width in standard deviations. Defaults to 1.96.

    Returns:
        The same DataFrame, with the three band columns added.
    """
    rolling_close = df['Close'].rolling(window=window)
    # The band offset is shared by both bands, so compute the rolling std once.
    band_offset = num_std * rolling_close.std()

    df['middle_band'] = rolling_close.mean()
    df['upper_band'] = df['middle_band'] + band_offset
    df['lower_band'] = df['middle_band'] - band_offset
    return df

### Add Ichimoku data to dataframe

In [39]:
def add_Ichimoku(df):
    """Add the five Ichimoku columns to a price DataFrame.

    Columns added (in this order):
        * ``Conversion`` - midpoint of the 9-row high/low range
        * ``Baseline``   - midpoint of the 26-row high/low range
        * ``SpanA``      - mean of Conversion and Baseline, shifted 26 rows forward
        * ``SpanB``      - midpoint of the 52-row high/low range, shifted 26 rows forward
        * ``Lagging``    - ``Close`` shifted 26 rows back

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns; modified
            in place.

    Returns:
        The same DataFrame, with the Ichimoku columns added.
    """
    highs, lows = df['High'], df['Low']

    def range_midpoint(periods):
        # Halfway between the highest high and the lowest low of the window
        top = highs.rolling(window=periods).max()
        bottom = lows.rolling(window=periods).min()
        return (top + bottom) / 2

    df['Conversion'] = range_midpoint(9)
    df['Baseline'] = range_midpoint(26)

    # Both spans are plotted 26 rows ahead of the data they are built from
    df['SpanA'] = ((df['Conversion'] + df['Baseline']) / 2).shift(26)
    df['SpanB'] = range_midpoint(52).shift(26)

    # The lagging line is the close plotted 26 rows behind
    df['Lagging'] = df['Close'].shift(-26)

    return df

### Add Daily Returns, Cumulative Returns, Bollinger Bands and Ichimoku to dataframes

In [29]:
# Try for just one stock to make sure it works 
# try:
#     print("Working on :", "A")
#     new_df = get_stock_df_from_csv("A")
#     new_df = add_daily_return_to_df(new_df)
#     new_df = add_cum_return_to_df(new_df)
#     new_df = add_bollinger_bands(new_df)
#     new_df = add_Ichimoku(new_df)
#     new_df.to_csv(PATH + 'A' + '.csv')

# except Exception as ex:
#     print(ex)

### Add these columns to all dataframes (commented out because we don't want to run it again)

In [40]:
# Enrich every downloaded ticker's CSV with the indicator columns and write
# it back in place. A failure on one ticker is printed and the loop continues.
indicator_steps = (
    add_daily_return_to_df,
    add_cum_return_to_df,
    add_bollinger_bands,
    add_Ichimoku,
)

count = 0
for count, ticker in enumerate(tickers, start=1):
    try:
        print("Working on :", ticker)
        new_df = get_stock_df_from_csv(ticker)
        # Each step adds its columns and returns the frame for the next one
        for add_columns in indicator_steps:
            new_df = add_columns(new_df)
        new_df.to_csv(PATH + ticker + '.csv')

    except Exception as ex:
        print(ex)

    # Progress indicator
    print(f'{count}/{len(tickers)}')

Working on : A
1/3261
Working on : AA
2/3261
Working on : AAL
3/3261
Working on : AAME
4/3261
Working on : AAN
5/3261
Working on : AAOI
6/3261
Working on : AAON
7/3261
Working on : AAP
8/3261
Working on : AAPL
9/3261
Working on : AAT
10/3261
Working on : AAWW
11/3261
Working on : AAXN
12/3261
Working on : ABBV
13/3261
Working on : ABC
14/3261
Working on : ABCB
15/3261
Working on : ABEO
16/3261
Working on : ABG
17/3261
Working on : ABIO
18/3261
Working on : ABM
19/3261
Working on : ABMD
20/3261
Working on : ABR
21/3261
Working on : ABT
22/3261
Working on : ABTX
23/3261
Working on : AC
24/3261
Working on : ACA
25/3261
Working on : ACAD
26/3261
Working on : ACBI
27/3261
Working on : ACC
28/3261
Working on : ACCO
29/3261
Working on : ACER
30/3261
Working on : ACGL
31/3261
Working on : ACHC
32/3261
Working on : ACHV
33/3261
Working on : ACIA
34/3261
Working on : ACIW
35/3261
Working on : ACLS
36/3261
Working on : ACM
37/3261
Working on : ACMR
38/3261
Working on : ACN
39/3261
Working on : AC

### Plot with Bollinger Bands

In [41]:
def plot_with_boll_bands(df, ticker):
    """Show a candlestick chart of ``df`` overlaid with its Bollinger Bands.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close``,
            ``upper_band``, ``middle_band`` and ``lower_band`` columns; its
            index is used for the x axis.
        ticker: Ticker symbol, used in the chart title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # (column, line colour, legend label) for each band, in drawing order
    band_styles = (
        ('upper_band', 'rgba(250, 0, 0, 0.75)', 'Upper Band'),
        ('middle_band', 'rgba(0, 0, 250, 0.75)', 'Middle Band'),
        ('lower_band', 'rgba(0, 250, 0, 0.75)', 'Lower Band'),
    )

    traces = [
        go.Candlestick(
            x=df.index,
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Candlestick',
        )
    ]
    for column, colour, label in band_styles:
        traces.append(
            go.Scatter(
                x=df.index,
                y=df[column],
                line=dict(color=colour, width=1),
                name=label,
            )
        )

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_xaxes(title="Date", rangeslider_visible=True)
    fig.update_yaxes(title="Price")
    fig.update_layout(
        title=ticker + " Bollinger Bands",
        height=800,
        width=1700,
        showlegend=True,
    )
    fig.show()


    



    

### Plot Ichimoku

In [42]:
def get_fill_color(label):
    """Return the cloud fill colour for a span label.

    Args:
        label: Numeric label; values of 1 or more get the green fill,
            anything lower gets the red fill.

    Returns:
        An ``rgba(...)`` colour string with 0.4 opacity.
    """
    green, red = 'rgba(0, 250, 0, 0.4)', 'rgba(250, 0, 0, 0.4)'
    return green if label >= 1 else red

In [43]:
def get_ichimoku(df):
    """Show an Ichimoku chart: candlesticks, indicator lines and shaded cloud.

    The area between Span A and Span B is shaded segment by segment, using
    ``get_fill_color``: green where Span A is above Span B, red otherwise.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close``,
            ``Conversion``, ``Baseline``, ``SpanA``, ``SpanB`` and ``Lagging``
            columns; its index is used for the x axis. The frame is left
            untouched (no helper columns are added to it).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()

    # Work on a copy so the caller's DataFrame does not gain the helper
    # 'label' / 'group' columns as a side effect of plotting.
    cloud = df.copy()
    # 1 where Span A is above Span B, else 0 ...
    cloud['label'] = np.where(cloud['SpanA'] > cloud['SpanB'], 1, 0)
    # ... and a run id that increases each time the label flips, so every
    # consecutive stretch with the same label forms its own group.
    cloud['group'] = cloud['label'].ne(cloud['label'].shift()).cumsum()

    for _, segment in cloud.groupby('group'):
        # Invisible Span A line: the boundary the next trace fills towards
        fig.add_traces(go.Scatter(x=segment.index, y=segment.SpanA,
                                  line=dict(color='rgba(0,0,0,0)')))

        # Invisible Span B line, filled back to the trace added just before
        fig.add_traces(go.Scatter(x=segment.index, y=segment.SpanB,
                                  line=dict(color='rgba(0,0,0,0)'),
                                  fill='tonexty',
                                  fillcolor=get_fill_color(segment['label'].iloc[0])))

    candle = go.Candlestick(x=df.index, open=df['Open'], high=df['High'],
                            low=df['Low'], close=df['Close'], name='Candlestick')

    baseline = go.Scatter(x=df.index, y=df['Baseline'],
                          line=dict(color='pink', width=2), name='Baseline')

    conversion = go.Scatter(x=df.index, y=df['Conversion'],
                            line=dict(color='black', width=1), name='Conversion')

    lagging = go.Scatter(x=df.index, y=df['Lagging'],
                         line=dict(color='purple', width=2), name='Lagging')

    span_a = go.Scatter(x=df.index, y=df['SpanA'],
                        line=dict(color='green', width=2, dash='dot'), name='Span A')

    span_b = go.Scatter(x=df.index, y=df['SpanB'],
                        line=dict(color='red', width=1, dash='dot'), name='Span B')

    fig.add_trace(candle)
    fig.add_trace(baseline)
    fig.add_trace(conversion)
    fig.add_trace(lagging)
    fig.add_trace(span_a)
    fig.add_trace(span_b)
    fig.update_layout(height=800, width=1700, showlegend=True)
    fig.show()



In [46]:
def get_Ichimoku(df):
    """Display an Ichimoku chart for one stock's price history.

    Draws the candlesticks, the Baseline / Conversion / Lagging lines and the
    two spans, and shades the region between Span A and Span B one segment at
    a time (colour chosen by ``get_fill_color``: green where Span A is above
    Span B, red otherwise).

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close``,
            ``Conversion``, ``Baseline``, ``SpanA``, ``SpanB`` and ``Lagging``
            columns; its index is used for the x axis. It is not modified.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()

    # The segment bookkeeping lives on a private copy: writing 'label' and
    # 'group' onto `df` itself would leak those columns back to the caller.
    spans = df.copy()
    spans['label'] = np.where(spans['SpanA'] > spans['SpanB'], 1, 0)
    # Cumulative count of label changes = id of each same-label run of rows
    spans['group'] = spans['label'].ne(spans['label'].shift()).cumsum()

    transparent = dict(color='rgba(0,0,0,0)')
    for _, run in spans.groupby('group'):
        # Two invisible lines per run; the second one fills the gap back to
        # the first ('tonexty' fills towards the previously added trace).
        fig.add_traces(go.Scatter(x=run.index, y=run.SpanA, line=transparent))
        fig.add_traces(go.Scatter(
            x=run.index,
            y=run.SpanB,
            line=transparent,
            fill='tonexty',
            fillcolor=get_fill_color(run['label'].iloc[0]),
        ))

    # Candlesticks first, then the indicator lines on top of them
    fig.add_trace(go.Candlestick(
        x=df.index, open=df['Open'], high=df['High'],
        low=df["Low"], close=df['Close'], name="Candlestick",
    ))

    # (column, line style, legend label) for each indicator line, in order
    line_specs = (
        ('Baseline', dict(color='pink', width=2), "Baseline"),
        ('Conversion', dict(color='black', width=1), "Conversion"),
        ('Lagging', dict(color='purple', width=2), "Lagging"),
        ('SpanA', dict(color='green', width=2, dash='dot'), "Span A"),
        ('SpanB', dict(color='red', width=1, dash='dot'), "Span B"),
    )
    for column, line_style, legend_name in line_specs:
        fig.add_trace(go.Scatter(x=df.index, y=df[column],
                                 line=line_style, name=legend_name))

    fig.update_layout(height=800, width=1700, showlegend=True)

    fig.show()

### Plots

In [47]:
# Load one ticker's enriched CSV and render its Ichimoku chart.
test_df = get_stock_df_from_csv(ticker="AMD")
get_Ichimoku(df=test_df)