<a href="https://colab.research.google.com/github/graphtrek/stockforecast/blob/main/graphtrek_20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install necessary modules
!pip install yfinance
!pip install plotly==5.5.0
!pip install ta
!pip install mplfinance

Collecting yfinance
  Downloading yfinance-0.1.68-py2.py3-none-any.whl (26 kB)
Collecting requests>=2.26
  Downloading requests-2.26.0-py2.py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 655 kB/s 
[?25hCollecting lxml>=4.5.1
  Downloading lxml-4.7.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 21.3 MB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires re

In [2]:
import pandas as pd
import yfinance as yf

from datetime import datetime, timedelta

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import requests
from bs4 import BeautifulSoup
import numpy as np
import math
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mpl_dates
import matplotlib.pyplot as plt


from ta.trend import MACD
from ta.momentum import StochasticOscillator
from ta.momentum import RSIIndicator


from tabulate import tabulate
from pathlib import Path

In [3]:
# method 1: fractal candlestick pattern

def get_stock_price(ticker, from_date):
  #df = yf.download(ticker.ticker, start='2020-01-01')
  #df = df.rename(columns={"Close": "Close1", "Adj Close": "Close"})
  
  #ticker = yf.Ticker(symbol)

  df = ticker.history(start=from_date, interval="1d")
  #print(df.info())
  df['Date'] = pd.to_datetime(df.index)
  df['Date'] = df['Date'].apply(mpl_dates.date2num)
  #df = df.loc[:,['Date', 'Open', 'High', 'Low', 'Close']]
  df['MA21'] = df['Close'].ewm(span=21, adjust=False).mean()
  df['MA50'] = df['Close'].rolling(window=50).mean()
  df['MA100'] = df['Close'].rolling(window=100).mean()
  df['MA200'] = df['Close'].rolling(window=200).mean()
  return df

def is_support(df,i):
  cond1 = df['Low'][i] < df['Low'][i-1] 
  cond2 = df['Low'][i] < df['Low'][i+1] 
  cond3 = df['Low'][i+1] < df['Low'][i+2] 
  cond4 = df['Low'][i-1] < df['Low'][i-2]
  return (cond1 and cond2 and cond3 and cond4)

def is_resistance(df,i):
  cond1 = df['High'][i] > df['High'][i-1] 
  cond2 = df['High'][i] > df['High'][i+1] 
  cond3 = df['High'][i+1] > df['High'][i+2] 
  cond4 = df['High'][i-1] > df['High'][i-2]
  return (cond1 and cond2 and cond3 and cond4)

def is_far_from_level(value, levels, df):
    ave =  np.mean(df['High'] - df['Low'])
    return np.sum([abs(value - level) < ave for level in levels]) == 0

def plot_all(levels, df):
    fig, ax = plt.subplots(figsize=(16, 9), dpi=300)
    candlestick_ohlc(ax,df.values,width=0.6, colorup='green', colordown='red', alpha=0.8)
    date_format = mpl_dates.DateFormatter('%d %b %Y')
    ax.xaxis.set_major_formatter(date_format)
    for level in levels:
        plt.hlines(level[1], xmin=df['Date'][level[0]], xmax=max(df['Date']), colors='blue', linestyle='--')
    fig.show()

def findNearestGreaterThan(searchVal, inputData):
    diff = inputData - searchVal
    diff[diff<0] = np.inf
    idx = diff.argmin()
    return inputData[idx]


def findNearestLessThan(searchVal, inputData):
    diff = inputData - searchVal
    diff[diff>0] = -np.inf
    idx = diff.argmax()
    return inputData[idx]

def indicators(chart_df):
  # MACD
  macd = MACD(close=chart_df['Close'], 
            window_slow=26,
            window_fast=12, 
            window_sign=9)
  # stochastics
  stoch = StochasticOscillator(high=chart_df['High'],
                             close=chart_df['Close'],
                             low=chart_df['Low'],
                             window=14, 
                             smooth_window=3)

  rsi = RSIIndicator(close=chart_df['Close'], window=14)
  return macd, stoch, rsi

In [4]:
def calculate_levels(chart_df):
  levels = []
  low = 0
  high = np.round(chart_df['Close'].max(),1)
  for i in range(2,len(chart_df)-2):
    if is_support(chart_df,i):
      low = chart_df['Low'][i]
    if is_far_from_level(low, levels, chart_df):
      levels.append(low)
    elif is_resistance(chart_df,i):
      high = chart_df['High'][i]
    if is_far_from_level(high, levels, chart_df):
      levels.append(high)
  levels = sorted(levels, reverse=True)

  last_day_df = chart_df[-1:]
  close_price = np.round(last_day_df['Close'][0],1)

  min_level = np.round(findNearestLessThan(close_price,levels),1)
  if(min_level > close_price):
    min_level = np.round(close_price * 0.8,1)

  max_level = np.round(findNearestGreaterThan(close_price,levels),1)
  if(max_level < close_price):
    max_level = np.round(close_price * 1.2,1)

#  print('close_price',close_price,'min_level:',min_level,'max_level:', max_level)
  return levels, min_level, max_level

In [43]:
def options_chain(ticker):

    #tk = yf.Ticker(symbol)
    # Expiration dates
    exps = ticker.options

    # Get options for each expiration
    options = pd.DataFrame()
    for e in exps:
        opt = ticker.option_chain(e)
        opt = pd.DataFrame().append(opt.calls).append(opt.puts)
        opt['expirationDate'] = e
        options = options.append(opt, ignore_index=True)

    # Bizarre error in yfinance that gives the wrong expiration date
    # Add 1 day to get the correct expiration date
    options['expirationDate'] = pd.to_datetime(options['expirationDate'])
    options.insert(0,'dte',(options['expirationDate'] - datetime.today()).dt.days + 1)
    options['expirationDate'] = options['expirationDate'].dt.date
    # Boolean column if the option is a CALL x : True if (x > 10 and x < 20) else False
    options.insert(1,'CALL',options['contractSymbol'].str[4:].apply(lambda x: "C" in x))
    
    options[['bid', 
             'ask', 
             'strike', 
             'lastPrice', 
             'volume',
             'change',
             'percentChange',
             'openInterest',
             'impliedVolatility']] = options[[
                                   'bid', 
                                   'ask', 
                                   'strike',
                                   'lastPrice',
                                   'volume',
                                   'change',
                                   'percentChange',
                                   'openInterest',
                                   'impliedVolatility']].apply(pd.to_numeric)
    
    options['spread%'] = np.round(100 - ((options['bid'] / options['ask']) * 100),1) # Calculate the midpoint of the bid-ask
    
    # Drop unnecessary and meaningless columns
    #options = options.drop(columns = ['contractSize', 'currency', 'change', 'percentChange', 'lastTradeDate', 'lastPrice'])

    return options

In [33]:
def find_level_optionInterests(ticker,close_price,min_level,max_level, dte_min, dte_max):
  options_df = options_chain(ticker)
  options_df['impliedVolatility'] = np.round(options_df['impliedVolatility'],2)
  options_df['percentChange'] = np.round(options_df['percentChange'],2)
  #expirationDates = options_df['expirationDate'].unique()
  #print(sorted(expirationDates))

  #PUT_options_df = pd.DataFrame()
  #CALL_options_df = pd.DataFrame()

  #for key, value in options_df.items():
  #  date = key
  #  rsi = float(value.get('RSI'))
  #  rsi_data.append([date,rsi])
#  print('PUT OPTIONS', 'CLOSE PRICE:',close_price, 'SUPPORT -15%:', np.round(min_level * 0.85,2), 'RESISTANCE +15%:', np.round(max_level * 1.15,2))
  PUT_options_df = options_df.query('CALL == False and strike>' + str(min_level * 0.85) + ' and strike<' + str(max_level * 1.15) + ' and dte>' + str(dte_min) + ' and dte<' + str(dte_max))
  put_max_openInterest_index = PUT_options_df["openInterest"].idxmax()
  put_max_volume_index = PUT_options_df["volume"].idxmax()
  PUT_options_to_return_df = PUT_options_df.loc[put_max_openInterest_index:put_max_openInterest_index]
  PUT_options_to_return_df = PUT_options_to_return_df.append(PUT_options_df.loc[put_max_volume_index:put_max_volume_index])
  PUT_options_to_return_df = PUT_options_to_return_df.drop(columns = ['contractSize', 'currency','change','percentChange', 'lastTradeDate', 'lastPrice', 'inTheMoney','contractSymbol']) 
  
#  print(tabulate(PUT_options_to_return_df, headers = 'keys', tablefmt = 'psql'))

#  print('CALL OPTIONS', 'CLOSE PRICE:',close_price, 'SUPPORT -15%:', np.round(min_level * 0.85,2), 'RESISTANCE +15%:', np.round(max_level * 1.15,2))
  CALL_options_df = options_df.query('CALL == True and strike>' + str(min_level * 0.85) + ' and strike<' + str(max_level * 1.15) + ' and dte>' + str(dte_min) + ' and dte<' + str(dte_max))
  call_max_openInterest_index = CALL_options_df["openInterest"].idxmax()
  call_max_volume_index = CALL_options_df["volume"].idxmax()
  CALL_options_to_return_df = CALL_options_df.loc[call_max_openInterest_index:call_max_openInterest_index]
  CALL_options_to_return_df = CALL_options_to_return_df.append(CALL_options_df.loc[call_max_volume_index:call_max_volume_index])
  CALL_options_to_return_df = CALL_options_to_return_df.drop(columns = ['contractSize', 'currency', 'change','percentChange', 'lastTradeDate', 'lastPrice', 'inTheMoney','contractSymbol']) 
#  print(call_max_openInterest_index,tabulate(CALL_options_to_return_df, headers = 'keys', tablefmt = 'psql'))
  return PUT_options_to_return_df, CALL_options_to_return_df

In [30]:
def get_chart(ticker, df, days):
  last_day_df = df[-1:]
  last_date = last_day_df['Date'].index[0].date()
  close_price = np.round(last_day_df['Close'][0],1)

  ath = np.round(df['Close'].max(),1)
  discount = np.round(ath - close_price,1)
  discount_percent = np.round((discount / close_price) * 100, 1)
  
  chart_df = df.tail(days)

  macd, soch, rsi = indicators(chart_df)
  levels, min_level, max_level = calculate_levels(chart_df)
  options_df = pd.DataFrame()
  near_PUT_options_df, near_CALL_options_df = find_level_optionInterests(ticker,close_price,min_level,max_level, -1, 33)
  far_PUT_options_df, far_CALL_options_df = find_level_optionInterests(ticker,close_price,min_level,max_level, 33, 333)

  options_df = options_df.append(near_PUT_options_df)
  options_df = options_df.append(far_PUT_options_df)
  options_df = options_df.append(near_CALL_options_df)
  options_df = options_df.append(far_CALL_options_df)
  options_df = options_df.sort_values(by=['dte'])

  min_level_discount = np.round(min_level - close_price)
  min_level_discount_percent = np.round((min_level_discount / close_price) * 100, 1)
  
  min_level_0_85 = np.round(min_level * 0.85,2)
  min_level_0_85_discount = np.round(min_level_0_85 - close_price)
  min_level_0_85_discount_percent =  np.round((min_level_0_85_discount / close_price) * 100, 1)

  max_level_discount = np.round(max_level - close_price)
  max_level_discount_percent = np.round((max_level_discount / close_price) * 100, 1)
  
  max_level_1_15 = np.round(max_level * 1.15,2)
  max_level_1_15_discount = np.round(max_level_1_15 - close_price)
  max_level_1_15_discount_percent = np.round((max_level_1_15_discount / close_price) * 100, 1)

  tradingview_link = '<a href="https://in.tradingview.com/chart/66XmQfYy/?symbol=' + ticker.ticker +'">' + ticker.ticker +'</a> '
  seeking_alpha_link = '<a href="https://seekingalpha.com/symbol/'+ ticker.ticker +'"> Seeking Alpha </a> '
  google_news_link = '<a href="https://news.google.com/search?for=' + ticker.ticker + '+stock when:7d&hl=en-US&gl=US&ceid=US%3Aen"> Google News </a> '
  twitter_link = '<a href="https://twitter.com/search?q=$' + ticker.ticker + '%20stock&src=typed_query&f=live"> Twitter </a> '

  title = '<b>' + tradingview_link + '</b>' + ' <b>Date:</b>' + str(last_date) + ' <b>Close Price:</b>' + str(close_price) + ' ' + seeking_alpha_link + google_news_link + twitter_link
  title += '<br>'
  title += '<b>Support:</b>' + str(min_level) + ' ('+str(min_level_discount_percent)+'%) ' 
  title += '<b>Close Price:</b>' + str(close_price) + ' ' 
  title += '<b>Resistance:</b>' + str(max_level) + ' ('+str(max_level_discount_percent)+'%) '
  title += '<b>ATH:</b>' + str(ath) + ' <b>Discount:</b>' + str(discount) + ' (' + str(discount_percent) + '%)'
  title += '<br>' 
  title += '<b>Support -15%:</b>' + str(min_level_0_85) + ' (' + str(min_level_0_85_discount_percent) + '%) '
  title += '<b>Resistance +15%:</b>' + str(max_level_1_15) + ' (' + str(max_level_1_15_discount_percent) +  '%) '
  
  # add subplot properties when initiliazing fig variable
  fig = make_subplots(rows=5, cols=1, shared_xaxes=True,
                    vertical_spacing=0.01, 
                    row_heights=[0.26,0.34,0.1,0.15,0.15],
                    subplot_titles=[title],
                    specs=[
                           [{"type": "table"}],
                           [{"type": "candlestick"}],
                           [{"type": "bar"}],
                           [{"type": "scatter"}],
                           [{"type": "scatter"}]
                           ])

  fig.update_layout(
      height=900, width=1200, 
      showlegend=True,
      dragmode= 'pan', 
      margin=go.layout.Margin(
          l=20, #left margin
          r=20, #right margin
          b=20, #bottom margin
          t=100  #top margin
      ))


  fig.add_trace(
      go.Table(
        header=dict(values=list(options_df.columns),
                fill_color='paleturquoise',
                font=dict(color='black', size=12),
                align='left'),
        cells=dict(values=options_df.transpose().values.tolist(),
               fill_color='lavender',
               align='left')
        ),row=1, col=1)
  
  # Plot OHLC on 1st subplot (using the codes from before)
  fig.add_trace(go.Candlestick(x=chart_df.index,
                             open=chart_df['Open'],
                             high=chart_df['High'],
                             low=chart_df['Low'],
                             close=chart_df['Close'], 
                             name=ticker.ticker,
                             showlegend=True), row=2, col=1)
  

  # add moving average traces
  fig.add_trace(go.Scatter(x=chart_df.index, 
                         y=chart_df['MA21'], 
                         line=dict(color='green', width=2), 
                         name='MA 21'), row=2, col=1)
  fig.add_trace(go.Scatter(x=chart_df.index, 
                         y=chart_df['MA50'], 
                         line=dict(color='blue', width=2), 
                         name='MA 50'), row=2, col=1)
  fig.add_trace(go.Scatter(x=chart_df.index, 
                         y=chart_df['MA100'], 
                         line=dict(color='orange', width=2), 
                         name='MA 100'), row=2, col=1)
  fig.add_trace(go.Scatter(x=chart_df.index, 
                         y=chart_df['MA200'], 
                         line=dict(color='red', width=2), 
                         name='MA 200'), row=2, col=1)


  start_date = "2021-06-01"
  end_date = "2022-01-31"
  zoom_df = chart_df.iloc[chart_df.index >= start_date]
  y_zoom_max = zoom_df["High"].max()
  y_zoom_min = zoom_df["Low"].min()

  ath_percent = 0
  
  for idx, level in  enumerate(levels):
      percent = 0
      if idx == 0:
        ath = level
      current_level = level
      if idx > 0:
        prev_level = levels[idx-1]
        diff = prev_level - current_level
        ath_diff = ath - current_level
        percent = (diff / current_level) * 100
        ath_percent =  (ath_diff / current_level) * 100
      if level <= (min_level * 0.85) or level >= (max_level * 1.15):
        line_color = 'rgba(100, 10, 100, 0.2)'
      else:
        line_color = 'rgba(200, 20, 200, 1)'
      fig.add_trace(go.Scatter(
          x = [chart_df.index.min(), chart_df.index.max()],
          y = [level, level],
          mode="lines+text",
          name="Lines and Text",
          showlegend=False,
          text=['','$' + str(np.round(current_level,1)) + ' (' + str(np.round(percent,1)) + '% disc:' + str(np.round(ath_percent,1))+ '%)',''],
          textposition="top right",
          line = dict(shape = 'linear', color = line_color, dash = 'dash', width=1)
        ), row=2, col=1)


  # Plot volume trace on 3d row 
  colors = ['green' if row['Open'] - row['Close'] >= 0 
            else 'red' for index, row in chart_df.iterrows()]
  fig.add_trace(go.Bar(x=chart_df.index, 
                      y=chart_df['Volume'],
                      marker_color=colors,
                      name='Volume'
                      ), row=3, col=1)


  fig.add_trace(go.Scatter(x=chart_df.index,
                          y=rsi.rsi(),
                          line=dict(color='black', width=2),
                          name='RSI(14)'
                          ), row=4, col=1)

  fig.add_trace(go.Scatter(
          x = [np.min(chart_df.index), np.max(chart_df.index)],
          y = [70, 70],
          mode = "lines",
          line = dict(shape = 'linear', color = 'rgb(100, 10, 100)', dash = 'dash'),
          name = 'RSI(14) over bought'
      ),row=4, col=1)

  fig.add_trace(go.Scatter(
          x = [np.min(chart_df.index), np.max(chart_df.index)],
          y = [50, 50],
          mode = "lines",
          line = dict(shape = 'linear', color = 'rgb(10, 12, 240)', dash = 'dash'),
          name='RSI(14) Neutral'
      ),row=4, col=1)

  fig.add_trace(go.Scatter(
          x = [np.min(chart_df.index), np.max(chart_df.index)],
          y = [30, 30],
          mode = "lines",
          line = dict(shape = 'linear', color = 'rgb(10, 120, 24)', dash = 'dash'),
          name='RSI(14) over sold'
      ),row=4, col=1)

  # Plot MACD trace on 3rd row
  colors = ['green' if val >= 0 
            else '#FF5733' for val in macd.macd_diff()]
  fig.add_trace(go.Bar(x=chart_df.index, 
                      y=macd.macd_diff(),
                      marker_color=colors,
                      name='MACD diff'
                      ), row=5, col=1)
  fig.add_trace(go.Scatter(x=chart_df.index,
                          y=macd.macd(),
                          line=dict(color='orange', width=2),
                          name='MACD'
                          ), row=5, col=1)
  fig.add_trace(go.Scatter(x=chart_df.index,
                          y=macd.macd_signal(),
                          line=dict(color='blue', width=1),
                          name='MACD signal'
                          ), row=5, col=1)


  fig.update_xaxes(type="date", range=[start_date, end_date])
  fig.update_yaxes(range=[y_zoom_min,y_zoom_max], row=2, col=1)

#  for idx, level in  enumerate(levels):
#    fig.add_hline(level,row=2, col=1)

  fig.update_layout(xaxis_rangeslider_visible=False)
  # removing all empty dates
  # build complete timeline from start date to end date
  dt_all = pd.date_range(start=chart_df.index[0],end=chart_df.index[-1])
  # retrieve the dates that ARE in the original datset
  dt_obs = [d.strftime("%Y-%m-%d") for d in pd.to_datetime(chart_df.index)]
  # define dates with missing values
  dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in dt_obs]

  fig.update_layout(xaxis_rangebreaks=[dict(values=dt_breaks)])
  fig.update_yaxes(showspikes=True, spikemode='across', spikesnap='cursor',spikedash='dash')
  fig.update_xaxes(showspikes=True, spikemode='across', spikesnap='cursor', spikedash='dash')
  config = dict({'scrollZoom': True})

  folder='/content/drive/MyDrive/models/charts/'+ str(last_date)
  Path(folder).mkdir(parents=True, exist_ok=True)
  #print('chart folder:', folder)
  fig.write_html(folder + '/' + ticker.ticker + '.html') 
  return fig

In [49]:
ticker = yf.Ticker('SPY')
df = get_stock_price(ticker,"2019-01-01")
fig = get_chart(ticker, df, 365)
fig.show()


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars



In [9]:
def save_charts(symbols):
  for symbol in symbols:
    ticker = yf.Ticker(symbol)
    df = get_stock_price(ticker,"2019-01-01")
    print(ticker.ticker,df.size)
    get_chart(ticker, df, 365)
  return None

In [47]:
symbols = ['AAPL','AMD','AMZN','ABBV','ARKG','ARKK','ARKQ','ATVI',
           'AFRM','BA','CAT','CCL','CHPT','CHWY','COIN','CRM','DDOG',
           'DIA','DIS','DKNG','DOCU','EA','ETSY','FB','GOOGL','HOG']

symbols1 = ['HOOD','HUT','IWM','JAZZ','JETS','LVS','MA','MP',
           'MRNA','MSFT','MSTR','MU','NCLH','NFLX','NKE','NNDM','NVDA']

symbols2 = ['PFE','PINS','PLTR','PYPL','QQQ','RBLX','SPY',
           'ROKU','SBUX','SHOP','SNAP','SOFI','SOXL','SOXX','SPY','SQ']

symbols3 = ['TDOC','TEN','TGT','TLT','TSLA','TTD','TWTR','UAA',
           'V','VTI','WBA','VALE','WMT','WYNN','XLE','XLF','XLNX','ZM']

symbols4 = ['COST','RIVN','U','PENN','QCOM','RKLB','LCID','ASTR','BNGO','AAL']

#save_charts(symbols)
#save_charts(symbols1)
#save_charts(symbols2)
#save_charts(symbols3)
#save_charts(symbols4)

AAPL 9084



divide by zero encountered in double_scalars


divide by zero encountered in double_scalars



AMD 9084



divide by zero encountered in double_scalars


divide by zero encountered in double_scalars



AMZN 9084



divide by zero encountered in double_scalars


divide by zero encountered in double_scalars



ABBV 9084
ARKG 9084


KeyError: ignored