# Stock market technical analysis

## Downloads all Stock Data from a Given Index for 5 Years

In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

# pip install numpy
# conda install -c anaconda pandas
# conda install -c conda-forge matplotlib

import datetime as dt # For defining dates
from dateutil.relativedelta import relativedelta
import time

# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os
import shutil

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

import plotly.io as pio
# This ensures Plotly output works in multiple places:
# plotly_mimetype: VS Code notebook UI
# notebook: "Jupyter: Export to HTML" command in VS Code
# See https://plotly.com/python/renderers/#multiple-renderers
pio.renderers.default = "plotly_mimetype+notebook"

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

# New Imports
# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

from IPython.display import Markdown as md
from IPython.display import display

from multiprocessing.pool import ThreadPool
from functools import partial

import warnings
warnings.simplefilter("ignore")

## Constants

In [2]:
# Folder the per-ticker CSV files are written to and read back from
PATH = "../PythonFinance/Australia/"
# CSV describing the stocks; the cells below read its "Symbol" and "Sector" columns
STOCK_SECTORS = "./stock_sectors_au.csv"

# Start end date defaults (disabled; the download cell asks for period="5y" instead)
# S_DATE = (dt.datetime.today() - relativedelta(years=5)).strftime("%Y-%m-%d")
# E_DATE = dt.datetime.today().strftime("%Y-%m-%d")
# S_DATE_DT = pd.to_datetime(S_DATE)
# E_DATE_DT = pd.to_datetime(E_DATE)
# Worker count for the download thread pool and for yf.download
THREADS = 8

## Holds Stocks Not Downloaded

In [3]:
# Tickers whose download came back empty or raised an error
stocks_not_downloaded = []
# File stems (without ".csv") that the "Delete Empty Files" cell removes from PATH
# NOTE(review): no visible cell ever appends to this list — confirm it is still needed
missing_stocks = []

## Function that Returns a Stock Dataframe from a CSV

In [4]:
# Loads the saved CSV for a ticker, using the file's first column as the index
def get_stock_df_from_csv(ticker):
    """Return the stored data for ``ticker`` as a DataFrame.

    The file is looked up as ``PATH + ticker + '.csv'``. Its first column
    becomes the index as-is (it is not converted to datetimes). When the file
    does not exist a warning is printed and ``None`` is returned.
    """
    csv_file = PATH + ticker + '.csv'
    try:
        return pd.read_csv(csv_file, index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None

## Returns a Named Columns Data from a CSV

In [5]:
def get_column_from_csv(file, col_name):
    """Return the column ``col_name`` of the CSV stored at ``file``.

    When the file does not exist a warning is printed and ``None`` is
    returned. A column name that is not in the file raises ``KeyError``.
    """
    try:
        table = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None
    return table[col_name]

## Get Stock Tickers

In [6]:
# Ticker symbols come from the "Symbol" column of the sectors file.
# NOTE: get_column_from_csv returns None when the file is missing, in which
# case len() below raises TypeError.
tickers = get_column_from_csv(STOCK_SECTORS, "Symbol")
print(f"Number of stocks: {len(tickers)}")

Number of stocks: 2210


## Function that Saves Stock Data to CSV

In [7]:
# Downloads 5 years of price history for one ticker and saves it as a CSV
def save_to_csv_from_yahoo(folder, ticker):
    """Download five years of history for ``ticker`` and save it under ``folder``.

    The file name is the ticker with every "." replaced by "_", plus ".csv".
    A ticker that returns no rows, or whose download or save raises, is
    appended to the module-level ``stocks_not_downloaded`` list. No file is
    written for a ticker without data, so no header-only CSV is left behind.

    Args:
        folder: Destination directory. It is joined to the file name by plain
            string concatenation, so it must end with a path separator.
        ticker: Yahoo Finance symbol, for example "CBA.AX".

    Returns:
        None. Results are the written file and the updated list.
    """
    try:
        # Creating the Ticker is inside the try so a malformed symbol is
        # reported like any other failure instead of stopping the whole pool
        df = yf.Ticker(ticker).history(period="5y")

        if df.empty:
            stocks_not_downloaded.append(ticker)
            return

        # Remove the period from the ticker for the file name
        the_file = folder + ticker.replace(".", "_") + '.csv'
        df.to_csv(the_file)
    except Exception as ex:
        # Best effort: one failing ticker must not abort the remaining downloads
        stocks_not_downloaded.append(ticker)
        print(f"Couldn't Get Data for: {ticker} ({ex})")

## Delete old data

In [8]:
# WARNING: this empties PATH — every file, symlink and sub-directory found
# directly inside it is removed
folder = PATH
for filename in os.scandir(folder):
    try:
        # Files and symlinks are unlinked; real directories are removed recursively
        if os.path.isfile(filename) or os.path.islink(filename):
            os.unlink(filename)
        elif os.path.isdir(filename):
            shutil.rmtree(filename)
    except Exception as e:
        # Best effort: report the failure and carry on with the next entry
        print('Failed to delete %s. Reason: %s' % (filename, e))

## Get 5 Years of Data for Stocks

In [9]:
folder = PATH

# Bind the destination folder so the pool only has to supply the ticker
save_csv = partial(save_to_csv_from_yahoo, folder)
# Run the downloads on THREADS worker threads; map() returns once every ticker is done
with ThreadPool(THREADS) as p:
  p.map(save_csv, tickers)
print("Finished")

- AHY.AX: No data found, symbol may be delisted- CDH.AX: No data found, symbol may be delisted

- 5GN.AX: No data found, symbol may be delisted
- CDV.AX: No data found, symbol may be delisted
- CDY.AX: No data found, symbol may be delisted
- ABW.AX: No data found, symbol may be delisted
- BCL.AX: No data found, symbol may be delisted
- ANA.AX: No data found, symbol may be delisted
- 8EC.AX: No data found, symbol may be delisted
- BD1.AX: No data found, symbol may be delisted
- CFE.AX: No data found, symbol may be delisted
- CP1.AX: No data found, symbol may be delisted
- BDA.AX: No data found, symbol may be delisted
- BSM.AX: No data found, symbol may be delisted
- BDI.AX: No data found, symbol may be delisted
- ABL.AX: No data found, symbol may be delisted
- ABT.AX: No data found, symbol may be delisted
- AU8.AX: No data found, symbol may be delisted
- CGL.AX: No data found, symbol may be delisted
- CGM.AX: No data found, symbol may be delisted
- AGM.AX: No data found, symbol may be d

## Delete Empty Files

In [10]:
# Remove the CSV of every stock listed in missing_stocks.
# The try sits inside the loop so that one file that is already gone does not
# stop the remaining files from being removed.
for x in missing_stocks:
    try:
        os.remove(PATH + x + ".csv")
    except FileNotFoundError:
        print("Couldn't Find " + x)
print("Finished")

Finished


## Daily Returns

To find the daily return of a single stock, subtract the opening price from the closing price. Multiplying the result by the number of shares owned gives the gain or loss on your position.

We calculate a percentage rate of return for each day to compare investments.
Simple Rate of Return = (End Price - Beginning Price) / Beginning Price OR (EP / BP) - 1

In [11]:
# Adds the simple daily rate of return: (close / previous close) - 1.
# The first row is NaN because there is no previous close to compare with.
def add_daily_return_to_df(df, ticker):
    """Add a 'daily_return' column to ``df`` and save it as the ticker's CSV.

    ``df`` is modified in place, written to ``PATH + ticker + '.csv'`` and
    returned.
    """
    previous_close = df['Close'].shift(1)
    df['daily_return'] = df['Close'] / previous_close - 1
    out_file = PATH + ticker + '.csv'
    df.to_csv(out_file)
    return df

## Get Cumulative Return

In [12]:
def add_cum_return_to_df(df, ticker):
    """Add a 'cum_return' column to ``df`` and save it as the ticker's CSV.

    The column is the running product of (1 + daily_return), so ``df`` must
    already have a 'daily_return' column. ``df`` is modified in place, written
    to ``PATH + ticker + '.csv'`` and returned.
    """
    growth_factors = 1 + df['daily_return']
    df['cum_return'] = growth_factors.cumprod()
    out_file = PATH + ticker + '.csv'
    df.to_csv(out_file)
    return df

## Get Stock File Names in a List

In [13]:
# Build the sorted ticker list from the CSV files saved in PATH.
# Only names ending in ".csv" are kept, so stray entries such as macOS's
# ".DS_Store" never end up in the list.
files = [x for x in listdir(PATH) if isfile(join(PATH, x))]
tickers = sorted(
    os.path.splitext(x)[0] for x in files if os.path.splitext(x)[1] == '.csv'
)

## Add Daily & Cumulative Return to All Stock Data

In [14]:
# Add the daily_return and cum_return columns to every saved stock file
for x in tickers:
    try:
        new_df = get_stock_df_from_csv(x)
        if new_df is None:
            # The reader has already printed its warning; nothing to update
            continue
        new_df = add_daily_return_to_df(new_df, x)
        # add_cum_return_to_df saves the finished frame to PATH + x + '.csv',
        # so no further write is needed here
        new_df = add_cum_return_to_df(new_df, x)
    except Exception as ex:
        # Best effort: name the ticker that failed and continue with the rest
        print(f"{x}: {ex}")

## 11 Stock Market Sectors

We want to invest in stocks that are not correlated with one another, meaning their prices do not tend to rise and fall together; ideally some hold steady or go up when the others go down. Because all our stocks are expected to do well over the course of the year, this smooths out the performance of our portfolio.

One way to easily find stocks that are not correlated is to create portfolios using stocks from different sectors of the market.

**Technology** : Manufacturing of electronics, software, or related to information technology 

**Health Care** : Biotech, hospitals, medical devices, drugs 

**Financials** : Banks, investment funds, and insurance

**Real Estate** : Residential, industrial, and retail real estate 

**Energy** : Production and supply of energy 

**Materials** : Mining, refining, chemical, and forestry 

**Consumer Discretionary** : Retailers, apparel, media, durables and services 

**Industrials** : Construction, machinery, fabrication, manufacturing, defense, and aerospace 

**Utilities** : Direct providers of electric, gas, and water 

**Consumer Staples** : Food, beverage as well as products consumers deem essential 

**Telecommunication** : Cable, internet providers, wireless, and satellite

## Get Sector Stocks

In [15]:
# Load the sector listing and split it into one frame per sector
sec_df = pd.read_csv(STOCK_SECTORS)


def _rows_for_sector(name):
    """Return the rows of sec_df whose 'Sector' value equals ``name``."""
    return sec_df.loc[sec_df['Sector'] == name]


indus_df = _rows_for_sector("Industrials")
health_df = _rows_for_sector("Health Care")
it_df = _rows_for_sector("Information Technology")
comm_df = _rows_for_sector("Communication Services")
staple_df = _rows_for_sector("Consumer Staples")
discretion_df = _rows_for_sector("Consumer Discretionary")
utility_df = _rows_for_sector("Utilities")
financial_df = _rows_for_sector("Financials")
material_df = _rows_for_sector("Materials")
restate_df = _rows_for_sector("Real Estate")
energy_df = _rows_for_sector("Energy")

## Returns a DF with Cumulative Return for all Stocks

In [16]:
def get_cum_ret_for_stocks(stock_df):
    """Collect the latest cumulative return of every stock in ``stock_df``.

    Each value of the 'Symbol' column has its "." replaced by "_" and is
    loaded with get_stock_df_from_csv. Stocks whose file is missing or has no
    rows are left out.

    Returns:
        DataFrame with a 'Ticker' column (the "_" form of the symbol) and a
        'CUM_RET' column (the last 'cum_return' value in the stock's file).
    """
    found_tickers = []
    final_returns = []

    for _, row in stock_df.iterrows():
        file_stem = row['Symbol'].replace(".", "_")
        df = get_stock_df_from_csv(file_stem)
        # Skip stocks with no saved file or with a file that holds no rows
        if df is None or len(df.index) == 0:
            continue
        found_tickers.append(file_stem)
        final_returns.append(df['cum_return'].iloc[-1])

    return pd.DataFrame({'Ticker': found_tickers, 'CUM_RET': final_returns})

In [17]:
# One Ticker / CUM_RET frame per sector, built from the saved CSV files
industrial = get_cum_ret_for_stocks(indus_df)
health_care = get_cum_ret_for_stocks(health_df)
it = get_cum_ret_for_stocks(it_df)
commun = get_cum_ret_for_stocks(comm_df)
staple = get_cum_ret_for_stocks(staple_df)
discretion = get_cum_ret_for_stocks(discretion_df)
utility = get_cum_ret_for_stocks(utility_df)
finance = get_cum_ret_for_stocks(financial_df)
material = get_cum_ret_for_stocks(material_df)
restate = get_cum_ret_for_stocks(restate_df)
energy = get_cum_ret_for_stocks(energy_df)

# Ichimoku Kinko Hyo

The Ichimoku (One Look) is considered an all-in-one indicator. It provides information on momentum, support and resistance. It is made up of 5 lines. Short-term traders build it from intraday data, anywhere from 1-minute to 6-hour bars; long-term traders focus on daily or weekly data.

 - Conversion Line (Tenkan-sen) : Represents support, resistance and reversals. Used to measure short term trends.
 - Baseline (Kijun-sen) : Represents support, resistance and confirms trend changes. Allows you to evaluate the strength of medium term trends. Called the baseline because it lags the price.
 - Leading Span A (Senkou A) : Used to identify future areas of support and resistance
 - Leading Span B (Senkou B) : Other line used to identify future support and resistance
 - Lagging Span (Chikou) : Shows possible support and resistance. It is used to confirm signals obtained from other lines.
 - Cloud (Kumo) : Space between Span A and B. Represents the divergence in price evolution.
 
Formulas

 - Lagging Span = Price shifted back 26 periods
 - Base Line = (Highest Value in period + Lowest value in period)/2 (26 Sessions)
 - Conversion Line = (Highest Value in period + Lowest value in period)/2 (9 Sessions)
 - Leading Span A = (Conversion Value + Base Value)/2, plotted 26 sessions ahead
 - Leading Span B = (Highest Value in period + Lowest value in period)/2 (52 Sessions), plotted 26 sessions ahead

## Get Ichimoku Function

In [18]:
def get_fill_color(label):
    """Return the cloud fill colour for ``label``.

    Translucent green when ``label`` is 1 or more, translucent red otherwise.
    """
    is_green = label >= 1
    return 'rgba(0,250,0,0.4)' if is_green else 'rgba(250,0,0,0.4)'

In [19]:
def get_Ichimoku(tickers):
    """Download hourly prices for ``tickers`` and show one Ichimoku chart each.

    Three months of 1-hour bars are fetched for all tickers in a single
    yf.download call. For every ticker a heading is displayed, followed by a
    figure holding, in this order: the shaded cloud segments, the
    candlesticks, and the Baseline, Conversion, Lagging, Span A and Span B
    lines.

    Args:
        tickers: Iterable of Yahoo Finance symbols; each is used as a key
            into the downloaded frame.
    """
    def rolling_midpoint(frame, window):
        # Halfway between the highest high and lowest low of the last `window` rows
        highest = frame['High'].rolling(window=window).max()
        lowest = frame['Low'].rolling(window=window).min()
        return (highest + lowest) / 2

    dataframe = yf.download(tickers=tickers, period='3mo', interval='1h',
                            group_by="ticker", threads=THREADS)

    for ticker in tickers:
        prices = dataframe[ticker].copy()
        display(md(f"#### {ticker}"))

        # Indicator columns: 9- and 26-row midpoints, the two spans moved
        # 26 rows forward and the close moved 26 rows back
        prices['Conversion'] = rolling_midpoint(prices, 9)
        prices['Baseline'] = rolling_midpoint(prices, 26)
        prices['SpanA'] = ((prices['Conversion'] + prices['Baseline']) / 2).shift(26)
        prices['SpanB'] = rolling_midpoint(prices, 52).shift(26)
        prices['Lagging'] = prices['Close'].shift(-26)

        fig = go.Figure()

        # Cloud: label each row 1 when Span A is above Span B, else 0, then
        # number consecutive runs of equal labels so every run gets its own fill
        cloud = prices.copy()
        cloud['label'] = np.where(cloud['SpanA'] > cloud['SpanB'], 1, 0)
        cloud['group'] = cloud['label'].ne(cloud['label'].shift()).cumsum()

        for _, segment in cloud.groupby('group'):
            # Invisible Span A line, then an invisible Span B line filled back to it
            fig.add_traces(go.Scatter(
                x=segment.index, y=segment.SpanA,
                line=dict(color='rgba(0,0,0,0)')))
            fig.add_traces(go.Scatter(
                x=segment.index, y=segment.SpanB,
                line=dict(color='rgba(0,0,0,0)'),
                fill='tonexty',
                fillcolor=get_fill_color(segment['label'].iloc[0])))

        # Price and indicator lines, added after the cloud
        overlays = [
            go.Candlestick(x=prices.index, open=prices['Open'],
                           high=prices['High'], low=prices["Low"],
                           close=prices['Close'], name="Candlestick"),
            go.Scatter(x=prices.index, y=prices['Baseline'],
                       line=dict(color='pink', width=2), name="Baseline"),
            go.Scatter(x=prices.index, y=prices['Conversion'],
                       line=dict(color='black', width=1), name="Conversion"),
            go.Scatter(x=prices.index, y=prices['Lagging'],
                       line=dict(color='purple', width=2), name="Lagging"),
            go.Scatter(x=prices.index, y=prices['SpanA'],
                       line=dict(color='green', width=2, dash='dot'), name="Span A"),
            go.Scatter(x=prices.index, y=prices['SpanB'],
                       line=dict(color='red', width=1, dash='dot'), name="Span B"),
        ]
        for trace in overlays:
            fig.add_trace(trace)

        fig.update_layout(height=600, width=1200, showlegend=True)

        fig.show()

## What the Lines Mean

 - Lagging Span : When above the price it is bullish and when below bearish. It is used with other indicators because it is mainly a filter.
 - Baseline : When below price this is considered support. When above price this is considered resistance. We are in an uptrend when the slope increases and vice versa. The slope of the curve tells us the strength of the trend.
 - Conversion : We focus on its position versus the Baseline. When the Conversion crosses above the Baseline we are in an upward trend and vice versa. This is considered a strong indicator when above the Cloud and weak when below.
 - Cloud : The thicker the Cloud, the stronger the trend and vice versa. When the Spans cross repeatedly within a short time the price is moving in a range; a single, clean cross is a sign of a possible reversal of trend.

In [20]:
def plot_Ichimoku(header, df, len):
    """Display a "Top N" heading and chart the first ``len`` tickers of ``df``.

    Args:
        header: Name shown in the heading.
        df: Frame with a 'Ticker' column; rows are taken in their current
            order, so sort it before calling.
        len: Number of rows to chart. The name shadows the builtin inside
            this function.
    """
    display(md(f"### Top {len} {header}"))
    leaders = df.head(len)["Ticker"].to_list()
    # Turn every "_" in the stored names back into "." before downloading
    top = [name.replace("_", ".") for name in leaders]
    get_Ichimoku(top)

## Analyze Best Performers with Ichimoku

In [21]:
# plot_Ichimoku("Industrial", industrial.sort_values(by=['CUM_RET'], ascending=False), 10)

In [22]:
# plot_Ichimoku("Communications", commun.sort_values(by=['CUM_RET'], ascending=False), 10)

In [23]:
# plot_Ichimoku("Staple", staple.sort_values(by=['CUM_RET'], ascending=False), 10)

In [24]:
# plot_Ichimoku("Discretionary spending", discretion.sort_values(by=['CUM_RET'], ascending=False), 10)

In [25]:
# plot_Ichimoku("Utilities", utility.sort_values(by=['CUM_RET'], ascending=False), 10)

In [26]:
# Chart the five finance stocks with the highest cumulative return
plot_Ichimoku("Finance", finance.sort_values(by=['CUM_RET'], ascending=False), 5)

### Top 5 Finance

[*********************100%***********************]  5 of 5 completed


#### AEF.AX

#### HUB.AX

#### PNI.AX

#### RMC.AX

#### ASW.AX

In [27]:
# plot_Ichimoku("Materials", material.sort_values(by=['CUM_RET'], ascending=False), 10)

In [28]:
# plot_Ichimoku("Real Estate", restate.sort_values(by=['CUM_RET'], ascending=False), 10)

In [29]:
# plot_Ichimoku("Energy", energy.sort_values(by=['CUM_RET'], ascending=False), 10)