# Candlestick Image Generator for Stock Market Prediction

In [34]:
# A tutorial for this file is available at www.relataly.com

import pandas as pd 
from datetime import date, datetime
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
import pandas_datareader as pdr 
import yfinance as yf 
import os 


### 
# This generator queries historical stock market information and uses them to create a series of candlestick charts 
# The candle charts can serve as visual inputs in the development of stockmarket prediction models
# ###
    

def fetchData(symbol, start_date, end_date):
        plt.ioff()
        print("interactive mode OFF")
        
        try:
            print(f"trying to fetch stock market data for {symbol} between {start_date} and {end_date} using pandas datareader")
            prices_df = pdr.DataReader(symbol, start=start_date, end=end_date, data_source="yahoo")
        except: 
            print(f"error using pandas datareader. Trying yfinance instead")
            try:
                #Alternative package if webreader does not work: pip install yfinance
                prices_df = yf.download(symbol, start=start_date, end=end_date)
            except:
                print("error using yfinance")
        print("successfully feteched " + str(len(prices_df)) + " records")
        
        return prices_df
                
                
# iterate through a dataframe, slice it into smaller windows and add them to a list
def storeCandlesticksToDisk(save_path, symbol, x_size, y_size, df, window_size, target_distance):
        window_numbers = len(df) - window_size - target_distance
        print(f"generating {window_numbers} candlecharts")
        images_stock_df = pd.DataFrame(columns=[])
        for i in range(window_numbers):
            df_w = df[len(df) - window_size - i: len(df) - i]
            df_w_close = df[i + window_size - 1: i + window_size].Close[0]
            df_t_close = df[i + window_size: i + window_size + target_distance].Close[0]
            filename = f"{symbol}{i}.png"
            category = getCategory(df_w_close, df_t_close)
            images_stock_df = images_stock_df.append({"window": df_w, "filename": filename, "symbol": symbol, 
                                    "close_price": df_w_close, "target_close_price": df_t_close, "diff_abs": df_t_close - df_w_close,
                                    "diff_pct": 1 - (df_t_close - df_w_close) / df_w_close, "window_size": window_size,
                                    "category": category}, ignore_index=True)
            
            saveCandlestick(df_w, save_path, filename, x_size, y_size) 
            
        return images_stock_df
            
        
def createFolder(save_path):
        print("store images to disk")
        try:
            os.makedirs(save_path) # create dir
            print(f"succesfully created path {save_path}")
        except(FileExistsError):
            print(FileExistsError)
            
            
def saveCandlestick(prices, save_path, filename, x_size, y_size):
        #create figure
        plt.figure(figsize=[x_size, y_size])
        width, width2 = .4, .05 # width of candlestick elements
        col1, col2 = 'black', 'grey' #define colors 
        
        #define up and down prices
        up = prices[prices.Close>=prices.Open]
        down = prices[prices.Close<prices.Open]
        
        # plot up prices
        plt.bar(up.index, up.Close-up.Open, width, bottom=up.Open, color=col1)
        plt.bar(up.index, up.High-up.Close, width2, bottom=up.Close, color=col1)
        plt.bar(up.index, up.Low-up.Open, width2, bottom=up.Open, color=col1)

        # plot down prices
        plt.bar(down.index, down.Close-down.Open, width, bottom=down.Open, color=col2)
        plt.bar(down.index, down.High-down.Open, width2, bottom=down.Open, color=col2)
        plt.bar(down.index, down.Low-down.Close, width2, bottom=down.Close, color=col2)
        
        plt.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, labelleft=False) 
        plt.margins(x=0)
        plt.margins(y=0)
        
        plt.savefig(f"{save_path}/{filename}")


def getCategory(df_w_close, df_t_close):
        if df_t_close >= df_w_close * 1.05:
            return "buy"
        elif df_t_close <= df_w_close * 1.05:
            return "sell"
        else:
            return "hold"

end_date = date.today().strftime("%Y-%m-%d")
start_date = datetime.strptime('2022-01-01', '%Y-%m-%d').date()
window_size = 28 # tage
target_distance = 1 # 1 = the next day

stockname = 'Apple'
symbol_list = ['AAPL', 'META']    
pre_path = "candlecharts"

timestamp = datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
save_path = f"{pre_path}/{timestamp}"

images_df = pd.DataFrame(columns=[])
createFolder(save_path)
for symbol in symbol_list:
    prices_df = fetchData(symbol, start_date, end_date)
    image_stock_df = storeCandlesticksToDisk(save_path, symbol, 10, 10, prices_df, window_size, target_distance)
    images_df = images_df.append(image_stock_df, ignore_index=True)
images_df[["filename", "close_price", "target_close_price", "category"]].to_csv(f'{save_path}/images_meta.csv', header=True)
images_df.head(9)

store images to disk
succesfully created path candlecharts/2022-04-03-00-57-23
interactive mode OFF
trying to fetch stock market data for AAPL between 2022-01-01 and 2022-04-03 using pandas datareader
successfully feteched 64 records
generating 35 candlecharts
interactive mode OFF
trying to fetch stock market data for META between 2022-01-01 and 2022-04-03 using pandas datareader
successfully feteched 62 records
generating 33 candlecharts


Unnamed: 0,window,filename,symbol,close_price,target_close_price,diff_abs,diff_pct,window_size,category
0,High Low Open...,AAPL0.png,AAPL,172.119995,168.639999,-3.479996,1.020218,28.0,sell
1,High Low Open...,AAPL1.png,AAPL,168.639999,168.880005,0.240005,0.998577,28.0,sell
2,High Low Open...,AAPL2.png,AAPL,168.880005,172.789993,3.909988,0.976848,28.0,sell
3,High Low Open...,AAPL3.png,AAPL,172.789993,172.550003,-0.23999,1.001389,28.0,sell
4,High Low Open...,AAPL4.png,AAPL,172.550003,168.880005,-3.669998,1.021269,28.0,sell
5,High Low Open...,AAPL5.png,AAPL,168.880005,167.300003,-1.580002,1.009356,28.0,sell
6,High Low Open...,AAPL6.png,AAPL,167.300003,164.320007,-2.979996,1.017812,28.0,sell
7,High Low Open...,AAPL7.png,AAPL,164.320007,160.070007,-4.25,1.025864,28.0,sell
8,High Low Open...,AAPL8.png,AAPL,160.070007,162.740005,2.669998,0.98332,28.0,sell


In [32]:
image_stock_df[1:2]

Unnamed: 0,window,filename,symbol,close_price,target_close_price,diff_abs,diff_pct,window_size,category
1,High Low Open Close V...,META1.png,META,12.26,12.26,0.0,1.0,28.0,sell


In [26]:
images_df.head()

Unnamed: 0,window,filename,symbol,close_price,target_close_price,diff_abs,diff_pct,window_size,category
0,High Low Open...,AAPL0.png,AAPL,174.309998,174.309998,0.0,1.0,28.0,sell
1,High Low Open...,AAPL1.png,AAPL,174.309998,174.309998,0.0,1.0,28.0,sell
2,High Low Open...,AAPL2.png,AAPL,174.610001,174.610001,0.0,1.0,28.0,sell
3,High Low Open...,AAPL3.png,AAPL,177.770004,177.770004,0.0,1.0,28.0,sell
4,High Low Open...,AAPL4.png,AAPL,178.960007,178.960007,0.0,1.0,28.0,sell
