# Candlestick Image Generator for Stock Market Prediction

In [34]:
# A tutorial for this file is available at www.relataly.com

import pandas as pd 
from datetime import date, datetime
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
import pandas_datareader as pdr 
import yfinance as yf 
import os 

class ImageGen:
    ### 
    # This generator queries historical stock market information and uses them to create a series of candlestick charts 
    # The candle charts can serve as visual inputs in the development of stockmarket prediction models
    # ###
    

    def __init__(self, x_size, y_size, symbol, start_date, end_date, window_size, target_distance):
        self.images_df = pd.DataFrame(columns=[])
        self.x_size, self.y_size = x_size, y_size
        self.window_size = window_size
        self.target_distance = target_distance
        
        plt.ioff()
        print("interactive mode OFF")
        
        try:
            print(f"trying to fetch stock market data for {symbol} between {start_date} and {end_date} using pandas datareader")
            self.prices_df = pdr.DataReader(symbol, start=start_date, end=end_date, data_source="yahoo")
        except: 
            print(f"error using pandas datareader. Trying yfinance instead")
            try:
                #Alternative package if webreader does not work: pip install yfinance
                self.prices_df = yf.download(symbol, start=start_date, end=end_date)
            except:
                print("error using yfinance")
        print("successful")        
        
                
    # iterate through a dataframe, slice it into smaller windows and add them to a list
    def storeCandlesticksToDisk(self, save_path):
        df = self.prices_df
        self.createFolder(save_path)
        window_numbers = len(df) - window_size - target_distance
        print(f"generating {window_numbers} candlecharts")
        for i in range(window_numbers):
            df_w = df[len(df) - window_size - i: len(df) - i]
            df_w_close = df[len(df) - i - 1: len(df) - i].Close[0]
            df_t = df[len(df) - window_size - i: len(df) - window_size - i + target_distance]
            df_t_close = df_t.Close[0]

            filename = "c"+str(i)
            
            category = self.getCategory(df_w_close, df_t_close)
            
            self.images_df = self.images_df.append({"window": df_w, "filename": filename + ".png", 
                                                    "close_price": df_w_close, 
                                                    "target_close_price": df_t_close, 
                                                   "diff_abs": df_t_close - df_w_close,
                                                   "diff_pct": (df_t_close - df_w_close) / df_w_close - 1,
                                                   "window_size": self.window_size,
                                                   "category": category},
                                                   ignore_index=True)
            
            gen.saveCandlestick(df_w, save_path, filename) 
            
        self.images_df[["filename", "close_price", "target_close_price", "category"]].to_csv(f'{save_path}/image_df.csv', header=True)
        
    def createFolder(self, save_path):
        print("store images to disk")
        try:
            os.makedirs(save_path) # create dir
            print(f"succesfully created path {save_path}")
        except(FileExistsError):
            print(FileExistsError)
            
    def saveCandlestick(self, prices, save_path, filename):
        #create figure
        plt.figure(figsize=[self.x_size, self.y_size])
        width, width2 = .4, .05 # width of candlestick elements
        col1, col2 = 'black', 'grey' #define colors 
        
        #define up and down prices
        up = prices[prices.Close>=prices.Open]
        down = prices[prices.Close<prices.Open]
        
        # plot up prices
        plt.bar(up.index, up.Close-up.Open, width, bottom=up.Open, color=col1)
        plt.bar(up.index, up.High-up.Close, width2, bottom=up.Close, color=col1)
        plt.bar(up.index, up.Low-up.Open, width2, bottom=up.Open, color=col1)

        # plot down prices
        plt.bar(down.index, down.Close-down.Open, width, bottom=down.Open, color=col2)
        plt.bar(down.index, down.High-down.Open, width2, bottom=down.Open, color=col2)
        plt.bar(down.index, down.Low-down.Close, width2, bottom=down.Close, color=col2)
        
        plt.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, labelleft=False) 
        plt.margins(x=0)
        plt.margins(y=0)
        
        plt.savefig(f"{save_path}/{filename}.png")

    def getCategory(self, df_w_close, df_t_close):
        if df_t_close >= df_w_close * 1.05:
            return "buy"
        elif df_t_close <= df_w_close * 1.05:
            return "sell"
        else:
            return "hold"

end_date = date.today().strftime("%Y-%m-%d")
start_date = datetime.strptime('2022-01-01', '%Y-%m-%d').date()
window_size = 28 # tage
target_distance = 1 # 1 = the next day

stockname = 'Apple'
symbol = 'AAPL'    
pre_path = "candlecharts"

timestamp = datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
path = f"{pre_path}/{timestamp}"

gen = ImageGen(10, 10, symbol, start_date, end_date, window_size, target_distance)
gen.storeCandlesticksToDisk(path)
gen.images_df

interactive mode OFF
trying to fetch stock market data for AAPL between 2022-01-01 and 2022-04-02 using pandas datareader
successful
store images to disk
succesfully created path candlecharts/2022-04-02-13-19-46
generating 35 candlecharts


Unnamed: 0,window,filename,close_price,target_close_price,diff_abs,diff_pct,window_size,category
0,High Low Open...,c0.png,174.309998,162.740005,-11.569992,-1.066376,28.0,sell
1,High Low Open...,c1.png,174.309998,160.070007,-14.23999,-1.081693,28.0,sell
2,High Low Open...,c2.png,174.610001,164.320007,-10.289993,-1.058931,28.0,sell
3,High Low Open...,c3.png,177.770004,167.300003,-10.470001,-1.058896,28.0,sell
4,High Low Open...,c4.png,178.960007,168.880005,-10.080002,-1.056325,28.0,sell
5,High Low Open...,c5.png,175.600006,172.550003,-3.050003,-1.017369,28.0,sell
6,High Low Open...,c6.png,174.720001,172.789993,-1.930008,-1.011046,28.0,sell
7,High Low Open...,c7.png,174.070007,168.880005,-5.190002,-1.029816,28.0,sell
8,High Low Open...,c8.png,170.210007,168.639999,-1.570007,-1.009224,28.0,sell
9,High Low Open...,c9.png,168.820007,172.119995,3.299988,-0.980453,28.0,sell
