In [0]:
!pip install yfinance
!pip install mplfinance
#Before running the rest of the notebook: download the ASX50 constituents list from https://www.asx50list.com/
#and upload it to the working directory as asx50.csv (a later cell reads it with pd.read_csv("asx50.csv"))



In [0]:
import numpy as np 
import pandas as pd 
from pandas_datareader import data as pdr 
import random 
import mplfinance as mpf 
from PIL import Image
from numpy import asarray
from pathlib import Path
from matplotlib import pyplot as plt
from datetime import datetime
import math 
import yfinance as yf
yf.pdr_override()


def get_stock(code, start, end, interval):
    '''
    fetches the price history of a single ticker through yfinance

    code     : ticker symbol handed to yf.download
    start    : passed to yf.download as start
    end      : passed to yf.download as end
    interval : passed to yf.download as interval (this notebook uses '1d')

    returns the dataframe produced by yf.download for that ONE stock
    '''
    #hand the request straight to yfinance and give back its result unchanged
    return yf.download(code, start=start, end=end, interval=interval)

  

def get_all_stocks(stocks, start, end, interval):
    '''
    given list of stock codes, start date, end date and interval
    downloads each code with get_stock (".AX" is appended to the code for the download)

    returns dictionary mapping each original code (without the suffix) to its dataframe
    '''
    #one download per code rather than a single multi-ticker request,
    #because the multi-ticker download returned a multiindex
    return {
        code: get_stock(code + ".AX", start, end, interval)
        for code in stocks
    }




  from pandas.util.testing import assert_frame_equal


In [0]:
def plot_stock_candle(df, v, filename):
    '''
    plots df as a candlestick chart without axes/labels on a black background
    and saves it as big/<filename>.jpeg (the big folder is created if missing)

    df       : dataframe handed to mpf.plot
    v        : handed to mpf.plot as its volume argument
    filename : file stem of the exported jpeg, also used in the progress message

    returns nothing. A TypeError raised while styling/plotting is reported and
    swallowed (best effort, no file is guaranteed to exist afterwards); any other
    exception propagates. The current figure is closed in every case.
    '''
    #to get rid of axis and titles
    rcparams = {'axes.spines.bottom':False,
        'axes.spines.left':False,
        'axes.spines.right':False,
        'axes.spines.top':False,
        'xtick.color':'none',
        'ytick.color':'none',
        'axes.labelcolor':'none'
        }

    try:
        #green/red style
        mc = mpf.make_marketcolors(base_mpf_style='charles',inherit=True)
        s = mpf.make_mpf_style(gridstyle='',rc=rcparams, marketcolors=mc, facecolor='black')

        #savefig does not create the target folder, so make sure it is there
        Path('big').mkdir(parents=True, exist_ok=True)

        #plot candlestick and export... not sure if there is better way than exporting
        print("Exporting to candle for " + filename)
        mpf.plot(df,type='candle', volume=v,style=s,savefig=dict(fname='big/'+filename+'.jpeg',dpi=100))
    except TypeError as err:
        #keep going with the next chart, but say which one failed and why
        print("Type error while plotting " + str(filename) + ": " + str(err))
    finally:
        #close even when plotting failed, otherwise figures pile up across a long loop
        plt.close()

In [0]:
def _black_bounding_box(pixels):
    '''
    returns (row0, row1, col0, col1), the half-open bounding box of all pure black
    (0, 0, 0) pixels of a 3d image array, or None when there is no such pixel
    '''
    mask = np.all(pixels == (0, 0, 0), axis=-1)
    coords = np.argwhere(mask)
    if coords.size == 0:
        return None
    #per-axis min/max: the first/last black pixel in scan order only bound the rows,
    #their columns are not necessarily the left-most/right-most black columns
    row0, col0 = coords.min(axis=0)
    row1, col1 = coords.max(axis=0) + 1
    return row0, row1, col0, col1


def get_images(images_dict, dir_path):
    '''
    given dictionary with (code,dataframe) pairs, for each dataframe plots candlestick, crops edge, resizes
    Then exports as jpeg file to data/<dir_path>/images/<code>.jpeg

    images_dict : dictionary of (code, dataframe) pairs; code is used as the file stem
    dir_path    : name of the folder created under data/

    returns nothing. A chart that cannot be produced or processed (TypeError, missing
    chart file, no black plot area) is reported and skipped so the remaining charts are still exported.
    '''
    #same folder for every image, so create it once up front
    output_dir = Path('data/'+dir_path+'/images')
    output_dir.mkdir(parents=True, exist_ok=True)

    for key, value in images_dict.items():
        try:
            #plot using function (writes big/<key>.jpeg)
            plot_stock_candle(value, False, key)
            print("resizing image and exporting" + str(key))

            #transform to np array (3d); the file is closed again as soon as it is read
            with Image.open('big/'+key+'.jpeg') as im:
                data = asarray(im)

            #delete white edge (for axes) from image as mplfinance doesnt have the option to remove axes (i think)
            box = _black_bounding_box(data)
            if box is None:
                print("no black plot area found, skipping " + str(key))
                continue
            row0, row1, col0, col1 = box
            cropped = data[row0:row1, col0:col1]

            #convert back to image and downsample to 50 x 50, making (50,50,3)
            #resample=0 is kept exactly as in the original call
            small = Image.fromarray(cropped).resize((50,50),resample=0)

            #export for dataset
            small.save(output_dir/(key+'.jpeg'))
        except (TypeError, FileNotFoundError) as err:
            #FileNotFoundError: plot_stock_candle reported a failure and wrote no chart
            print("could not export image for " + str(key) + ": " + str(err))


def get_labels(labels_dict, dir_path):
    '''
    given dictionary of (code,dataframe) pairs, where each dataframe holds data for one day
    generates labels from the first row of each dataframe, defined as
    1, where (open <= close) (up -> green)
    0, (where open > close) (down -> red)
    and APPENDS them as "code,label" rows (no header) to data/<dir_path>/labels/labels.csv

    labels_dict : dictionary of (code, dataframe) pairs; the dataframes are not modified
    dir_path    : name of the folder created under data/

    returns nothing. Empty dataframes and dataframes without Open/Close columns get no label.
    NOTE: a NaN open or close compares False in (open > close) and therefore ends up labelled 1.
    '''
    labels_d = {}
    for key, value in labels_dict.items():
        #nothing to label in an empty frame
        if len(value) == 0:
            continue
        try:
            #positional access to the first row, so the caller's index is left untouched
            open_price = value['Open'].iloc[0]
            close_price = value['Close'].iloc[0]
        except KeyError:
            #no Open/Close column: skip this code
            continue
        labels_d[key] = 0 if open_price > close_price else 1

    #one row per code: index = code, single column = label
    labels = pd.DataFrame([labels_d]).T

    output_dir = Path('data/'+dir_path+'/labels')
    output_dir.mkdir(parents=True, exist_ok=True)
    #append mode: labels of successive runs accumulate in the same file
    labels.to_csv(output_dir/'labels.csv', header=False, mode='a')

In [0]:
def get_images_labels(all_stocks, dir_path):
    '''
    given dictionary of (code, dataframe) pairs, splits every dataframe into
    - all rows but the last, passed to get_images (chart input)
    - the last row only, passed to get_labels (label input)

    all_stocks : dictionary of (code, dataframe) pairs; may be empty
    dir_path   : folder name handed on to get_images and get_labels

    returns nothing; the exports are done by get_images and get_labels
    '''
    #split each frame by its OWN length, so frames of different lengths are handled correctly
    #all but last row for images
    images_dict = {key: value.iloc[:-1] for key, value in all_stocks.items()}
    #only last row for labels
    labels_dict = {key: value.tail(1) for key, value in all_stocks.items()}

    get_images(images_dict, dir_path)
    get_labels(labels_dict, dir_path)

In [0]:
#read the uploaded constituents list and take the column holding the stock codes
ASX50_CODE_COLUMN = 'S&P/ASX 50 Index (1 March 2020)'
asx50 = pd.read_csv("asx50.csv")
stocks = asx50[ASX50_CODE_COLUMN].tolist()
#drop the first entry of the column (presumably not a stock code - TODO confirm against the csv)
del stocks[0]

#daily data for every code over the chosen period
all_stocks = get_all_stocks(stocks, start='2005-01-01', end='2018-01-01', interval='1d')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- COL.AX: Data doesn't exist for startDate = 1104537600, endDate = 1514764800
[****

In [0]:
#ONLY NEED TO CHANGE THIS: 
curr_stock = 'COH'

x = all_stocks[curr_stock]

#slide a 20-row window over the stock one row at a time;
#each window is stored under <code><index of its first row>
window_len = 20
stocks_dict = {}
for start in range(len(x) - window_len + 1):
    end = start + window_len
    stocks_dict[curr_stock+str(start)] = x[start:end]

#double check- should be 3266 or something around that 
print(len(stocks_dict)) 


3266


In [0]:
#make folder 
#make folder for the full-size charts (safe to re-run: no error if it already exists)
Path('big').mkdir(exist_ok=True)

#generate the images: 
get_images_labels(stocks_dict, '2005-01-01-2018-01-01')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
resizing image and exportingCTX766
Exporting to candle for CTX767
resizing image and exportingCTX767
Exporting to candle for CTX768
resizing image and exportingCTX768
Exporting to candle for CTX769
resizing image and exportingCTX769
Exporting to candle for CTX770
resizing image and exportingCTX770
Exporting to candle for CTX771
resizing image and exportingCTX771
Exporting to candle for CTX772
resizing image and exportingCTX772
Exporting to candle for CTX773
resizing image and exportingCTX773
Exporting to candle for CTX774
resizing image and exportingCTX774
Exporting to candle for CTX775
resizing image and exportingCTX775
Exporting to candle for CTX776
resizing image and exportingCTX776
Exporting to candle for CTX777
resizing image and exportingCTX777
Exporting to candle for CTX778
resizing image and exportingCTX778
Exporting to candle for CTX779
resizing image and exportingCTX779
Exporting to candle for CTX780
resizing im

In [0]:
#Delete the full-size charts in big/ (the resized copies are kept under data/)
!rm -rf big

#CHANGE THE ZIP NAME (AND THE FOLDER NAME, IF IT DIFFERS) TO MATCH THE CURRENT STOCK
#zip the resized images of the current stock and download the archive

!zip -r COH.zip data/2005-01-01-2018-01-01/images/
from google.colab import files
files.download("COH.zip")

#delete current images (presumably so the next stock's zip only holds its own images)
!rm -rf data/2005-01-01-2018-01-01/images

#download current label csv. Not strictly necessary until all the stocks are finished, because the file keeps being appended to,
#but download it just in case: if it gets removed, re-upload the most recent version
files.download("data/2005-01-01-2018-01-01/labels/labels.csv")

  adding: data/2005-01-01-2018-01-01/images/ (stored 0%)
  adding: data/2005-01-01-2018-01-01/images/CTX1265.jpeg (deflated 16%)
  adding: data/2005-01-01-2018-01-01/images/CTX3042.jpeg (deflated 18%)
  adding: data/2005-01-01-2018-01-01/images/CTX1316.jpeg (deflated 20%)
  adding: data/2005-01-01-2018-01-01/images/CTX2790.jpeg (deflated 15%)
  adding: data/2005-01-01-2018-01-01/images/CTX1636.jpeg (deflated 17%)
  adding: data/2005-01-01-2018-01-01/images/CTX1438.jpeg (deflated 16%)
  adding: data/2005-01-01-2018-01-01/images/CTX522.jpeg (deflated 18%)
  adding: data/2005-01-01-2018-01-01/images/CTX2052.jpeg (deflated 20%)
  adding: data/2005-01-01-2018-01-01/images/CTX3027.jpeg (deflated 16%)
  adding: data/2005-01-01-2018-01-01/images/CTX1096.jpeg (deflated 18%)
  adding: data/2005-01-01-2018-01-01/images/CTX1465.jpeg (deflated 18%)
  adding: data/2005-01-01-2018-01-01/images/CTX2713.jpeg (deflated 17%)
  adding: data/2005-01-01-2018-01-01/images/CTX2317.jpeg (deflated 16%)
  adding