### 2022 Fintech Summer Research
### Step 2: Basic GUIs
### David Park
### Last Updated: Thursday, July 14, 2022

Imports


In [1]:
import PySimpleGUI as sg
import yfinance as yf
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.pyplot import figure
import pprint
import os
import pandas as pd
import seaborn as sns
import numpy as np


### Stock Data Retrieval Functions
Basic Graph Data
###### Takes in the stock name (str), the trading interval (str, e.g. '1m' or '5m'), the start day (str), and the end day (str); the date range may span up to 1 week
###### Returns a list of the DataFrame, start day (str), end day (str), and stock name (str)
###### EX: graph_data('AAPL', '1m', '2022-07-10', '2022-07-14')

In [2]:
def graph_data(name, tf, start_day, end_day):
    """Download price history for one symbol and add derived indicator columns.

    Args:
        name: Ticker symbol passed to ``yf.Ticker`` (e.g. ``'AAPL'``).
        tf: Bar interval forwarded to ``Ticker.history`` (e.g. ``'1m'``, ``'5m'``).
        start_day: Start date string (``'YYYY-MM-DD'``).
        end_day: End date string (``'YYYY-MM-DD'``).

    Returns:
        ``[stock_df, start_day, end_day, name]`` where ``stock_df`` is the
        yfinance history frame extended with the ``Typical``, ``closeOffHigh``,
        ``pseudoVolatility`` and ``LogReturn`` columns.
    """
    ticker = yf.Ticker(name)
    # The Ticker object is already bound to `name`. history() has no `tickers`
    # parameter: older yfinance releases swallowed it via **kwargs, newer ones
    # reject unknown keywords, so it is not passed here.
    stock_df = ticker.history(interval=tf, start=start_day, end=end_day)

    high = stock_df['High']
    low = stock_df['Low']
    close = stock_df['Close']

    # Typical price: mean of high, low and close
    stock_df['Typical'] = (high + low + close) / 3
    # Position of the close inside the bar's range, scaled to [-1, 1]
    # (-1 = closed at the high, +1 = closed at the low). A bar with
    # High == Low divides by zero and produces NaN/inf rather than raising.
    stock_df['closeOffHigh'] = 2 * ((high - close) / (high - low)) - 1
    # Bar range relative to the open
    stock_df['pseudoVolatility'] = (high - low) / stock_df['Open']
    # Log return versus the previous bar; the first row has no predecessor -> NaN
    stock_df['LogReturn'] = np.log(close / close.shift(1))
    # stock_df = stock_df.drop(['Dividends', 'Stock Splits'], axis=1)
    return [stock_df, start_day, end_day, name]

Example DataFrame & Plot

In [3]:
# Fetch 1-minute AAPL bars for the given date range and keep only the
# DataFrame, which is element 0 of the list graph_data returns.
df = graph_data('AAPL', '1m', '2022-07-10', '2022-07-14')[0]
# Bare expression: in a notebook this renders the DataFrame as the cell output.
df
# plt.plot(df['Close'])
# plt.title('AAPL Close')


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Typical,closeOffHigh,pseudoVolatility,LogReturn
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-07-11 09:30:00-04:00,145.669998,145.850006,145.130005,145.279999,1685904,0,0,145.420003,0.583351,0.004943,
2022-07-11 09:31:00-04:00,145.279999,145.850006,145.279999,145.630096,430116,0,0,145.586700,-0.228397,0.003924,0.002407
2022-07-11 09:32:00-04:00,145.630005,145.850006,145.399994,145.729507,319478,0,0,145.659836,-0.464465,0.003090,0.000682
2022-07-11 09:33:00-04:00,145.699997,145.809998,145.580994,145.660004,223596,0,0,145.683665,0.309968,0.001572,-0.000477
2022-07-11 09:34:00-04:00,145.649994,145.764999,145.413193,145.419998,237053,0,0,145.532730,0.961312,0.002415,-0.001649
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-13 15:56:00-04:00,145.779999,145.940002,145.729996,145.860001,288249,0,0,145.843333,-0.238102,0.001441,0.000583
2022-07-13 15:57:00-04:00,145.860001,145.960007,145.660004,145.809998,367205,0,0,145.810003,0.000051,0.002057,-0.000343
2022-07-13 15:58:00-04:00,145.800003,145.899994,145.660004,145.660004,512344,0,0,145.740000,1.000000,0.001646,-0.001029
2022-07-13 15:59:00-04:00,145.664993,145.669998,145.429993,145.449997,1520506,0,0,145.516663,0.833302,0.001648,-0.001443


### GUI Functions
Creating Line Plots
###### Takes in the list returned by graph_data ([DataFrame, start day (str), end day (str), stock name (str)]) and the desired column name (str)
###### Returns a line plot of said data as output
###### EX: create_line_plot(graph_data('AAPL', '1m', '2022-07-10', '2022-07-14'), 'Close')

In [4]:
def create_line_plot(df_list, col):
    """Draw one column of the stock frame as a blue line against its datetime index.

    Args:
        df_list: ``[DataFrame, start_day, end_day, stock_name]`` as returned
            by ``graph_data``.
        col: Name of the DataFrame column to plot.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    series = df_list[0][col]
    series.plot(kind='line', color='blue')

    # NOTE: rcParams is changed after the plot call, exactly as before, so the
    # new default size only applies to figures created later.
    plt.rcParams["figure.figsize"] = (15, 10)

    # Title names the column; the y-axis label carries the stock symbol
    plt.title(f'{col} vs. Datetime')
    plt.ylabel(df_list[3], fontsize=14)
    plt.grid(True)

    # Clamp the x-axis to the requested [start_day, end_day] window
    x_window = [df_list[1], df_list[2]]
    plt.xlim(x_window)
    plt.tight_layout()

    figure_out = plt.gcf()
    return figure_out

Example Line Plot

In [5]:
# create_line_plot(graph_data('GOOG', '1m','2022-06-20', '2022-06-27'), 'Typical')

Creating Box Plots
###### Takes in the list returned by graph_data ([DataFrame, start day (str), end day (str), stock name (str)]) and the desired column name (str)
###### Returns a box plot of said data as output
###### EX: create_box_plot(graph_data('AAPL', '1m', '2022-07-10', '2022-07-14'), 'Close')

In [6]:
def create_box_plot(df_list, col):
    """Draw a box plot of one column of the stock frame.

    Args:
        df_list: ``[DataFrame, start_day, end_day, stock_name]`` as returned
            by ``graph_data``.
        col: Name of the DataFrame column to summarise.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    # matplotlib's boxplot does not skip NaN: a single missing value makes the
    # quartiles NaN and the box is drawn empty. LogReturn always has NaN in its
    # first row, so missing values are dropped before plotting.
    samples = df_list[0][col].dropna().to_numpy()
    plt.boxplot(samples)
    # NOTE: rcParams is set after plotting, so it only affects later figures
    plt.rcParams["figure.figsize"] = (15, 10)
    plt.title('Boxplot')
    # The y-axis label carries the stock symbol; the x-axis names the column
    plt.ylabel(df_list[3], fontsize=14)
    plt.xlabel(col)
    plt.grid(True)
    plt.tight_layout()
    # Returns the plot as an object
    return plt.gcf()

Example Boxplot

In [7]:
# create_box_plot(graph_data('AAPL', '1m','2022-06-20', '2022-06-27'), 'LogReturn')


Creating Probability Density Functions
###### Takes in the list returned by graph_data ([DataFrame, start day (str), end day (str), stock name (str)]) and the desired column name (str)
###### Returns a probability density (KDE) plot of said data as output
###### EX: create_pdf(graph_data('AAPL', '1m', '2022-07-10', '2022-07-14'), 'Close')

In [8]:
def create_pdf(df_list, col):
    """Draw a kernel-density estimate of one column of the stock frame.

    Args:
        df_list: ``[DataFrame, start_day, end_day, stock_name]`` as returned
            by ``graph_data``.
        col: Name of the DataFrame column whose density is plotted.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    column_values = df_list[0][col]
    # Density curve of the selected column, drawn in blue
    sns.kdeplot(column_values, color='blue')

    # NOTE: as before, rcParams is touched after plotting, so it only changes
    # the default size of figures created later.
    plt.rcParams["figure.figsize"] = (15, 10)

    plt.title('PDF')
    # The stock symbol labels the y-axis; the column name labels the x-axis
    plt.ylabel(df_list[3], fontsize=14)
    plt.xlabel(col)
    plt.grid(True)
    plt.tight_layout()

    figure_out = plt.gcf()
    return figure_out

In [9]:
# create_pdf(graph_data('AAPL', '1m','2022-06-20', '2022-06-27'), 'Typical')

Bollinger Bands
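###### Takes in the list returned by graph_data ([DataFrame, start day (str), end day (str), stock name (str)]) and the desired column name (str)
###### Returns a plot of the 20-bar moving average with its upper and lower Bollinger Bands (moving average ± 2 rolling standard deviations)
###### EX: create_bollinger_bands(graph_data('AAPL', '1m', '2022-07-10', '2022-07-14'), 'Typical')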

In [10]:
def create_bollinger_bands(df_list, col):
    """Plot a 20-bar moving average of a column with its Bollinger Bands.

    The bands are the rolling mean plus and minus two rolling standard
    deviations, both computed over a window of 20 rows.

    Args:
        df_list: ``[DataFrame, start_day, end_day, stock_name]`` as returned
            by ``graph_data``.
        col: Name of the DataFrame column the bands are computed from.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    # Rolling statistics over 20 consecutive rows of the chosen column
    # https://www.geeksforgeeks.org/how-to-calculate-moving-average-in-a-pandas-dataframe/
    rolling_window = df_list[0][col].rolling(20)
    moving_avg = rolling_window.mean()
    band_width = 2 * rolling_window.std()

    # Only the three plotted series are kept, in legend order
    bands = moving_avg.to_frame('Moving AVG')
    bands['Upper'] = moving_avg + band_width
    bands['Lower'] = moving_avg - band_width

    # DataFrame.plot draws one line per column
    bands.plot()

    # NOTE: rcParams is updated after plotting, so it only affects later figures
    plt.rcParams["figure.figsize"] = (15, 10)
    plt.title(f'Bollinger {col} vs. Datetime')
    plt.ylabel(df_list[3], fontsize=14)
    plt.grid(True)
    # Clamp the x-axis to the requested [start_day, end_day] window
    plt.xlim([df_list[1], df_list[2]])
    plt.tight_layout()

    figure_out = plt.gcf()
    return figure_out



In [11]:
# create_bollinger_bands(graph_data('GOOG', '1m','2022-07-20', '2022-07-27'), 'Typical')

Violin Plot

In [12]:
def create_violin(df_list, col):
    """Draw a violin plot of one column of the stock frame.

    Args:
        df_list: ``[DataFrame, start_day, end_day, stock_name]`` as returned
            by ``graph_data``.
        col: Name of the DataFrame column to plot.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    # A single-column frame, so seaborn labels the violin with the column name
    single_column = df_list[0][[col]]
    sns.violinplot(data=single_column, color='blue')

    # NOTE: rcParams is updated after plotting, so it only affects later figures
    plt.rcParams["figure.figsize"] = (15, 10)
    plt.title(f'Violin Plot of {col}')
    # The y-axis label carries the stock symbol
    plt.ylabel(df_list[3], fontsize=14)
    plt.grid(True)
    plt.tight_layout()

    figure_out = plt.gcf()
    return figure_out


In [13]:
# create_violin(graph_data('GOOG', '1m','2022-07-21', '2022-07-25'), 'Close')

Multiplot

In [14]:
# COMPARISON DENSITY PLOTS
def denPlotting(symbolList, colname, df_list):
    """Overlay density (KDE) curves of one column for several groups of symbols.

    Each symbol is downloaded with ``graph_data`` at a 5-minute interval over
    the date range carried by ``df_list``. Every group gets one colour and
    every symbol inside a group gets its own line style.

    Args:
        symbolList: Mapping of group name -> list of ticker symbols.
        colname: Name of the column whose density is plotted.
        df_list: ``[DataFrame, start_day, end_day, stock_name]``; only the
            start and end day (elements 1 and 2) are used.

    Returns:
        The current matplotlib figure (``plt.gcf()``).
    """
    group_colors = ['b', 'g', 'r', 'orange']
    line_styles = ['-', '--', '-.', ':', (0, (5, 10)), (0, (3, 5, 1, 5, 1, 5))]
    # Read the date range once; the argument itself is never reassigned
    start_day, end_day = df_list[1], df_list[2]

    plt.figure(figsize=(10, 8))
    for group_index, (group, symbols) in enumerate(symbolList.items()):
        # Colours and line styles wrap around, so a fifth group or a seventh
        # symbol is still drawn instead of being skipped or raising IndexError
        color = group_colors[group_index % len(group_colors)]
        for symbol_index, symbol in enumerate(symbols):
            frame = graph_data(symbol, "5m", start_day, end_day)[0]
            sns.kdeplot(
                frame[colname],
                color=color,
                linestyle=line_styles[symbol_index % len(line_styles)],
                # Label with the symbol actually plotted, not a positional
                # lookup in a separate hard-coded list
                label=str(group) + "_" + str(symbol),
            )
    plt.legend()
    return plt.gcf()

def create_skewKurt(df, colName):
    """Compute the skewness and kurtosis of one DataFrame column.

    Args:
        df: DataFrame containing the column.
        colName: Name of the column to analyse.

    Returns:
        A two-element list: a one-row DataFrame with the columns ``skew``,
        ``kurtosis`` and ``type``, and a text report of the same values
        headed by the upper-cased column name.
    """
    # Stay on a one-column frame so both reductions return a Series whose
    # single entry is extracted below
    column_frame = df[[colName]]
    skew_value = column_frame.skew(axis=0).to_numpy()[0]
    kurt_value = column_frame.kurtosis(axis=0).to_numpy()[0]

    summary = pd.DataFrame(
        {
            'skew': [skew_value],
            'kurtosis': [kurt_value],
            'type': [colName],
        }
    )
    report = "\n".join(
        (
            colName.upper() + ":",
            "Kurtosis: " + str(kurt_value),
            "Skewness: " + str(skew_value),
        )
    )
    return [summary, report]

def create_stats(df, colName):
    """Compute summary statistics (max, min, mean, median, stdev) of one column.

    Missing values are ignored by every statistic.

    Args:
        df: DataFrame containing the column.
        colName: Name of the column to summarise.

    Returns:
        A two-element list: a one-row DataFrame with the columns ``max``,
        ``min``, ``mean``, ``median``, ``stdev`` and ``type``, and a text
        report of the same values headed by the upper-cased column name.
    """
    series = df[colName]
    max_c = series.max()
    min_c = series.min()
    mean_c = series.mean()
    # Series.median skips NaN like the other reductions. np.median converts
    # the Series to a plain array and returns NaN if any value is missing,
    # which is always the case for LogReturn (its first row is NaN).
    med_c = series.median()
    # Sample standard deviation (ddof=1)
    std_c = series.std(ddof=1)
    stats_dict = {
        'max': [max_c],
        'min': [min_c],
        'mean': [mean_c],
        'median': [med_c],
        'stdev': [std_c],
        'type': [colName]
    }
    stats_df = pd.DataFrame(stats_dict)
    report = (
        colName.upper() + ":\n"
        + "standard deviation: " + str(std_c)
        + "\nmean: " + str(mean_c)
        + "\nmedian: " + str(med_c)
        + "\nmax: " + str(max_c)
        + "\nmin: " + str(min_c)
    )
    return [stats_df, report]

In [15]:
# create_skewKurt(graph_data('AAPL', '1m', '2022-07-10', '2022-07-14')[0], 'Typical')

Drawing Figures on the GUI Canvas

In [16]:
def draw_figure(canvas, figure):
    """Embed a matplotlib figure in a Tk canvas.

    Args:
        canvas: Tk canvas widget that hosts the figure.
        figure: matplotlib figure to render.

    Returns:
        The ``FigureCanvasTkAgg`` wrapper, needed later to remove the figure.
    """
    tk_figure = FigureCanvasTkAgg(figure, canvas)
    tk_figure.draw()
    # Let the embedded widget fill and grow with its parent canvas
    widget = tk_figure.get_tk_widget()
    widget.pack(side='top', fill='both', expand=1)
    return tk_figure

In [17]:
# Deletes the figure 
def delete_figure_agg(figure_agg):
    """Remove an embedded figure from the GUI and close all matplotlib figures.

    Args:
        figure_agg: The ``FigureCanvasTkAgg`` returned by ``draw_figure``.
    """
    # Unmap the Tk widget so it no longer occupies space in the canvas
    embedded_widget = figure_agg.get_tk_widget()
    embedded_widget.forget()
    # Close every pyplot figure so the next plot starts from a clean state
    plt.close('all')

Create a Window Using Layout and Draw Figure

Layout

###### Creates column for the GUI
###### Inspired by: https://youtu.be/XpKtgNasiBw

In [18]:
# Initialize Stock List and Columns
# Set the theme first: PySimpleGUI elements pick up theme defaults when they
# are created, so the theme has to be in place before any element is built.
sg.change_look_and_feel('DarkBlue13')

# Symbols offered in the retrieval list. Each entry is a one-item list because
# the event loop reads the selection as values[key][0][0].
button_menu_def = [
    ['AAPL'], ['ABBV'], ['AMZN'], ['BAC'], ['BMY'],
    ['BTC-USD'], ['BWA'], ['ETH-USD'], ['GOOG'], ['GS'],
    ['HSBC'], ['JNJ'], ['JPM'], ['META'], ['MRK'],
    ['MSFT'], ['PFE'], ['XRP-USD']
]

# Page 1: choose symbol, interval and date range, then retrieve the data
retrieval_col = [
    [sg.Text('STOCKPLOT V1', size = (15,1), font = 'Any 20', justification = 'left')],
    [sg.Text('Choose Stock/Crypto Symbol', key = '-C_STOCK-')],
    # select_mode takes the PySimpleGUI constant, not the constant's name as a string
    [sg.Listbox(values = button_menu_def, size = (30, 6), key = '-STOCK_LIST-', select_mode = sg.LISTBOX_SELECT_MODE_SINGLE)],
    [sg.Text('Choose Interval')],
    [sg.Radio('1min',"RADIO1", default = True, key = '-IN1-'), sg.Radio('5min', "RADIO1", default = False, key='-IN2-')],
    [sg.Text('Choose 1st Date', key = '-C_DATE1-')],
    [sg.Input(key = '-START-', size = (20,1)), sg.CalendarButton("START DATE", close_when_date_chosen = True, format ='%Y-%m-%d', target = '-START-', location = (0,0), no_titlebar = False)],
    [sg.Text('Choose 2nd Date')],
    [sg.Input(key = '-END-', size = (20,1), visible = True), sg.CalendarButton("END DATE", close_when_date_chosen = True, format ='%Y-%m-%d', target='-END-', location=(0,0), no_titlebar = False)],
    [sg.Button(button_text = 'Retrieve Data', key = "-RDATA-"), sg.Exit(), sg.Button(button_text = 'Manual', key = '-MANUAL-')],
    [sg.Text('Error: Fill in all parameters! 7 days past 30 days', key = '-ERROR1-', visible = False)],
]

# Page 2: choose the column and the graph/statistic type, then plot
analysis_col = [
    [sg.Text('STOCKPLOT V1', size = (15,1), font = 'Any 20', justification = 'left')],
    [sg.Button(button_text = '< Back to Retrieval', key = "-BACK-")],
    [sg.Text('Choose Column Type')],
    [sg.Listbox(values = [['Close'], ['Typical'], ['closeOffHigh'], ['pseudoVolatility'], ['LogReturn']], select_mode = sg.LISTBOX_SELECT_MODE_SINGLE, size=(30, 6), key = '-COL_LIST-')],
    [sg.Text('Choose Graph Type')],
    [sg.Listbox(values = [['Line'], ['Box'], ['PDF'], ['Bollinger'], ['Skewness/Kurtosis'],['Statistics'],['AllDensity'],['Violin']], select_mode = sg.LISTBOX_SELECT_MODE_SINGLE, size=(30, 6), key = '-G_LIST-')],
    [sg.Button(button_text = 'Plot Data', key = "-PLOT-"), sg.Button(button_text = 'Clear', key = "-CLEAR-")],
    [sg.Text('Error: Pick a column!', key = '-ERROR2-', visible = False)],
    [sg.Text('Error: Pick a graph type!', key = '-ERROR3-', visible = False)],
    [sg.Text('Error: Clear figure first!', key = '-ERROR4-', visible = False)],
    [sg.Text('save', key = '-SAVE-', visible = False, size = (30,15))],
]

# Drawing area for the embedded matplotlib figure
canvas_column = [
    [sg.Canvas(size = (1000,1000), key = "-CANVAS-")],
]

# Combine columns; only the retrieval page is visible at start-up
layout = [
    [
        sg.Column(retrieval_col, justification = 'center', key = '-R_COL-'),
        sg.Column(analysis_col, justification = 'left', visible = False, key = '-A_COL-'),
        sg.VSeparator(),
        sg.Column(canvas_column, key = '-C_COL-', visible = False)
    ]
]

# Function for opening stats window
def stats_win(stat_string, is_clear):
    """Show a modal window containing a block of statistics text.

    The window stays open until it is closed or an 'Exit' event arrives.
    When ``is_clear`` equals True the loop ends after the first event.

    Args:
        stat_string: Text displayed in the window.
        is_clear: Flag that ends the event loop after the first event.
    """
    stats_layout = [[sg.Text(stat_string, key='-STATS-')]]
    popup = sg.Window(
        'Fintech Stats',
        stats_layout,
        resizable=True,
        grab_anywhere=True,
        modal=True,
        size=(400, 400),
    )
    closing_events = (sg.WIN_CLOSED, 'Exit')
    keep_open = True
    while keep_open:
        event, _ = popup.read()
        # Stop on a closing event, or unconditionally when is_clear is set
        keep_open = not (event in closing_events or is_clear == True)
    popup.close()

# Function for opening manual window
def manual_win():
    """Show the instruction manual in a modal window until the user closes it."""
    manual_lines = (
        'Instructional Manual for STOCK_PLOT_V1',
        'Last Updated by David Park, Cyndi Chen, and Professor Henry Han in July 2022',
        'PART 1: DATA RETRIEVAL',
        '1. Choose stock/cryptocurrency symbol for dropdown menu (e.g. choose AAPL stock for Apple',
        '2. Select Trading Interval:',
        '  1m -> 1 minute trading intervals',
        '  5m -> 5 minute trading intervals',
        '3. Select start/end dates (currently only supports 7-day data collection for the past 30 days',
        '4. Retrieve data (must do steps 1-3)',
        'PART 2: DATA ANALYSIS',
        '1. Select a column (the dependent variable you will analyze)',
        '  EX: Close -> Close Price',
        '  EX: Typical -> Typical Price',
        '2. Select a graph/statistical analysis type',
        '3. Plot data',
        '4. Clear data, exit, or return to data retrieval',
    )
    # A PySimpleGUI layout is a list of rows, each row a list of elements.
    # One Text element per row replaces the previous four-level nesting,
    # which only worked because PySimpleGUI flattens stray nested lists.
    layout = [[sg.Text(line)] for line in manual_lines]
    window = sg.Window('Manual', layout, resizable=True, grab_anywhere=True, modal=True, size=(600, 1000))
    while True:
        event, _ = window.read()
        if event == sg.WIN_CLOSED or event == 'Exit':
            break
    window.close()

window = sg.Window("PySimpleGUI + MatPlotLib Line Plot", layout, finalize = True, grab_anywhere = True)

# --- State carried between events ---
fig = None          # FigureCanvasTkAgg currently on the canvas, or None when it is empty
data = None         # Result of the last successful graph_data() call
stock_name = None
date1 = ''
date2 = ''
column = None
ints = None
function = None

# Graph-type label shown in '-G_LIST-' -> (handler, prefix of the saved file name)
graph_types = {
    'Line': (create_line_plot, 'LINE_'),
    'Box': (create_box_plot, 'BOX_'),
    'PDF': (create_pdf, 'PDF_'),
    'Bollinger': (create_bollinger_bands, 'BOL_'),
    'AllDensity': (denPlotting, 'MULTI_'),
    'Statistics': (create_stats, 'STATS_'),
    'Skewness/Kurtosis': (create_skewKurt, 'SKEW_KURT_'),
    'Violin': (create_violin, 'VIOL_'),
}
# Handlers that return [DataFrame, text] and are saved as CSV instead of drawn
stats_functions = (create_stats, create_skewKurt)
# Symbol groups compared by the 'AllDensity' plot
density_groups = {
    'it': ['GOOG', 'AAPL', 'MSFT', 'AMZN', 'META', 'BWA'],
    'bank': ['JPM', 'BAC', 'HSBC', 'GS'],
    'pharm': ['ABBV', 'PFE', 'MRK', 'JNJ', 'BMY'],
    'cryp': ['BTC-USD', 'ETH-USD', 'XRP-USD'],
}

while True:
    event, values = window.read()
    if event == sg.WIN_CLOSED or event == 'Exit':
        break
    if event == "-MANUAL-":
        manual_win()
    # Track the selected trading interval on every event
    if values["-IN1-"]:
        ints = '1m'
    elif values["-IN2-"]:
        ints = '5m'

    # ---- Page 1: data retrieval ----
    if event == "-RDATA-":
        picked = values["-STOCK_LIST-"]
        start_text = values["-START-"]
        end_text = values["-END-"]
        retrieved = None
        # Only attempt the download when every input has been filled in
        if picked and start_text and end_text:
            try:
                retrieved = graph_data(picked[0][0], ints, start_text, end_text)
            except Exception:
                # Download or column derivation failed (bad symbol, date
                # range, network, ...); reported through -ERROR1- below
                retrieved = None
        if retrieved is not None and not retrieved[0].empty:
            # Commit the new state only after a successful, non-empty download
            stock_name, date1, date2, data = picked[0][0], start_text, end_text, retrieved
            window['-ERROR1-'].update(visible = False)
            window['-R_COL-'].update(visible = False)
            window['-A_COL-'].update(visible = True)
            window['-C_COL-'].update(visible = True)
        else:
            window['-ERROR1-'].update(visible = True)

    # ---- Back button: return to the retrieval page ----
    if event == '-BACK-':
        window['-R_COL-'].update(visible = True)
        window['-A_COL-'].update(visible = False)
        window['-C_COL-'].update(visible = False)

    # ---- Page 2: plot / statistics ----
    if event == "-PLOT-":
        col_pick = values["-COL_LIST-"]
        graph_pick = values["-G_LIST-"]
        # Re-read both selections on every click so that a choice left over
        # from an earlier click is never reused
        column = col_pick[0][0] if col_pick else None
        if graph_pick:
            function, plt_name = graph_types.get(graph_pick[0][0], (None, ''))
        else:
            function, plt_name = None, ''
        window['-ERROR2-'].update(visible = column is None)
        window['-ERROR3-'].update(visible = function is None)

        if fig:
            # A figure is already on the canvas; it has to be cleared first
            window['-ERROR4-'].update(visible = True)
        elif data is not None and column is not None and function is not None:
            try:
                if function in stats_functions:
                    # Statistics: show them in a modal window, then save as CSV
                    csv_list = function(data[0], column)
                    stats_win(csv_list[1], False)
                    csv_name = plt_name + column + '_' + ints + '_' + date1 + '_to_' + date2 + '.csv'
                    csv_list[0].to_csv(csv_name)
                    if not os.path.isfile(csv_name):
                        raise OSError('can not save file\n')
                    window['-SAVE-'].update(csv_name + '. is saved on' + os.getcwd())
                    window['-SAVE-'].update(visible = True)
                else:
                    # Plots: build the figure, save it as PNG, then embed it
                    if function is denPlotting:
                        plot = denPlotting(density_groups, column, data)
                    else:
                        plot = function(data, column)
                    fig_name = plt_name + column + '_' + ints + '_' + date1 + '_to_' + date2 + '.png'
                    plot.savefig(fig_name)
                    if not os.path.isfile(fig_name):
                        raise OSError('can not save file\n')
                    window['-SAVE-'].update(fig_name + '. is saved on' + os.getcwd())
                    window['-SAVE-'].update(visible = True)
                    fig = draw_figure(window['-CANVAS-'].TKCanvas, plot)
                window.refresh()
            except Exception as exc:
                # Keep the GUI alive but tell the user what went wrong, and
                # close any half-built figure so the next plot does not draw
                # on top of it
                plt.close('all')
                window['-SAVE-'].update('Error: ' + str(exc))
                window['-SAVE-'].update(visible = True)

    # ---- Clear button: remove the figure and the status text ----
    if event == "-CLEAR-":
        if fig:
            delete_figure_agg(fig)
            fig = None
            window['-ERROR4-'].update(visible = False)
            window.refresh()
        window['-SAVE-'].update(visible = False)
        # stats_win('', True)


window.close()