# UBS Stock Data

In [22]:
import pandas as pd
import os
#%pip install pytrends
import pytrends
from pytrends.request import TrendReq
#pip install pageviewapi
import pageviewapi
#%pip install yfinance
import yfinance as yf

In [23]:
def Big_scraper(kw_list_1, kw_list_2, ticker, start,end):
    
    """
    Description:
    ------------
    
    The function initially grabs historical, indexed, hourly data for when the keyword 
    was searched most as shown on Google Trends' Interest Over Time section.
    It then cleans the data to show daily hits on the keyword in Google news.

    
    input:
    -----
    kw_list_1: List of up to 5 key words that will be scraped from google trends for the dates given.
             Here, the scraping will pull the total posted items in google news that contains
             one of the key words.
    
    kw_list_2: List of wikipedia article titles (unlimited length) that will pull the amount of
            views the article recieved each day. 

    ticker: the ticker abriviation of the desired stock. Must be netered in as an all capitalized string 
    example UBS Inc. woud be "UBS"
             
    start: the start of the desired timeline you want scrape. Date Must be entered in as "YYYYMMDD"
    
    end: the end of the desired timeline you want scrape. Date Must be entered in as "YYYYMMDD"
             
    return:
    -------
    
    combined: a dataframe containing the sum of the daily keyword hits in google news (key words labeled _x),
    
    data frame cointaing stock info including open, close, high, low prices of the stock,
    as well as the stocks daily trading volume and the amount if there was a split or dividend 
    preformed on the stock that day,
    
    and the sum of how many times key word wikipedia pages were viewed in a day (key words labeled _y)
    """
    
    year_s = int(start[0:4])
    month_s = int(start[4:6])
    day_s = int(start[6:8])
    year_e = int(end[0:4])
    month_e = int(end[4:6])
    day_e = int(end[6:8])
    
    starter = pd.to_datetime(f"'{year_s}-{month_s}-{day_s}'")
    ender = pd.to_datetime(f"'{year_e}-{month_e}-{day_e}'")
    
    
    pytrends = TrendReq(hl='en-US', tz=360, retries=10)
    jeff = pytrends.get_historical_interest(kw_list_1, \
                                 year_start = year_s, month_start = month_s, day_start = day_s, hour_start = 1, \
                                 year_end = year_e, month_end = month_e, day_end = day_e, hour_end = 23, \
                                 cat = 0, geo = '', gprop = 'news', sleep = 60)
    
    jeff = jeff.iloc[:, 0:-1] # eliminates the isPartial Column
    jeff = jeff.reset_index().drop_duplicates(subset = "date") #removing duplicates from the index
    jeff = jeff.groupby(pd.Grouper(key="date", freq="D")).mean() # coverts to the mean of daily search score
    
    dow = yf.Ticker("^DJI")
    dow_h = dow.history(start=starter, end=ender)
    dow_h = pd.DataFrame(dow_h)
    dow_names = {"Open":"dow_open","Close":"dow_close","Low": "dow_low",
    'High':'dow_high','Volume':'dow_vol'}
    dow_h=dow_h.rename(dow_names, axis=1).drop(["Dividends","Stock Splits"], axis=1)

    nas = yf.Ticker("^IXIC")
    nas_h = nas.history(start=starter, end=ender)
    nas_h = pd.DataFrame(nas_h)
    nas_names = {"Open":"nas_open", "Close":"nas_close", "Low": "nas_low",
    'High':'nas_high','Volume':'nas_vol'}
    nas_h=nas_h.rename(nas_names, axis=1).drop(["Dividends","Stock Splits"], axis=1)

    market = dow_h.merge(nas_h,left_index=True, right_index=True, how="left")

    tick = yf.Ticker(ticker)
    hist = tick.history(start=starter, end=ender)
    hist = pd.DataFrame(hist)
    
    combined = jeff.merge(hist, left_index=True, right_index=True, how="left") 
    
    d = pd.DataFrame()
    for key_word in kw_list_2:
        geoff = pageviewapi.per_article('en.wikipedia', key_word, start, end,
                                    access='all-access', agent='all-agents', granularity='daily')
        dicty = dict(geoff)
        views = pd.DataFrame(dicty["items"])
        views["timestamp"] = pd.to_datetime((views["timestamp"]), format="%Y%m%d%H")
        views = views.set_index("timestamp")
        page = pd.Series(views["views"])
        d[key_word] = page
        
    combined = combined.merge(d, left_index=True, right_index=True, how="left") 
    
    return combined

In [24]:
kw_list_1 = ["UBS", "UBS Financial Services Inc.", "UBS Investment Bank", "UBS Global Wealth Management", "UBS Asset Management"]
kw_list_2 = ["UBS", "Union Bank of Switzerland", "UBS tax evasion controversies", "Banking in Switzerland"]

In [26]:
UBS = Big_scraper(kw_list_1, kw_list_2,"UBS", "20190101", "20220331")

In [None]:
UBS.head()

In [None]:
# UBS_0 = Big_scraper(kw_list_1, kw_list_2,"UBS", "20210101", "20211231")

In [None]:
# UBS_1 = Big_scraper(kw_list_1, kw_list_2,"UBS", "20190101", "20191231")

In [None]:
# UBS_2 = Big_scraper(kw_list_1, kw_list_2,"UBS", "20200101", "20201231")

In [None]:
# UBS_3 = Big_scraper(kw_list_1, kw_list_2,"UBS", "20220101", "20220331")

In [None]:
# UBS_0.to_csv("2021_UBS_Data.csv")
# UBS_1.to_csv("2019_UBS_Data.csv")
# UBS_2.to_csv("2020_UBS_Data.csv")
# UBS_3.to_csv("2022_UBS_Data.csv")

In [None]:
UBS.to_csv("UBS_Full_Data.csv")

In [27]:
def Big_scraper(kw_list_1, kw_list_2, ticker, start,end):
    
    """
    Description:
    ------------
    
    The function initially grabs historical, indexed, hourly data for when the keyword 
    was searched most as shown on Google Trends' Interest Over Time section.
    It then cleans the data to show daily hits on the keyword in Google news.

    
    input:
    -----
    kw_list_1: List of up to 5 key words that will be scraped from google trends for the dates given.
             Here, the scraping will pull the total posted items in google news that contains
             one of the key words.
    
    kw_list_2: List of wikipedia article titles (unlimited length) that will pull the amount of
            views the article recieved each day. 

    ticker: the ticker abriviation of the desired stock. Must be netered in as an all capitalized string 
    example UBS Inc. woud be "UBS"
             
    start: the start of the desired timeline you want scrape. Date Must be entered in as "YYYYMMDD"
    
    end: the end of the desired timeline you want scrape. Date Must be entered in as "YYYYMMDD"
             
    return:
    -------
    
    combined: a dataframe containing the sum of the daily keyword hits in google news (key words labeled _x),
    
    data frame cointaing stock info including open, close, high, low prices of the stock,
    as well as the stocks daily trading volume and the amount if there was a split or dividend 
    preformed on the stock that day,
    
    and the sum of how many times key word wikipedia pages were viewed in a day (key words labeled _y)
    """
    
    year_s = int(start[0:4])
    month_s = int(start[4:6])
    day_s = int(start[6:8])
    year_e = int(end[0:4])
    month_e = int(end[4:6])
    day_e = int(end[6:8])
    
    starter = pd.to_datetime(f"'{year_s}-{month_s}-{day_s}'")
    ender = pd.to_datetime(f"'{year_e}-{month_e}-{day_e}'")
    
    dow = yf.Ticker("^DJI")
    dow_h = dow.history(start=starter, end=ender)
    dow_h = pd.DataFrame(dow_h)
    dow_names = {"Open":"dow_open","Close":"dow_close","Low": "dow_low",
    'High':'dow_high','Volume':'dow_vol'}
    dow_h=dow_h.rename(dow_names, axis=1).drop(["Dividends","Stock Splits"], axis=1)

    nas = yf.Ticker("^IXIC")
    nas_h = nas.history(start=starter, end=ender)
    nas_h = pd.DataFrame(nas_h)
    nas_names = {"Open":"nas_open", "Close":"nas_close", "Low": "nas_low",
    'High':'nas_high','Volume':'nas_vol'}
    nas_h=nas_h.rename(nas_names, axis=1).drop(["Dividends","Stock Splits"], axis=1)

    market = dow_h.merge(nas_h,left_index=True, right_index=True, how="left")
    
    return market

In [29]:
Nas_Dow = Big_scraper(kw_list_1, kw_list_2,"UBS", "20190101", "20220331")
Nas_Dow.head()

Unnamed: 0_level_0,dow_open,dow_high,dow_low,dow_close,dow_vol,nas_open,nas_high,nas_low,nas_close,nas_vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-02,23058.609375,23413.470703,22928.589844,23346.240234,321570000,6506.910156,6693.709961,6506.879883,6665.939941,2261800000
2019-01-03,23176.390625,23176.390625,22638.410156,22686.220703,424240000,6584.77002,6600.209961,6457.129883,6463.5,2607290000
2019-01-04,22894.919922,23518.640625,22894.919922,23433.160156,396020000,6567.140137,6760.689941,6554.240234,6738.859863,2579550000
2019-01-07,23474.259766,23687.740234,23301.589844,23531.349609,334200000,6757.529785,6855.600098,6741.399902,6823.470215,2507550000
2019-01-08,23680.320312,23864.650391,23581.449219,23787.449219,317420000,6893.439941,6909.580078,6795.859863,6897.0,2380290000


In [31]:
Nas_Dow.to_csv("Nas_Dow_Data.csv")

Raw data saved, now we will read it back in to start cleaning it.

## Data Clean and Shape

In [34]:
UBS = pd.read_csv("UBS_Full_Data.csv")
UBSS = pd.concat([UBS, Nas_Dow])
UBSS.head()

Unnamed: 0,date,UBS_x,UBS Financial Services Inc.,UBS Investment Bank,UBS Global Wealth Management,UBS Asset Management,Open,High,Low,Close,...,dow_open,dow_high,dow_low,dow_close,dow_vol,nas_open,nas_high,nas_low,nas_close,nas_vol
0,2019-01-01,0.0,0.0,0.0,0.0,0.0,,,,,...,,,,,,,,,,
1,2019-01-02,6.708333,0.0,0.0,0.0,0.0,10.378242,10.70018,10.282508,10.632403,...,,,,,,,,,,
2,2019-01-03,5.708333,0.0,0.0,0.0,0.0,10.522266,10.606987,10.465503,10.49685,...,,,,,,,,,,
3,2019-01-04,14.791667,0.0,0.0,0.0,0.0,10.734068,10.937397,10.687472,10.878093,...,,,,,,,,,,
4,2019-01-05,9.125,0.0,0.0,0.0,0.0,,,,,...,,,,,,,,,,


In [35]:
#converting the date column back to datetime 
UBSS.date = pd.to_datetime(UBSS.date)

In [36]:
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

# create dateframe 2019-01-01 to 2022-03-31 which excludes weekends and holidays
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
dates = pd.date_range(start='2019-01-01', end='2022-03-31', freq=us_bd)
dates = pd.DataFrame(dates)
dates = dates.rename(columns={0: "date"})

# Merge on new date range to remove weekends and holidays in netflix dataset
UBSS = dates.merge(UBSS, on="date", how="left")
UBSS = UBSS.set_index("date")
UBSS.head()

Unnamed: 0_level_0,UBS_x,UBS Financial Services Inc.,UBS Investment Bank,UBS Global Wealth Management,UBS Asset Management,Open,High,Low,Close,Volume,...,dow_open,dow_high,dow_low,dow_close,dow_vol,nas_open,nas_high,nas_low,nas_close,nas_vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,6.708333,0.0,0.0,0.0,0.0,10.378242,10.70018,10.282508,10.632403,3210100.0,...,,,,,,,,,,
2019-01-03,5.708333,0.0,0.0,0.0,0.0,10.522266,10.606987,10.465503,10.49685,2534700.0,...,,,,,,,,,,
2019-01-04,14.791667,0.0,0.0,0.0,0.0,10.734068,10.937397,10.687472,10.878093,2743500.0,...,,,,,,,,,,
2019-01-07,12.208333,0.0,0.0,0.0,0.0,10.878092,11.056005,10.823024,10.988229,3163300.0,...,,,,,,,,,,
2019-01-08,2.333333,0.0,0.0,0.0,0.0,11.047532,11.089893,10.945868,11.013644,2384500.0,...,,,,,,,,,,


In [37]:
# creating base varibles to be used in variable creator functions
UBSS["Wiki_total"] = (UBSS["UBS_x"] + 
    UBSS["Union Bank of Switzerland"] + UBSS["UBS tax evasion controversies"] + 
    UBSS["Banking in Switzerland"])

UBSS["Google_total"] = (UBSS["UBS_y"] +
    UBSS["UBS Financial Services Inc."] + UBSS["UBS Investment Bank"] +
    UBSS["UBS Global Wealth Management"] + UBSS["UBS Asset Management"])

UBSS["Stock_total"] = UBSS["Close"]
UBSS["Nas_total"] = UBSS["nas_close"]
UBSS["Dow_total"] = UBSS["dow_close"]

In [38]:
def variables_creator(df, variable_list, w=7):
    
    '''
    descrition:
    -----------
    takes dataframe and returns new varibles based on recommmended calcualtions, 
    it should be done seporately with internet based and stock based dataframes

    Note: When using for aggreated varaibles, for example Wiki_total, the sum of all the wiki pages daily page view counts, 
    you MUST calculate Wiki_total in the dataframe seperately BEFORE this function can be used. 

    For example if you had wiki page counts for Union Bank of Switzerland and UBS tax evasion controversies, Wiki_total would equal Union Bank of Switzerland + UBS tax evasion controversies counts, 
    AGAIN Wiki_total must be calculated in the desired dateframe before using this function 

    input:
    ------
    df: dataframe containing the google trends, yahoo finance, and or wikipedia page count data

    variable_list: list of strings to be added to the equations to calculate the new varaibles. 
    ex. insertting the string "Wiki" will add to df[f"{}_total"] to become "Wiki_total"

    w: the window length for one period shift. Default is 7 providing 7 day moving averages for wiki and google data, 
        FOR STOCK DATA THIS WILL NEED TO BE CHANGED TO 5.

    output:
    -------
    df: the same dataframe as was inputted but now containing variables for 
    Momemtum, Disparity, Moving Average, Exponential Moving Aerage, Rator Change, and RSI index score.
    Also containg are moving variables, which are boolean with 1 indicating an increase in the above variables
    '''   
    
    for i in variable_list:
        # Momentum_1
        df[f"{i}_Moment_1"] =  (df[f"{i}_total"] / df[f"{i}_total"].shift(w)) * 100
        # Momentum_2
        df[f"{i}_Moment_2"] =  (df[f"{i}_total"] - df[f"{i}_total"].shift(w)) * 100
        # Momentum_1_s three day shift (instead of w)
        df[f"{i}_Moment_1_s"] =  (df[f"{i}_total"] / df[f"{i}_total"].shift(3)) * 100
        # Momentum_2_s
        df[f"{i}_Moment_2_s"] =  (df[f"{i}_total"] - df[f"{i}_total"].shift(3)) * 100
        # Moving average
        df[f"{i}_MAvg"] = df[f"{i}_total"].rolling(f"{w}d").mean()
        # Moving average 3 day
        df[f"{i}_MAvg_s"] = df[f"{i}_total"].rolling("3d").mean()
        # Disparity
        df[f"{i}_Disparity"] = (df[f"{i}_total"]/df[f"{i}_MAvg"]) * 100
        # Disparity 3 day
        df[f"{i}_Disparity_s"] = (df[f"{i}_total"]/df[f"{i}_MAvg_s"]) * 100
        # Rate of Change Normal Way
        df[f"{i}_ROC"] = (df[f"{i}_total"]-df[f"{i}_total"].shift(w))/(df[f"{i}_total"].shift(w)) *100
        df[f"{i}_ROC_s"] = (df[f"{i}_total"]-df[f"{i}_total"].shift(3))/(df[f"{i}_total"].shift(3)) *100
        #Rate of Change Paper Way (doesn't make sense but just in case)
        df[f'{i}_Rocp'] = (df[f"{i}_total"]/df[f"{i}_Moment_2"]) *100
        # Exponential Moving Average
        df[f"{i}_EMA"] = (df[f"{i}_total"]-df[f"{i}_MAvg"].shift(1))*(2/(w+1))+df[f"{i}_MAvg"].shift(1)

        # calculating the Relative Strength Index, based on 14 day window
        df[f"{i}_diff"] = df[f"{i}_total"].diff(1)
        df[f"{i}_gain"] = df[f"{i}_diff"].clip(lower=0).round(2) #keeps all values above or below a given threshold, lower=lower bound
        df[f"{i}_loss"] = df[f"{i}_diff"].clip(upper=0).round(2)
        df[f'{i}_avg_gain'] = df[f'{i}_gain'].rolling(14).mean()
        df[f'{i}_avg_loss'] = df[f'{i}_loss'].rolling(14).mean()
        df[f'{i}_rs'] = df[f'{i}_avg_gain'] / df[f'{i}_avg_loss']
        df[f'{i}_RSI'] = 100 - (100 / (1.0 + df[f'{i}_rs']))

        # Calculatiing the Move Variables 
        df[f"{i}_Move"] = df[f"{i}_total"] > df[f"{i}_total"].shift(1) 
        df[f"{i}_Move"] = df[f"{i}_Move"].replace({True:1,False: 0})
    
      
        df[f"{i}_MAvg_Move"] = df[f"{i}_MAvg"] > df[f"{i}_MAvg"].shift(1) 
        df[f"{i}_MAvg_Move"] = df[f"{i}_MAvg_Move"].replace({True:1,False: 0})
        df[f"{i}_MAvg_s_Move"] = df[f"{i}_MAvg_s"] > df[f"{i}_MAvg_s"].shift(1) 
        df[f"{i}_MAvg_s_Move"] = df[f"{i}_MAvg_s_Move"].replace({True:1,False: 0})

        df[f"{i}_EMA_Move"] = df[f"{i}_EMA"] > df[f"{i}_EMA"].shift(1) 
        df[f"{i}_EMA_Move"] = df[f"{i}_EMA_Move"].replace({True:1,False: 0})

        df[f"{i}_Disparity_Move"] = df[f"{i}_Disparity"] > df[f"{i}_Disparity"].shift(1) 
        df[f"{i}_Disparity_Move"] = df[f"{i}_Disparity_Move"].replace({True:1,False: 0})
        df[f"{i}_Disparity_s_Move"] = df[f"{i}_Disparity_s"] > df[f"{i}_Disparity_s"].shift(1) 
        df[f"{i}_Disparity_s_Move"] = df[f"{i}_Disparity_s_Move"].replace({True:1,False: 0})

        df[f"{i}_RSI_Move"] = df[f"{i}_RSI"] > df[f"{i}_RSI"].shift(1) 
        df[f"{i}_RSI_Move"] = df[f"{i}_RSI_Move"].replace({True:1,False: 0})
        
    return df

In [39]:
UBSS = variables_creator(UBSS, ["Wiki","Google", "Stock", "Nas", "Dow"], 5)

In [40]:
def target_creator(df):
    '''
    description: creates the differnt types of target variables based on tomorrow minus today,
    '''
        
    # target 1, Open(t+1) - Close(t)
    df["target_1"] = (df["Open"].shift(-1) - df["Close"]) > 0
    df["target_1"] = df["target_1"].replace({True:1,False: 0})
    # target 2
    df["target_2"] = (df["Open"].shift(-1) - df["Open"]) > 0
    df["target_2"] = df["target_2"].replace({True:1,False: 0})
    # target 3
    df["target_3"] = (df["Close"].shift(-1) - df["Close"]) > 0
    df["target_3"] = df["target_3"].replace({True:1,False: 0})
    # target 4
    df["target_4"] = (df["Close"].shift(-1) - df["Open"]) > 0
    df["target_4"] = df["target_4"].replace({True:1,False: 0})
    # target 5
    df["target_5"] = (df["Volume"].shift(-1) - df["Volume"]) > 0
    df["target_5"] = df["target_5"].replace({True:1,False: 0})
 
    return df

In [41]:
UBSS = target_creator(UBSS)

In [42]:
pd.set_option('display.max_columns', None)
UBSS.head(30)

Unnamed: 0_level_0,UBS_x,UBS Financial Services Inc.,UBS Investment Bank,UBS Global Wealth Management,UBS Asset Management,Open,High,Low,Close,Volume,Dividends,Stock Splits,UBS_y,Union Bank of Switzerland,UBS tax evasion controversies,Banking in Switzerland,dow_open,dow_high,dow_low,dow_close,dow_vol,nas_open,nas_high,nas_low,nas_close,nas_vol,Wiki_total,Google_total,Stock_total,Nas_total,Dow_total,Wiki_Moment_1,Wiki_Moment_2,Wiki_Moment_1_s,Wiki_Moment_2_s,Wiki_MAvg,Wiki_MAvg_s,Wiki_Disparity,Wiki_Disparity_s,Wiki_ROC,Wiki_ROC_s,Wiki_Rocp,Wiki_EMA,Wiki_diff,Wiki_gain,Wiki_loss,Wiki_avg_gain,Wiki_avg_loss,Wiki_rs,Wiki_RSI,Wiki_Move,Wiki_MAvg_Move,Wiki_MAvg_s_Move,Wiki_EMA_Move,Wiki_Disparity_Move,Wiki_Disparity_s_Move,Wiki_RSI_Move,Google_Moment_1,Google_Moment_2,Google_Moment_1_s,Google_Moment_2_s,Google_MAvg,Google_MAvg_s,Google_Disparity,Google_Disparity_s,Google_ROC,Google_ROC_s,Google_Rocp,Google_EMA,Google_diff,Google_gain,Google_loss,Google_avg_gain,Google_avg_loss,Google_rs,Google_RSI,Google_Move,Google_MAvg_Move,Google_MAvg_s_Move,Google_EMA_Move,Google_Disparity_Move,Google_Disparity_s_Move,Google_RSI_Move,Stock_Moment_1,Stock_Moment_2,Stock_Moment_1_s,Stock_Moment_2_s,Stock_MAvg,Stock_MAvg_s,Stock_Disparity,Stock_Disparity_s,Stock_ROC,Stock_ROC_s,Stock_Rocp,Stock_EMA,Stock_diff,Stock_gain,Stock_loss,Stock_avg_gain,Stock_avg_loss,Stock_rs,Stock_RSI,Stock_Move,Stock_MAvg_Move,Stock_MAvg_s_Move,Stock_EMA_Move,Stock_Disparity_Move,Stock_Disparity_s_Move,Stock_RSI_Move,Nas_Moment_1,Nas_Moment_2,Nas_Moment_1_s,Nas_Moment_2_s,Nas_MAvg,Nas_MAvg_s,Nas_Disparity,Nas_Disparity_s,Nas_ROC,Nas_ROC_s,Nas_Rocp,Nas_EMA,Nas_diff,Nas_gain,Nas_loss,Nas_avg_gain,Nas_avg_loss,Nas_rs,Nas_RSI,Nas_Move,Nas_MAvg_Move,Nas_MAvg_s_Move,Nas_EMA_Move,Nas_Disparity_Move,Nas_Disparity_s_Move,Nas_RSI_Move,Dow_Moment_1,Dow_Moment_2,Dow_Moment_1_s,Dow_Moment_2_s,Dow_MAvg,Dow_MAvg_s,Dow_Disparity,Dow_Disparity_s,Dow_ROC,Dow_ROC_s,Dow_Rocp,Dow_EMA,Dow_diff,Dow_gain,Dow_loss,Dow_avg_gain,Dow_avg_loss,Dow_rs,Dow_RSI,Dow_Move,Dow_MAvg_Move,Dow_MAvg_s_Move,Dow_EMA_Move,Dow_Disparity_Move,Dow_Disparity_s_Move,Dow_RSI_Move,target_1,target_2,target_3,target_4,target_5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1
2019-01-02,6.708333,0.0,0.0,0.0,0.0,10.378242,10.70018,10.282508,10.632403,3210100.0,0.0,0.0,1402.0,117.0,29.0,399.0,,,,,,,,,,,551.708333,1402.0,10.632403,,,,,,,551.708333,551.708333,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,1402.0,1402.0,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,10.632403,10.632403,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,0,1,0
2019-01-03,5.708333,0.0,0.0,0.0,0.0,10.522266,10.606987,10.465503,10.49685,2534700.0,0.0,0.0,1556.0,126.0,15.0,421.0,,,,,,,,,,,567.708333,1556.0,10.49685,,,,,,,559.708333,559.708333,101.429316,101.429316,,,,557.041667,16.0,16.0,0.0,,,,,1,1,1,0,1,1,0,,,,,1479.0,1479.0,105.20622,105.20622,,,,1453.333333,154.0,154.0,0.0,,,,,1,1,1,0,1,1,0,,,,,10.564627,10.564627,99.358456,99.358456,,,,10.587219,-0.135553,0.0,-0.14,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,1,1,1
2019-01-04,14.791667,0.0,0.0,0.0,0.0,10.734068,10.937397,10.687472,10.878093,2743500.0,0.0,0.0,1467.0,125.0,29.0,398.0,,,,,,,,,,,566.791667,1467.0,10.878093,,,,,,,562.069444,562.069444,100.840149,100.840149,,,,562.069444,-0.916667,0.0,-0.92,,,,,0,1,1,1,0,0,0,,,,,1475.0,1475.0,99.457627,99.457627,,,,1475.0,-89.0,0.0,-89.0,,,,,0,0,0,1,0,0,0,,,,,10.669115,10.669115,101.958713,101.958713,,,,10.669115,0.381243,0.38,0.0,,,,,1,1,1,1,1,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,1,1,1
2019-01-07,12.208333,0.0,0.0,0.0,0.0,10.878092,11.056005,10.823024,10.988229,3163300.0,0.0,0.0,2017.0,130.0,37.0,418.0,,,,,,,,,,,597.208333,2017.0,10.988229,,,,,108.247111,4550.0,577.236111,597.208333,103.459974,100.0,,8.247111,,573.782407,30.416667,30.42,0.0,,,,,1,1,1,1,1,0,0,,,143.865906,61500.0,1680.0,2017.0,120.059524,100.0,,43.865906,,1655.666667,550.0,550.0,0.0,,,,,1,1,1,1,1,1,0,,,103.346613,35.582542,10.787724,10.988229,101.85864,100.0,,3.346613,,10.775487,0.110136,0.11,0.0,,,,,1,1,1,1,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,1,1,0
2019-01-08,2.333333,0.0,0.0,0.0,0.0,11.047532,11.089893,10.945868,11.013644,2384500.0,0.0,0.0,2023.0,146.0,39.0,422.0,,,,,,,,,,,609.333333,2023.0,11.013644,,,,,107.33211,4162.5,591.111111,603.270833,103.082707,101.004938,,7.33211,,587.935185,12.125,12.12,0.0,,,,,1,1,1,1,0,1,0,,,130.012853,46700.0,1835.666667,2020.0,110.205193,100.148515,,30.012853,,1794.333333,6.0,6.0,0.0,,,,,1,1,1,1,0,1,0,,,104.923327,51.67942,10.959989,11.000937,100.489559,100.115515,,4.923327,,10.863031,0.025415,0.03,0.0,,,,,1,1,1,1,0,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,0,0,1
2019-01-09,9.083333,0.0,0.0,0.0,0.0,11.081421,11.132253,10.95434,10.979756,3655200.0,0.0,0.0,2001.0,136.0,34.0,413.0,,,,,,,,,,,592.083333,2001.0,10.979756,,,107.318178,4037.5,104.462251,2529.166667,599.541667,599.541667,98.755994,98.755994,7.318178,4.462251,14.664603,591.435185,-17.25,0.0,-17.25,,,,,0,1,0,1,0,0,0,142.724679,59900.0,136.400818,53400.0,2013.666667,2013.666667,99.370965,99.370965,42.724679,36.400818,3.340568,1890.777778,-22.0,0.0,-22.0,,,,,0,1,0,1,0,0,0,103.266928,34.735298,100.934572,10.166359,10.993876,10.993876,99.871564,99.871564,3.266928,0.934572,31.609794,10.966578,-0.033888,0.0,-0.03,,,,,0,1,0,1,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,0,1,1,0
2019-01-10,6.416667,0.0,0.0,0.0,0.0,11.03059,11.15767,11.022117,11.098366,2622300.0,0.0,0.0,2017.0,116.0,22.0,423.0,,,,,,,,,,,567.416667,2017.0,11.098366,,,99.948624,-29.166667,95.011512,-2979.166667,591.510417,589.611111,95.926741,96.235749,-0.051376,-4.988488,-1945.428571,588.833333,-24.666667,0.0,-24.67,,,,,0,0,0,0,0,0,0,129.627249,46100.0,100.0,0.0,2014.5,2013.666667,100.1241,100.165536,29.627249,0.0,4.375271,2014.777778,16.0,16.0,0.0,,,,,1,1,0,1,1,1,0,105.730441,60.151577,101.002318,11.013699,11.019999,11.030589,100.711134,100.614446,5.730441,1.002318,18.450665,11.028706,0.118609,0.12,0.0,,,,,1,1,1,1,1,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,1,1,0
2019-01-11,13.166667,0.0,0.0,0.0,0.0,11.056006,11.233919,11.03059,11.166142,2160300.0,0.0,0.0,1629.0,120.0,28.0,308.0,,,,,,,,,,,469.166667,1629.0,11.166142,,,82.775858,-9762.5,76.996718,-14016.666667,567.041667,542.888889,82.739364,86.420385,-17.224142,-23.003282,-4.805805,550.729167,-98.25,0.0,-98.25,,,,,0,0,0,0,0,0,0,111.042945,16200.0,80.523974,-39400.0,1937.4,1882.333333,84.081759,86.541526,11.042945,-19.476026,10.055556,1886.0,-388.0,0.0,-388.0,,,,,0,0,0,0,0,0,0,102.64798,28.80497,101.38463,15.249825,11.049228,11.081422,101.058128,100.764531,2.64798,1.38463,38.764639,11.068713,0.067777,0.07,0.0,,,,,1,1,1,1,1,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,1,1,1
2019-01-14,16.041667,0.0,0.0,0.0,0.0,11.03906,11.293221,11.03906,11.233917,2759300.0,0.0,0.0,1591.0,147.0,25.0,427.0,,,,,,,,,,,615.041667,1591.0,11.233917,,,102.986116,1783.333333,103.877551,2295.833333,550.541667,615.041667,111.715735,100.0,2.986116,3.877551,34.488318,583.041667,145.875,145.88,0.0,,,,,1,0,1,1,1,1,0,78.879524,-42600.0,79.510245,-41000.0,1745.666667,1591.0,91.139966,100.0,-21.120476,-20.489755,-3.734742,1821.933333,-38.0,0.0,-38.0,,,,,0,0,0,0,1,1,0,102.235924,24.568844,102.314813,25.416088,11.166142,11.233917,100.606972,100.0,2.235924,2.314813,45.72424,11.110791,0.067775,0.07,0.0,,,,,1,1,1,1,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,0,1,0
2019-01-15,9.625,0.0,0.0,0.0,0.0,11.098366,11.208502,11.056006,11.20003,2680400.0,0.0,0.0,1664.0,148.0,29.0,468.0,,,,,,,,,,,654.625,1664.0,11.20003,,,107.432987,4529.166667,115.369364,8720.833333,579.611111,634.833333,112.942107,103.117616,7.432987,15.369364,14.453542,585.236111,39.583333,39.58,0.0,,,,,1,1,1,1,1,1,0,82.254078,-35900.0,82.498761,-35300.0,1628.0,1627.5,102.211302,102.242704,-17.745922,-17.501239,-4.635097,1718.444444,73.0,73.0,0.0,,,,,1,0,1,0,1,1,0,101.69232,18.638611,100.916032,10.166454,11.20003,11.216974,100.000003,99.848948,1.69232,0.916032,60.090478,11.177438,-0.033887,0.0,-0.03,,,,,0,1,0,1,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,1,1,0


In [49]:
UBSS.to_csv("UBS_Cleaned_Date.csv")

## Initial Variable Slection 

In [47]:
plt.figure(figsize=(22, 22))
sns.heatmap(
    UBSS.corr(),
    linewidths=0.1,
    cmap='RdBu',
)

NameError: name 'sns' is not defined

<Figure size 1584x1584 with 0 Axes>

# Early Model Work 

In [50]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Dense, Normalization
%matplotlib inline

In [51]:
UBSS = pd.read_csv("UBS_Cleaned_Date.csv")
UBSS.date = pd.to_datetime(UBS.date)
UBSS = UBSS.set_index("date")

In [52]:
UBSS.head()

Unnamed: 0_level_0,UBS_x,UBS Financial Services Inc.,UBS Investment Bank,UBS Global Wealth Management,UBS Asset Management,Open,High,Low,Close,Volume,Dividends,Stock Splits,UBS_y,Union Bank of Switzerland,UBS tax evasion controversies,Banking in Switzerland,dow_open,dow_high,dow_low,dow_close,dow_vol,nas_open,nas_high,nas_low,nas_close,nas_vol,Wiki_total,Google_total,Stock_total,Nas_total,Dow_total,Wiki_Moment_1,Wiki_Moment_2,Wiki_Moment_1_s,Wiki_Moment_2_s,Wiki_MAvg,Wiki_MAvg_s,Wiki_Disparity,Wiki_Disparity_s,Wiki_ROC,Wiki_ROC_s,Wiki_Rocp,Wiki_EMA,Wiki_diff,Wiki_gain,Wiki_loss,Wiki_avg_gain,Wiki_avg_loss,Wiki_rs,Wiki_RSI,Wiki_Move,Wiki_MAvg_Move,Wiki_MAvg_s_Move,Wiki_EMA_Move,Wiki_Disparity_Move,Wiki_Disparity_s_Move,Wiki_RSI_Move,Google_Moment_1,Google_Moment_2,Google_Moment_1_s,Google_Moment_2_s,Google_MAvg,Google_MAvg_s,Google_Disparity,Google_Disparity_s,Google_ROC,Google_ROC_s,Google_Rocp,Google_EMA,Google_diff,Google_gain,Google_loss,Google_avg_gain,Google_avg_loss,Google_rs,Google_RSI,Google_Move,Google_MAvg_Move,Google_MAvg_s_Move,Google_EMA_Move,Google_Disparity_Move,Google_Disparity_s_Move,Google_RSI_Move,Stock_Moment_1,Stock_Moment_2,Stock_Moment_1_s,Stock_Moment_2_s,Stock_MAvg,Stock_MAvg_s,Stock_Disparity,Stock_Disparity_s,Stock_ROC,Stock_ROC_s,Stock_Rocp,Stock_EMA,Stock_diff,Stock_gain,Stock_loss,Stock_avg_gain,Stock_avg_loss,Stock_rs,Stock_RSI,Stock_Move,Stock_MAvg_Move,Stock_MAvg_s_Move,Stock_EMA_Move,Stock_Disparity_Move,Stock_Disparity_s_Move,Stock_RSI_Move,Nas_Moment_1,Nas_Moment_2,Nas_Moment_1_s,Nas_Moment_2_s,Nas_MAvg,Nas_MAvg_s,Nas_Disparity,Nas_Disparity_s,Nas_ROC,Nas_ROC_s,Nas_Rocp,Nas_EMA,Nas_diff,Nas_gain,Nas_loss,Nas_avg_gain,Nas_avg_loss,Nas_rs,Nas_RSI,Nas_Move,Nas_MAvg_Move,Nas_MAvg_s_Move,Nas_EMA_Move,Nas_Disparity_Move,Nas_Disparity_s_Move,Nas_RSI_Move,Dow_Moment_1,Dow_Moment_2,Dow_Moment_1_s,Dow_Moment_2_s,Dow_MAvg,Dow_MAvg_s,Dow_Disparity,Dow_Disparity_s,Dow_ROC,Dow_ROC_s,Dow_Rocp,Dow_EMA,Dow_diff,Dow_gain,Dow_loss,Dow_avg_gain,Dow_avg_loss,Dow_rs,Dow_RSI,Dow_Move,Dow_MAvg_Move,Dow_MAvg_s_Move,Dow_EMA_Move,Dow_Disparity_Move,Dow_Disparity_s_Move,Dow_RSI_Move,target_1,target_2,target_3,target_4,target_5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1
2019-01-01,6.708333,0.0,0.0,0.0,0.0,10.378242,10.70018,10.282508,10.632403,3210100.0,0.0,0.0,1402.0,117.0,29.0,399.0,,,,,,,,,,,551.708333,1402.0,10.632403,,,,,,,551.708333,551.708333,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,1402.0,1402.0,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,10.632403,10.632403,100.0,100.0,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,0,1,0
2019-01-02,5.708333,0.0,0.0,0.0,0.0,10.522266,10.606987,10.465503,10.49685,2534700.0,0.0,0.0,1556.0,126.0,15.0,421.0,,,,,,,,,,,567.708333,1556.0,10.49685,,,,,,,559.708333,559.708333,101.429316,101.429316,,,,557.041667,16.0,16.0,0.0,,,,,1,1,1,0,1,1,0,,,,,1479.0,1479.0,105.20622,105.20622,,,,1453.333333,154.0,154.0,0.0,,,,,1,1,1,0,1,1,0,,,,,10.564627,10.564627,99.358456,99.358456,,,,10.587219,-0.135553,0.0,-0.14,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,1,1,1
2019-01-03,14.791667,0.0,0.0,0.0,0.0,10.734068,10.937397,10.687472,10.878093,2743500.0,0.0,0.0,1467.0,125.0,29.0,398.0,,,,,,,,,,,566.791667,1467.0,10.878093,,,,,,,562.069444,562.069444,100.840149,100.840149,,,,562.069444,-0.916667,0.0,-0.92,,,,,0,1,1,1,0,0,0,,,,,1475.0,1475.0,99.457627,99.457627,,,,1475.0,-89.0,0.0,-89.0,,,,,0,0,0,1,0,0,0,,,,,10.669115,10.669115,101.958713,101.958713,,,,10.669115,0.381243,0.38,0.0,,,,,1,1,1,1,1,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,1,1,1,1
2019-01-04,12.208333,0.0,0.0,0.0,0.0,10.878092,11.056005,10.823024,10.988229,3163300.0,0.0,0.0,2017.0,130.0,37.0,418.0,,,,,,,,,,,597.208333,2017.0,10.988229,,,,,108.247111,4550.0,577.236111,597.208333,103.459974,100.0,,8.247111,,573.782407,30.416667,30.42,0.0,,,,,1,1,1,1,1,0,0,,,143.865906,61500.0,1680.0,2017.0,120.059524,100.0,,43.865906,,1655.666667,550.0,550.0,0.0,,,,,1,1,1,1,1,1,0,,,103.346613,35.582542,10.787724,10.988229,101.85864,100.0,,3.346613,,10.775487,0.110136,0.11,0.0,,,,,1,1,1,1,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,1,1,0
2019-01-05,2.333333,0.0,0.0,0.0,0.0,11.047532,11.089893,10.945868,11.013644,2384500.0,0.0,0.0,2023.0,146.0,39.0,422.0,,,,,,,,,,,609.333333,2023.0,11.013644,,,,,107.33211,4162.5,591.111111,603.270833,103.082707,101.004938,,7.33211,,587.935185,12.125,12.12,0.0,,,,,1,1,1,1,0,1,0,,,130.012853,46700.0,1835.666667,2020.0,110.205193,100.148515,,30.012853,,1794.333333,6.0,6.0,0.0,,,,,1,1,1,1,0,1,0,,,104.923327,51.67942,10.959989,11.000937,100.489559,100.115515,,4.923327,,10.863031,0.025415,0.03,0.0,,,,,1,1,1,1,0,1,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,1,1,0,0,1


In [None]:
# UBS 1, Basic Varaibles,  
UBS_1 = UBSS[UBSS.columns.drop(list(UBSS.filter(regex='Move')))]
UBS_1 = UBS_1.iloc[14:]

UBS_1 = UBS_1[["target_1", "Volume", 
                "Wiki_total", "Google_total", 
                "nas_close", "dow_close",
                 "Close", "nas_vol", 
                 "dow_vol", "Stock_diff",
                 ]]
UBS_1.head()  

Unnamed: 0_level_0,target_1,Volume,Wiki_total,Google_total,nas_close,dow_close,Close,nas_vol,dow_vol,Stock_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-23,0,45196900.0,14836.0,19.5,7025.77002,24575.619141,7.56201,2274420000.0,318600000.0,-0.145074
2019-01-24,1,79516400.0,15219.0,32.625,7073.459961,24553.240234,7.797754,2400290000.0,320170000.0,0.235744
2019-01-25,0,53098800.0,14645.0,35.5,7164.859863,24737.199219,8.033502,2440840000.0,376890000.0,0.235747
2019-01-28,1,42116300.0,14390.0,21.958333,7085.680176,24528.220703,7.852159,2435480000.0,347170000.0,-0.181343
2019-01-29,1,30485000.0,14412.0,24.75,7028.290039,24579.960938,7.94283,2089690000.0,330870000.0,0.090671


In [None]:
#splitting into training sets 
n = len(UBS_1)
train_f1 = UBS_1.loc["2019-1-1":'2021-6-30']
val_f1 = UBS_1.loc['2021-7-1':'2021-12-31']
test_f1 = UBS_1.loc['2022-1-1':]

num_features = UBS_1.shape[1]

In [None]:
#normalizing the data, may come back later to take normalization with moving averages to avoid allwoing
# the training set to have access to futre traing data
train_f1_mean = train_f1.mean()
train_f1_std = train_f1.std()

train_df = (train_f1 - train_f1_mean) / train_f1_std
val_df = (val_f1 - train_f1_mean) / train_f1_std
test_df = (test_f1 - train_f1_mean) / train_f1_std

In [None]:
T_input = 60  # see below -- just a number for now

In [None]:
model_f1 = keras.Sequential([
    layers.Conv1D(32, 7, activation="relu", input_shape = [32, 5, 9]),
    layers.Conv1D(32, 7, activation="relu"),
    layers.MaxPooling1D(),
    layers.Flatten(),
    layers.Dense(30, activation="relu"),
    layers.Dense(1),
])
model_f1.compile(optimizer="adam", loss='categorical_crossentropy',
                   metrics=["accuracy"])

ValueError: One of the dimensions in the output is <= 0 due to downsampling in conv1d_10. Consider increasing the input size. Received input shape [None, 32, 5, 9] which would produce output shape with a zero or negative value in a dimension.

In [None]:
# model_f1 data prep
X_train=train_df.drop("target_1", axis=1)
X_val=val_df.drop("target_1", axis=1)
X_test=test_df.drop("target_1", axis=1)

y_train=train_df["target_1"]
y_val=val_df["target_1"]
y_test=test_df["target_1"]


In [None]:

train_dataset = keras.preprocessing.timeseries_dataset_from_array(
    X_train, 
    y_train,  
    5
)
test_dataset = keras.preprocessing.timeseries_dataset_from_array(
    X_val, 
    y_val,  
    5
)

In [None]:
x_t = list(train_dataset)[0][0].numpy()
y_t = list(train_dataset)[0][1].numpy()
x_t

array([[[-0.50216695,  0.22066315, -0.88809277, ..., -0.81250273,
         -0.25732813, -0.70113182],
        [ 0.46875738,  0.33095479,  1.14319888, ..., -0.73529334,
         -0.24465995,  1.00666459],
        [-0.27861629,  0.16566131,  1.58814848, ..., -0.71041973,
          0.21300844,  1.00667742],
        [-0.58931943,  0.09222954, -0.50762862, ..., -0.71370758,
         -0.02679947, -0.86377808],
        [-0.91837761,  0.09856483, -0.07557611, ..., -0.92581719,
         -0.15832265,  0.35607742]],

       [[ 0.46875738,  0.33095479,  1.14319888, ..., -0.73529334,
         -0.24465995,  1.00666459],
        [-0.27861629,  0.16566131,  1.58814848, ..., -0.71041973,
          0.21300844,  1.00667742],
        [-0.58931943,  0.09222954, -0.50762862, ..., -0.71370758,
         -0.02679947, -0.86377808],
        [-0.91837761,  0.09856483, -0.07557611, ..., -0.92581719,
         -0.15832265,  0.35607742],
        [-0.93475513, -0.09264575, -1.08154911, ..., -0.64711011,
          0.37

In [None]:
model_f1.fit(x_t,y_t,epochs = 10)

Epoch 1/10


ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_3" is incompatible with the layer: expected shape=(None, 60, 1), found shape=(32, 5, 9)
