In [None]:
!pip install yfinance

In [1]:
import datetime
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import tensorflow as tf
import yfinance as yf
from keras.layers import Dense
from keras.models import Sequential
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping

import project_functions2 as pf

In [None]:
# Ask TensorFlow for the name of the GPU device it can use, to check that a GPU runtime is attached.
tf.test.gpu_device_name()

In [None]:
# List every compute device TensorFlow can see on this machine.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

In [None]:
# Colab only: mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# These top two lines are needed to produce altair plots on Google Colab.
# Comment these two lines out if you are running locally.
# NOTE(review): no cell shown here imports altair as `alt` -- confirm it is imported before this cell runs.
!pip install altair_data_server
alt.data_transformers.enable('data_server')

# These bottom two lines are needed to produce altair plots on a local machine.
# Comment these two lines out to run on Google Colab.
#alt.renderers.enable('default')
#alt.data_transformers.enable('json')

In [None]:
# Tickers to model. Disabled alternative list: ['AMZN', 'AAPL', 'FB','GOOGL', 'MSFT', 'TSLA']
stock_list = ['AMZN', 'AAPL', 'FB','GOOG', 'TSLA']
# One yfinance Ticker handle per symbol, keyed by the symbol.
stock_objects = {symbol: yf.Ticker(symbol) for symbol in stock_list}

In [None]:
# Load each ticker's sentiment CSV from the mounted Drive and index it by its 'date' column.
stock_sentiments = {
    symbol: pd.read_csv(
        '/content/drive/MyDrive/SENG474_Project/data/sentiment/investing_'+symbol+'_sentiment.csv'
    ).set_index('date')
    for symbol in stock_objects
}

In [None]:
def combiner(stock_dfs):
    """Stack per-ticker DataFrames into one frame ordered by ``Date``.

    Parameters
    ----------
    stock_dfs : dict
        Mapping of ticker symbol to DataFrame. Every frame must be sortable
        by ``'Date'`` (a column or an index level carrying that name).

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows of every input, sorted by ``'Date'``.
        The input frames are never modified.

    Raises
    ------
    ValueError
        If ``stock_dfs`` is empty.
    """
    frames = list(stock_dfs.values())
    if not frames:
        raise ValueError('combiner() needs at least one DataFrame to combine')

    # A single concat avoids re-copying the accumulated rows for every ticker,
    # and the non-inplace sort keeps a lone input frame from being reordered.
    return pd.concat(frames).sort_values(by=['Date'])

In [None]:
def feature_reduction_nn(stock_objects, split_time):
    """Greedy backward feature elimination scored with a dense Keras network.

    One combined frame is built from every ticker's full price history
    (``pf.date_time_prep``, ``pf.rolling_aves``, a left merge with
    ``stock_sentiments[ticker]``, zero-filling, then
    ``pf.future_close_setup(frame, 5)``). A baseline model is trained on all
    columns, after which the feature columns are walked starting at
    position 4. A column is dropped permanently when the model trained
    without it reaches an R^2 at least as high as the best score so far;
    otherwise the walk moves on to the next column.

    The last column of the combined frame is the regression target and is
    never offered as a removal candidate.

    Parameters
    ----------
    stock_objects : dict
        Mapping of ticker symbol to an object exposing
        ``history(period='max')`` (``yf.Ticker`` objects in this notebook).
    split_time : int
        Hold-out size per ticker: the last ``split_time * len(stock_objects)``
        rows of the combined frame form the test set.

    Returns
    -------
    tuple
        ``(max_score, drop_list)``: the best R^2 reached and the column labels
        that were removed, in removal order.

    Notes
    -----
    Reads the notebook-level ``stock_sentiments`` dict.
    """
    seed = 45
    # Columns before this position are never considered for removal
    # (presumably the raw price columns -- TODO confirm).
    first_candidate = 4

    def _build_combined_frame():
        """Download, enrich and stack the per-ticker frames; return (frame, ticker count)."""
        stock_dfs = {}
        for key in stock_objects:
            frame = stock_objects[key].history(period='max')
            frame = pf.date_time_prep(frame)
            frame = pf.rolling_aves(frame)
            frame = frame.merge(stock_sentiments[key], how='left', left_index=True, right_index=True)
            frame = frame.fillna(0)
            stock_dfs[key] = pf.future_close_setup(frame, 5)
        return pf.combiner(stock_dfs), len(stock_dfs)

    def _fit_and_score(frame, n_stocks):
        """Train a fresh network on `frame` (last column = target) and return its test R^2."""
        X = frame.iloc[:, :-1]
        y = frame.iloc[:, -1:]

        split_mark = int(len(frame) - (split_time * n_stocks))
        X_train = X.head(split_mark)
        X_test = X.tail(len(frame) - split_mark)
        y_train = y.head(split_mark)
        y_test = y.tail(len(frame) - split_mark)

        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        early_stopping = EarlyStopping(monitor='loss',
                                       patience=3, restore_best_weights=True)

        # Seed NumPy and TensorFlow before every fit so each candidate starts
        # from the same initial weights and scores are comparable.
        np.random.seed(seed)
        tf.random.set_seed(seed)

        stock_model = Sequential()
        stock_model.add(Dense(units=500, input_dim=X_train_scaled.shape[1], activation='relu'))
        stock_model.add(Dense(units=500, activation='relu'))
        stock_model.add(Dense(units=500, activation='relu'))
        stock_model.add(Dense(units=1, activation='relu'))
        stock_model.compile(optimizer='adam', loss='mean_squared_error')
        # `workers` is not passed: Keras only uses it for generator/Sequence
        # inputs, and newer Keras releases reject the argument.
        stock_model.fit(X_train_scaled, y_train, epochs=60, batch_size=32, verbose=1,
                        callbacks=[early_stopping])
        stock_close_pred = stock_model.predict(X_test_scaled)

        # Rows without a known target are dropped from the truth, and the
        # predictions are truncated to the same length before scoring.
        y_known = y_test.dropna()
        return r2_score(y_known, stock_close_pred[:len(y_known)])

    # The prepared data does not depend on which columns are dropped, so it is
    # downloaded and assembled once rather than on every iteration.
    combine_df, n_stocks = _build_combined_frame()
    max_score = _fit_and_score(combine_df, n_stocks)

    drop_list = []
    i = first_candidate
    while True:
        # Feature columns still in play; the target (last column) is excluded
        # so it can never be picked as a removal candidate.
        remaining = [col for col in combine_df.columns[:-1] if col not in drop_list]
        if i >= len(remaining):
            break

        print('Max Score: ' + str(max_score))
        curr_col = remaining[i]
        candidate_df = combine_df.drop(columns=drop_list + [curr_col])
        curr_score = _fit_and_score(candidate_df, n_stocks)

        if curr_score >= max_score:
            # Removing the column did not hurt: drop it for good. `i` stays
            # put because the next column slides into this position.
            drop_list.append(curr_col)
            max_score = curr_score
        else:
            i += 1

    return max_score, drop_list

In [None]:
# Run the greedy elimination; the test set is the final 365 * (number of tickers) rows of the combined frame.
max_score, drop_list = feature_reduction_nn(stock_objects, 365)

In [None]:
# Display the best score returned by the feature-reduction run above.
max_score

In [None]:
# Display the columns that the feature-reduction run above chose to drop.
drop_list

In [None]:
def feature_reduction2_nn(stock_objects, split_time):
    """Best-of-pass backward feature elimination scored with a dense Keras network.

    One combined frame is built from every ticker's full price history
    (``pf.date_time_prep``, ``pf.rolling_aves``, left merges with
    ``stock_investing[ticker]`` and ``stock_stocks[ticker]``, zero-filling,
    then ``pf.future_close_setup(frame, 5)``). A baseline model is trained on
    all columns. Each pass then tries removing every remaining feature column
    in turn and permanently drops the single column whose removal gave the
    highest R^2, provided that score is at least the best so far. Passes
    repeat until one finds no such column.

    The last column of the combined frame is the regression target and is
    never offered as a removal candidate. At least one feature column is
    always kept.

    Parameters
    ----------
    stock_objects : dict
        Mapping of ticker symbol to an object exposing
        ``history(period='max')`` (``yf.Ticker`` objects in this notebook).
    split_time : int
        Hold-out size per ticker: the last ``split_time * len(stock_objects)``
        rows of the combined frame form the test set.

    Returns
    -------
    tuple
        ``(max_score, drop_list)``: the best R^2 reached and the column labels
        that were removed, in removal order.

    Notes
    -----
    Reads the notebook-level dicts ``stock_investing`` and ``stock_stocks``.
    NOTE(review): neither is defined in the cells visible here -- confirm
    where they are loaded before running this function.
    """
    seed = 45

    def _build_combined_frame():
        """Download, enrich and stack the per-ticker frames; return (frame, ticker count)."""
        stock_dfs = {}
        for key in stock_objects:
            frame = stock_objects[key].history(period='max')
            frame = pf.date_time_prep(frame)
            frame = pf.rolling_aves(frame)
            frame = frame.merge(stock_investing[key], how='left', left_index=True, right_index=True)
            frame = frame.merge(stock_stocks[key], how='left', left_index=True, right_index=True)
            frame = frame.fillna(0)
            stock_dfs[key] = pf.future_close_setup(frame, 5)
        return pf.combiner(stock_dfs), len(stock_dfs)

    def _fit_and_score(frame, n_stocks):
        """Train a fresh network on `frame` (last column = target) and return its test R^2."""
        X = frame.iloc[:, :-1]
        y = frame.iloc[:, -1:]

        split_mark = int(len(frame) - (split_time * n_stocks))
        X_train = X.head(split_mark)
        X_test = X.tail(len(frame) - split_mark)
        y_train = y.head(split_mark)
        y_test = y.tail(len(frame) - split_mark)

        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        early_stopping = EarlyStopping(monitor='loss',
                                       patience=3, restore_best_weights=True)

        # Seed NumPy and TensorFlow before every fit so each candidate starts
        # from the same initial weights and scores are comparable.
        np.random.seed(seed)
        tf.random.set_seed(seed)

        stock_model = Sequential()
        stock_model.add(Dense(units=500, input_dim=X_train_scaled.shape[1], activation='relu'))
        stock_model.add(Dense(units=500, activation='relu'))
        stock_model.add(Dense(units=500, activation='relu'))
        stock_model.add(Dense(units=1, activation='relu'))
        stock_model.compile(optimizer='adam', loss='mean_squared_error')
        # `workers` is not passed: Keras only uses it for generator/Sequence
        # inputs, and newer Keras releases reject the argument.
        stock_model.fit(X_train_scaled, y_train, epochs=60, batch_size=32, verbose=0,
                        callbacks=[early_stopping])
        stock_close_pred = stock_model.predict(X_test_scaled)

        # Rows without a known target are dropped from the truth, and the
        # predictions are truncated to the same length before scoring.
        y_known = y_test.dropna()
        return r2_score(y_known, stock_close_pred[:len(y_known)])

    # The prepared data does not depend on which columns are dropped, so it is
    # downloaded and assembled once rather than for every candidate.
    combine_df, n_stocks = _build_combined_frame()
    max_score = _fit_and_score(combine_df, n_stocks)

    drop_list = []
    drop_flag = True
    while drop_flag:
        print('Max Score: ' + str(max_score))
        print('Drop List: ' + str(drop_list))
        drop_flag = False
        drop_col = None

        # Feature columns still in play; the target (last column) is excluded
        # so it can never be picked as a removal candidate.
        remaining = [col for col in combine_df.columns[:-1] if col not in drop_list]

        # Removing the only remaining feature would leave the model without
        # inputs, so a pass needs at least two candidates.
        if len(remaining) > 1:
            for curr_col in remaining:
                candidate_df = combine_df.drop(columns=drop_list + [curr_col])
                curr_score = _fit_and_score(candidate_df, n_stocks)

                if curr_score >= max_score:
                    drop_col = curr_col
                    max_score = curr_score
                    drop_flag = True

        # Only record a column when this pass actually found one; a pass with
        # no improvement ends the search without touching the drop list.
        if drop_flag:
            drop_list.append(drop_col)

    return max_score, drop_list