# ANN Model

In [1]:
import numpy as np
import pandas as pd
from numpy.random import seed # Set random seed
seed(1)

# Target_1

In [2]:
df = pd.read_csv("UBS_Cleaned_Date.csv").iloc[2:, :].set_index("date")

### Select columns from data set

In [3]:
# Basic Varaibles
df = df[["Change_Close",
            "Wiki_RSI_Move", "Dow_Disparity_Move", 
            "Wiki_MAvg_Move", "Nas_Move",
            "Google_rs", "Google_ROC_Move", 
            "Nas_Disparity_Move", "Google_Disparity_s_Move",
            "Nas_Disparity_s_Move", "Google_gain",
            "Dow_Disparity_s_Move", "Google_RSI_Move", "Google_diff",
            "Dow_Move", "Google_Moment_2_s", "Dow_Rocp", "Nas_gain",
            "Wiki_Disparity_s", "Nas_MAvg_s_Move", "Google_Move",
            "Google_Disparity_s", "Google_total", "UBS_y", "Google_Rocp",
            "target_1"
            ]]

In [4]:
df = df.dropna() # to remove NaN
df = df[~(df.isin([np.inf, -np.inf]).any(axis=1))] # to remove inf

### Split data set into independent and dependent variables

In [5]:
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

### Split data set into training/test set

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

### Feature scaling

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = pd.DataFrame(sc.fit_transform(X_train), columns = X_train.columns)
X_test = pd.DataFrame(sc.transform(X_test), columns = X_test.columns)

### Feature selection (remove highly correlated features)

In [9]:
from feature_selector import FeatureSelector
n = len(X_train.T)
fs = FeatureSelector(data = X_train)
fs.identify_collinear(correlation_threshold = 0.7) # select features from training set
corr = fs.ops['collinear']
X_train = fs.remove(methods = ['collinear']) # remove selected features from training set
to_remove = pd.unique(fs.record_collinear['drop_feature']) # features to remove
X_test = X_test.drop(columns = to_remove) # remove selected features from test set

ImportError: attempted relative import with no known parent package

### Create the artificial neural network

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

In [None]:
num_input_nodes = len(X_train.T)
num_output_nodes = 1
num_hidden_nodes = int((num_input_nodes + num_output_nodes) / 2) # a typical value

### Add layers

In [None]:
classifier = Sequential()
classifier.add(Dense(output_dim = num_hidden_nodes, init = "uniform", activation = "sigmoid",
                     input_dim = num_input_nodes))
classifier.add(Dense(output_dim = num_hidden_nodes, init = "uniform", activation = "sigmoid"))

TypeError: __init__() missing 1 required positional argument: 'units'

In [None]:
# Use sigmoid activation function for the output layer because we're predicting 
# a probability that the stock price will go up

classifier.add(Dense(output_dim = num_output_nodes, init = "uniform", activation = "sigmoid"))

NameError: name 'classifier' is not defined

### Compile and train the ANN

In [None]:
classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
classifier.fit(X_train, Y_train, batch_size = 10, nb_epoch = 25)

### Make predictions

In [None]:
Y_test_pred_prob = classifier.predict(X_test)
Y_test_pred = (Y_test_pred_prob >= 0.5)
Y_train_pred_prob = classifier.predict(X_train)
Y_train_pred = (Y_train_pred_prob >= 0.5)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def metrics(actual, pred):
    print("Accuracy:    ", round(accuracy_score(actual, pred) * 100, 2), "%")
    print("Precision:   ", round(precision_score(actual, pred) * 100, 2), "%")
    print("Recall:      ", round(recall_score(actual, pred) * 100, 2), "%")
    print("F1 Score:    ", round(f1_score(actual, pred) * 100, 2), "%")

In [None]:
print("Test set =============")
metrics(Y_test, Y_test_pred)
print("")
print("Train set ============")
metrics(Y_train, Y_train_pred)