In [None]:
"""Sell Side Recommendation Model

Build a classification model that replaces the sell-side equity analyst team covering 
a particular stock or index

* Output a buy, sell, or hold recommendation by producing a model that shows 
the projected price vs. the street consensus (average of the major wall street bank 
price targets vs. our price target)"""

In [None]:
"""Look at a variety of factors that could potentially predict the price action
of the stock:
* Macro: Yield curve, interest rates 
* Fundamental: EBITDA, EPS, revenue
* Technical: Closing prices, moving averages, VWAP
* Sentiment: Sentiment analysis through NLP """

In [None]:
"""ML MODELS WE WILL USE
* random forests
* logistic regression
* neural network """

In [None]:
"""If the backtested model shows that the stock will move in a certain way, we will use a linear regression
of the price to report the anticipated price action (mix of both quantitative and qualitative factors)
"""

In [None]:
"""Finally, incorporate a UI through Streamlit"""

In [None]:
# Build a Neural Network using these factors to create a multi-classification model
# to determine buy, sell, or hold

In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

KeyboardInterrupt: 

In [8]:
import pandas as pd

# Toy example: one-hot encode Buy / Hold / Sell labels.
# An empty prefix combined with the default "_" separator yields the
# column names _B, _H and _S.
target = pd.DataFrame(list("BHHSSB"))
pd.get_dummies(target, prefix="")

Unnamed: 0,_B,_H,_S
0,1,0,0
1,0,1,0
2,0,1,0
3,0,0,1
4,0,0,1
5,1,0,0


In [None]:
# Read the applicants_data.csv file from the Resources folder into a Pandas DataFrame
# TODO: fit to our dataset once we have data ready (this CSV is placeholder data)
# `df` is kept as a second name for the same object for backward compatibility.
applicant_data_df = df = pd.read_csv(
    Path("Resources/applicants_data.csv")
)

# Review the DataFrame.
# This has to be the last expression in the cell for Jupyter to render it;
# the note above is a comment (not a trailing string literal) for that reason.
applicant_data_df.head()

In [None]:
# Review the data types associated with the columns
print(applicant_data_df.dtypes)

In [None]:
# Drop the 'EIN' and 'NAME' columns from the DataFrame
applicant_data_df = applicant_data_df.drop(columns = ["EIN", "NAME"])

# Review the DataFrame
applicant_data_df

"""FIT TO OUR DATASET ONCE WE HAVE DATA READY"""

In [None]:
# Create a list of categorical variables 
categorical_variables = applicant_data_df.select_dtypes(include = ["object", "category"]).columns.tolist()

# Display the categorical variables list
categorical_variables

"""FIT TO OUR DATASET ONCE WE HAVE DATA READY"""

In [None]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse_output = False)

In [None]:
# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(applicant_data_df[categorical_variables])

In [None]:
# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(encoded_data, columns = enc.get_feature_names_out(categorical_variables))

# Review the DataFrame
encoded_df

In [None]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
numerical_variables = applicant_data_df.select_dtypes(["int64", "float64"])
encoded_df = pd.concat([encoded_df, numerical_variables], axis = 1)

# Review the Dataframe
encoded_df

In [None]:
# Define the target set y using the IS_SUCCESSFUL column
y = encoded_df["IS_SUCCESSFUL"]

# Display a sample of y
y

"""FIT TO OUR DATASET ONCE WE HAVE DATA READY"""

In [None]:
# Define features set X by selecting all columns but IS_SUCCESSFUL
X = encoded_df.drop(columns = "IS_SUCCESSFUL")

# Review the features DataFrame
X

"""FIT TO OUR DATASET ONCE WE HAVE DATA READY"""

In [None]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Define the the number of inputs (features) to the model
number_input_features = number_input_features = X_train_scaled.shape[1]

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
number_output_neurons = 3

In [None]:
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 = hidden_nodes_layer1 // 2

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

In [None]:
# Create the Sequential model instance
nn = Sequential()

In [None]:
# Add the first hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

In [None]:
# Add the second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="softmax"))

In [None]:
# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=number_output_neurons, activation="softmax"))

In [None]:
# Display the Sequential model summary
nn.summary()

In [None]:
# Compile the Sequential model.
# The output layer is a multi-unit softmax, so the matching loss is categorical
# cross-entropy (binary cross-entropy is for a single sigmoid output); this
# also matches how the alternative models in this notebook are compiled.
nn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Fit the model using 50 epochs and the training data.
# NOTE: categorical_crossentropy expects y_train to be one-hot encoded with one
# column per class (e.g. via pd.get_dummies); if integer class labels are used
# instead, switch the loss to "sparse_categorical_crossentropy".
fit_model = nn.fit(X_train_scaled, y_train, epochs = 50)

In [None]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Set the model's file path
file_path = "AlphabetSoup.h5"

# Export your model to a HDF5 file
nn.save(file_path)

"""FIT TO OUR DATASET ONCE WE HAVE DATA READY"""

In [None]:
# Optimize the Model

In [None]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
number_output_neurons_A1 = 3

In [None]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1_A1 = 20

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1_A1

In [None]:
# Create the Sequential model instance
nn_A1 = Sequential()

In [None]:
# First hidden layer
nn_A1.add(Dense(units=hidden_nodes_layer1_A1, input_dim=number_input_features, activation="softmax"))

# Output layer
nn_A1.add(Dense(units= number_output_neurons_A1, activation="softmax"))

# Check the structure of the model
nn_A1.summary

In [None]:
# Compile the Sequential model
nn_A1.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Fit the model using 50 epochs and the training data
fit_model_A1 = nn.fit(X_train_scaled, y_train, epochs=50)

In [None]:
# Alternative Model 2

In [None]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
number_output_neurons_A2 = 3

In [None]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1_A2 = 50

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1_A2

In [None]:
# Create the Sequential model instance
nn_A2 = Sequential()

In [None]:
# First hidden layer
nn_A2.add(Dense(units=hidden_nodes_layer1_A2, input_dim=number_input_features, activation="softmax"))

# Output layer
nn_A2.add(Dense(units= number_output_neurons_A1, activation="softmax"))

# Check the structure of the model
nn_A2.summary


In [None]:
# Compile the Sequential model
nn_A2.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Fit the model
fit_model_A2 = nn.fit(X_train_scaled, y_train, epochs=50)

In [None]:
# Compare the results of the models

In [None]:
print("Original Model Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
print("Alternative Model 1 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn_A1.evaluate(X_test_scaled, y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
print("Alternative Model 2 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn_A2.evaluate(X_test_scaled, y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Save Results of Alternative Models

In [None]:
# Set the file path for the first alternative model
file_path_A1 = "AlternativeModel1.h5"

# Export your model to a HDF5 file
nn_A1.save(file_path_A1)

In [None]:
# Set the file path for the first alternative model
file_path_A2 = "AlternativeModel2.h5"

# Export your model to a HDF5 file
nn_A2.save(file_path_A2)