# Dependencies

In [93]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import tensorflow as tf
import matplotlib.pyplot as plt

# Creating/Cleaning Training Data

In [94]:
# pd.set_option("display.max_rows", None, "display.max_columns", None)
training_df = pd.read_csv('training_data.csv', index_col=0)
training_df['Buy'] = ""
buy_classification = ['buy', 'outperform', 'positive'] 

for index, row in training_df.iterrows():
    ticker = row['Ticker'].lower()
    analyst_suggestions = pd.read_csv('../Data_Collection/Scrapers/Final_Analyst_Rating.csv', index_col=0)
    for inner_index, inner_row in analyst_suggestions.iterrows():
        if analyst_suggestions.at[inner_index, 'Ticker'].lower() == ticker:
            if analyst_suggestions.at[inner_index, 'Rating'].lower() in buy_classification:
                training_df.at[index, 'Buy'] = 1
            else:
                training_df.at[index, 'Buy'] = 0

# Scaling the data
Scaler = MinMaxScaler()
cols = [x for x in training_df.columns if x not in ['Buy', 'Ticker']]
training_df[cols] = Scaler.fit_transform(training_df[cols])

##### TEMPORARY ######
cols_to_change = [x for x in training_df.columns if x not in ['Buy', 'Ticker']]
for col in cols_to_change:
    training_df.fillna(value=training_df[col].mean(), inplace=True)
######################

# training_df.drop(['payoutRatio', 'regularMarketVolume', 'sharesShort', 'mostRecentQuarter', 'forwardEps'
#                  ,'bookValue', 'sharesShort', 'sharesPercentSharesOut', 'mostRecentQuarter', 'nextFiscalYearEnd'
#                  , 'enterpriseValue', 'sharesShortPriorMonth', 'shortPercentOfFloat', 'earningsQuarterlyGrowth'],
#                  axis=1, inplace=True)
training_df

Unnamed: 0,Ticker,Conversations,Headlines,payoutRatio,beta,regularMarketVolume,profitMargins,52WeekChange,forwardEps,bookValue,...,heldPercentInsiders,mostRecentQuarter,nextFiscalYearEnd,shortRatio,enterpriseValue,earningsQuarterlyGrowth,sharesShortPriorMonth,shortPercentOfFloat,pegRatio,Buy
0,AAPL,0.641172,0.741175,0.080603,0.500690,0.843714,0.269652,0.021798,0.078714,0.048720,...,0.001245,0.830986,0.298734,0.074169,1.000000,0.002477,0.920416,0.002408,0.017440,1
1,AMC,0.981364,0.939011,0.000000,0.516828,1.000000,0.236303,0.235874,0.012879,0.039449,...,0.005729,0.849765,0.541772,0.056266,0.098521,0.380623,0.759281,0.529500,0.017174,0
2,TSLA,0.469102,0.655823,0.000000,0.616606,0.167755,0.243252,0.036696,0.092108,0.078593,...,0.337336,0.849765,0.541772,0.090367,0.340473,0.016949,0.299452,0.086093,0.017698,1
3,NVDA,0.397425,0.758636,0.028268,0.522890,0.239261,0.279415,0.041997,0.067999,0.055150,...,0.072552,1.000000,0.620253,0.051151,0.289678,0.005492,0.265999,0.019266,0.017475,1
5,AMD,0.438584,0.711404,0.000000,0.623979,0.518347,0.270656,0.025647,0.051515,0.051441,...,0.007650,0.830986,0.529114,0.041773,0.135216,0.006619,0.828714,0.161650,0.017467,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,IBM,0.000000,0.740272,0.547961,0.504191,0.012577,0.246010,0.380623,0.144447,0.076816,...,0.002348,0.422535,0.541772,0.549872,0.150407,0.000688,0.229367,0.071945,0.017331,0
138,HON,0.454357,0.625031,0.276551,0.496705,0.006080,0.255209,0.380623,0.113950,0.079559,...,0.002099,0.422535,0.541772,0.166240,0.145114,0.000832,0.037687,0.010536,0.017624,0
139,MCD,0.328349,0.470448,0.364369,0.413112,0.014648,0.271388,0.380623,0.118896,0.029698,...,0.000907,0.422535,0.541772,0.167945,0.167329,0.001609,0.046995,0.008730,0.017419,0
140,RTX,0.057699,0.580205,0.565851,0.478572,0.021366,0.240979,0.380623,0.380623,0.380623,...,0.002046,0.849765,0.541772,0.218244,0.144836,0.380623,0.114644,0.013546,0.017349,1


In [95]:
X_total = training_df[[x for x in training_df.columns if x not in ['Buy', 'Ticker']]]
y_total = training_df['Buy']
X_train, X_test, y_train, y_test = train_test_split(X_total, y_total, test_size=0.33, random_state=42)

In [96]:
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=22, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [99]:
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, X_total, y_total, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 50.44% (13.85%)
