In [42]:
import pandas_datareader.data as web
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from keras import models, layers, optimizers
import math
from pytrends.request import TrendReq
import pytrends

In [43]:
def stock_bot(stock_name, comp_name, verbose=0, epochs=2000):
    print(comp_name)
    # ---- Read stock data and Google trends data.
    start = datetime.today() - timedelta(370*5)
    end = datetime.today() - timedelta(1)
    df = web.DataReader(stock_name, 'iex', start, end)
    stock = df.loc[:,'open']
    num = len(stock)
    
    pytrends = TrendReq(hl='en-US', tz=300)
    frame = df.index[0] + ' ' + df.index[-1]
    pytrends.build_payload([comp_name], timeframe=frame, cat=0, geo='US')
    search = pytrends.interest_over_time()
    
    # ---- Rescale trends data to be between -.5 and .5
    trend_max = max(search.values[:,0])
    trend_min = min(search.values[:,0])
    trend_range = trend_max - trend_min
    trend_med = trend_min + (trend_range / 2)
    search_scaled = np.zeros((len(search.values)))
    search_scaled = (search.values[:, 0] - trend_med) / trend_range
    
    # ---- Format input features as (day, trend value)
    x = np.zeros((num, 2))
    ind_cur = 0
    trend_cur = search_scaled[ind_cur]
    for i in range(len(x)):
        x[i, 0] = (i / num) - .7
        if (ind_cur < len(search_scaled) and search.index[ind_cur].year == int(df.index[i][:4]) and search.index[ind_cur].month == int(df.index[i][5:7]) and search.index[ind_cur].day <= int(df.index[i][8:])+2 and search.index[ind_cur].day >= int(df.index[i][8:])-2):
            trend_cur = search_scaled[ind_cur]
            ind_cur += 1
        x[i, 1] = trend_cur
    
    # ---- Normalize stock data
    y = stock
    scaler = MinMaxScaler()
    y = np.array(stock).reshape(len(stock), 1)
    scaler = scaler.fit(y)
    y = scaler.transform(y)
    y = y.reshape(len(stock))
    
    # ---- Shuffle data
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    y = y[shuffle_indices]
    x = x[shuffle_indices]
    
    # ---- Construct network
    network = models.Sequential()
    network.add(layers.Dense(256, activation='relu', input_shape=(2,)))
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(1))
    
    # ---- Compile and run network
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    history = network.fit(x, y, epochs=epochs, batch_size=50, verbose=verbose)
    
    # ---- Build test data set
    xrange = 1400
    x_test = np.zeros((xrange, 2))
    ind_cur = 0
    trend_cur = search_scaled[ind_cur]
    for i in range(len(x_test)):
        x_test[i, 0] = (i / num) - .7
        if (ind_cur < len(search_scaled) and search.index[ind_cur].year == int(df.index[i][:4]) and search.index[ind_cur].month == int(df.index[i][5:7]) and search.index[ind_cur].day <= int(df.index[i][8:])+2 and search.index[ind_cur].day >= int(df.index[i][8:])-2):
            trend_cur = search_scaled[ind_cur]
            ind_cur += 1
        x_test[i, 1] = trend_cur
    y_pred = network.predict(x_test)
    y_pred = scaler.inverse_transform(y_pred)[:,0]
    
    return num, y_pred

In [44]:
companies_names = np.array(['Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Berkshire Hathaway', 'Alibaba Group', 
                            'Johnson & Johnson', 'JPMorgan', 'ExxonMobil', 'Bank of America', 'Walmart', 'Wells Fargo', 
                            'Royal Dutch Shell', 'Visa', 'Procter & Gamble', 'Anheuser-Busch Inbev','AT&T', 
                            'Chevron Corporation', 'UnitedHealth Group', 'Pfizer', 'China Mobile', 'Home Depot', 'Intel', 
                            'Taiwan Semiconductor', 'Verizon Communications', 'Oracle Corporation', 'Citigroup',
                            'Novartis'])
companies_stocks = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'FB', 'BRK.A', 'BABA', 'JNJ', 'JPM', 'XOM', 'BAC', 'WMT', 'WFC', 'RDS.A', 
                    'V', 'PG', 'BUD', 'T', 'CVX', 'UNH', 'PFE', 'CHL', 'HD', 'INTC', 'TSM', 'VZ', 'ORCL', 'C', 'NVS']
percent_returns = []

for i in range(len(companies_names)):
    num, prediction = stock_bot(companies_stocks[i], companies_names[i], epochs=10)
    percent_change = (prediction[num+20] - prediction[num])/prediction[num]
    percent_returns.append(percent_change)

sort_ind = np.argsort(percent_returns)
print('\n\n')
print(companies_names[sort_ind][-3:])

x = np.arange(len(companies_names))
plt.bar(x, percent_returns)
plt.xticks(x, companies_names, rotation=90)
plt.show()
plt.close()

Apple
Google
Microsoft
Amazon
Facebook
Berkshire Hathaway
Alibaba Group
Johnson & Johnson
JPMorgan
ExxonMobil
Bank of America
Walmart
Wells Fargo
Royal Dutch Shell
Visa
Procter & Gamble
Anheuser-Busch Inbev
AT&T
Chevron Corporation
UnitedHealth Group
Pfizer
China Mobile
Home Depot
Intel
Taiwan Semiconductor
Verizon Communications
Oracle Corporation


ConnectionError: HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000287428AC978>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))