In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import datetime, random
import numpy as np; np.random.seed(0)
import warnings
import itertools, os
from zipfile import ZipFile
from urllib.request import urlopen
from io import BytesIO
warnings.filterwarnings('ignore')



**Loading data** 


In [2]:
# Load locally
local = False

if not local:
    # this will download a copy from GitHub and save it locally
    URL = 'https://github.com/amunategui/amunategui.github.io/raw/master/all_stocks_5yr.zip'
    z = urlopen(URL)
    myzip = ZipFile(BytesIO(z.read())).extract('all_stocks_5yr.csv')
    stocks_df = pd.read_csv(myzip)
    
else:
    # read locally saved version
    stocks_df = pd.read_csv('all_stocks_5yr.csv')

In [3]:
stocks_df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


In [4]:
stocks_df.shape

(619040, 7)

**Preparing data**

In [5]:
stock_symbols=list(set(stocks_df["Name"]))
print(stock_symbols[0:10])


['LB', 'IRM', 'HAS', 'NFX', 'JPM', 'LLY', 'MCO', 'AMP', 'JEC', 'ADP']


In [6]:
# build dataset  
X = []
y = []
symbols = []
prediction_dates = []
last_market_dates = []

# rolling predictions
rolling_period = 5
predict_out_period = 5

for stock in stock_symbols:
    stock_data = stocks_df[stocks_df['Name']==stock].copy()
    
    for per in range(rolling_period, len(stock_data)-predict_out_period):
        X_tmp = []
        y_tmp = 0
        for rollper in range(per-rolling_period,per):
            # build the 'features'
            #get the log to normalize the prices
            X_tmp += [np.log(stock_data['close'].values[rollper])]

        X.append(np.array(X_tmp))
        # build 'labels'
        y.append(np.log(stock_data['close'].values[per + predict_out_period]))
        prediction_dates.append(stock_data['date'].values[per + predict_out_period])
        last_market_dates.append(stock_data['date'].values[per])
        symbols.append(stock)

 



In [7]:
stock_model_ready_df = pd.DataFrame(X)
stock_model_ready_df.columns = [str(f) for f in list(stock_model_ready_df)]
stock_model_ready_df.head()
stock_model_ready_df['outcome'] = y
stock_model_ready_df['date'] = prediction_dates
stock_model_ready_df['last_market_date'] = last_market_dates
stock_model_ready_df['symbol'] = symbols


# write a copy to file to use in our web application
stock_model_ready_df.to_csv('stock_market_historical_data.csv', index=None)

In [8]:
stock_model_ready_df.tail()


Unnamed: 0,0,1,2,3,4,outcome,date,last_market_date,symbol
613985,3.455054,3.470723,3.471966,3.483392,3.484312,3.517201,2018-02-01,2018-01-25,CSRA
613986,3.470723,3.471966,3.483392,3.484312,3.491038,3.476614,2018-02-02,2018-01-26,CSRA
613987,3.471966,3.483392,3.484312,3.491038,3.498627,3.432696,2018-02-05,2018-01-29,CSRA
613988,3.483392,3.484312,3.491038,3.498627,3.504355,3.419365,2018-02-06,2018-01-30,CSRA
613989,3.484312,3.491038,3.498627,3.504355,3.492256,3.445214,2018-02-07,2018-01-31,CSRA


In [9]:
# loop the amount of days we don't have an outome
stock_data_live = stocks_df[stocks_df['Name']=='AAPL'].copy()
no_outcome_id = len(stock_data_live)-predict_out_period
future_data = []
prediction_dates = []
last_market_dates = []

# shift everything out into 'no outcome' zone
for per in range(no_outcome_id+1, len(stock_data_live)+1): 
    X_tmp = []
    y_tmp = 0
    for rollper in range(per-rolling_period,per):
        X_tmp += [np.log(stock_data_live['close'].values[rollper])]
    
    future_data.append(np.array(X_tmp))
    last_market_dates.append(stock_data_live['date'].values[rollper])
    # create a business day offset for future dates to avoid weekends and holidays 
    prediction_dates.append(np.busday_offset(np.datetime64(stock_data_live['date'].values[rollper],'D'),predict_out_period, roll='forward'))

future_model_ready_df = pd.DataFrame(future_data)
future_model_ready_df.columns = [str(f) for f in list(future_model_ready_df)]

# outcome here is just a random placeholder, it won't be used
future_model_ready_df['outcome'] = -1
future_model_ready_df['date'] = prediction_dates
future_model_ready_df['last_market_date'] = last_market_dates
future_model_ready_df.head()

Unnamed: 0,0,1,2,3,4,outcome,date,last_market_date
0,5.144642,5.123726,5.117814,5.120565,5.122654,-1,2018-02-08,2018-02-01
1,5.123726,5.117814,5.120565,5.122654,5.078294,-1,2018-02-09,2018-02-02
2,5.117814,5.120565,5.122654,5.078294,5.052992,-1,2018-02-12,2018-02-05
3,5.120565,5.122654,5.078294,5.052992,5.093934,-1,2018-02-13,2018-02-06
4,5.122654,5.078294,5.052992,5.093934,5.072295,-1,2018-02-14,2018-02-07


**Building the model**

In [10]:
features=[ f for f in stock_model_ready_df.columns if f not in ['outcome']]
features_=[f  for f in stock_model_ready_df.columns if f not in ['date', 'last_market_date', 'symbol','outcome']]
label = 'outcome'

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json
from sklearn.model_selection import train_test_split

# fix random seed for reproducibility
np.random.seed(7)

x_train, x_test, y_train, y_test = train_test_split(stock_model_ready_df[features], stock_model_ready_df[label], test_size=0.33, random_state=42 )
model= Sequential()
model.add(Dense(5,activation='relu',input_dim=5))
model.add(Dense(5,activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(x_train[features_],y_train, epochs=3, batch_size=5, verbose=1)



ModuleNotFoundError: No module named 'tensorflow'

In [None]:
scores = model.evaluate(x_test[features_], y_test, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[0], scores*100))


In [None]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [None]:
# deserialize the model
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

In [None]:
# evaluate loaded model on test data
loaded_model.compile(loss='mean_squared_error', optimizer='adam')
score = loaded_model.evaluate(x_test[features_], y_test, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[0], score*100))

In [None]:
# predictions have to be restored for processing
x_evaluate_tmp = stock_model_ready_df.copy()
x_evaluate_tmp = x_evaluate_tmp[x_evaluate_tmp['symbol'] == 'AAPL']

predictions = loaded_model.predict(x_evaluate_tmp[features_])
predictions

In [None]:
actuals = x_evaluate_tmp['outcome']
forecasts =  [ item for elem in predictions.tolist() for item in elem] 
print(actuals[0:10])

from sklearn.metrics import mean_squared_error
print('RMSE: (+/- off)', np.sqrt(mean_squared_error(list(actuals), forecasts)))

In [None]:
x_done = x_evaluate_tmp.copy()
x_done['forecast'] = forecasts

x_done = x_done.sort_values('date')
x_done = x_done.tail(50)
x_done['date'] = pd.to_datetime(x_done['date'])

fig, ax = plt.subplots(figsize=(15,8))
plt.suptitle('AAPL Test Forecast: ' + str(np.min(x_done['date']))[:10] + ' to '+ str(np.max(x_done['date']))[:10])
plt.plot(x_done['date'] , x_done['outcome'],  color='black', label='Actual')
plt.plot(x_done['date'] , x_done['forecast'],  color='blue', label='Forecast')
plt.legend()
plt.grid()
plt.xticks(rotation='vertical')
ax.xaxis_date()      
fig.autofmt_xdate()
plt.show()

Forecast on live data (basically the last x days where we don't have an outcome...)


In [None]:
# forecast on live data (basically the last x days where we don't have an outcome...)
predictions = loaded_model.predict(x_evaluate_tmp[features_])
forecasts =  [ item for elem in predictions.tolist() for item in elem] 

future_df_tmp = x_evaluate_tmp.copy()
future_df_tmp['forcast'] = np.exp(forecasts)
future_df_tmp['date'] = pd.to_datetime(future_df_tmp['date'])  

print(np.max(future_df_tmp['date']))
future_df_tmp = future_df_tmp.tail(10)

future_df_tmp = future_df_tmp.sort_values('date')

fig, ax = plt.subplots(figsize=(16, 8))
plt.suptitle('AAPL')
plt.plot(future_df_tmp['date'], future_df_tmp['forcast'], color='blue', label='Forecast')
plt.legend()
plt.grid()
plt.xticks(rotation='vertical')
plt.show()

In [None]:
actuals = list(future_df_tmp.tail(1).values[0])[0:predict_out_period-1]
# transform log price to price of past data
actuals = list(np.exp(actuals))
days_before = future_df_tmp['last_market_date'].values[-1]
days_before_list = []
for d in range(rolling_period-1):
    days_before_list.append(str(np.busday_offset(np.datetime64(days_before,'D'),-d, roll='backward')))
     
plt.plot(days_before_list, actuals, color='green', linewidth=4)
 
for d in range(1, predict_out_period+1):
    days_before_list.append(str(np.busday_offset(np.datetime64(days_before,'D'),d, roll='forward')))
    actuals.append(future_df_tmp['forcast'].values[-1])
     

plt.suptitle('Forcast for ' + str(future_df_tmp['date'].values[-1])[0:10] + ': $' + str(np.round(future_df_tmp['forcast'].values[-1],2)))
plt.plot(days_before_list, actuals, color='blue', linestyle='dashed')
plt.grid()
plt.xticks(days_before_list, days_before_list, rotation='vertical')

**Creating our local Flask web application**

In [None]:
stock_market_live_data = []
for stock in stock_symbols:
    stock_data = stocks_df[stocks_df['Name']==stock].copy()
    # get only the last rolling_period 
    stock_data = stock_data.tail(rolling_period)

    no_outcome_id = len(stock_data)-predict_out_period
    future_data = []
    future_dates = []
    last_market_dates = []
    # shift everything out into 'no outcome' zone
    for per in range(no_outcome_id+1, len(stock_data)+1): 
        X_tmp = []
        y_tmp = 0
        for rollper in range(per-rolling_period,per):
            X_tmp +=[ np.log(stock_data['close'].values[rollper])]

        future_data.append(np.array(X_tmp))
        last_market_dates.append(stock_data['date'].values[rollper])
        # create a business day offset for future dates to avoid weekends and holidays 
        future_date = np.busday_offset(np.datetime64(stock_data['date'].values[rollper],'D'),predict_out_period, roll='forward')
    
    stock_market_live_data.append(X_tmp + [-1] + [str(future_date)] + [stock_data['date'].values[rollper]] + [stock])
   
stock_market_live_data = pd.DataFrame(stock_market_live_data)
stock_market_historical_data = pd.read_csv('stock_market_historical_data.csv')
stock_market_live_data.columns = list(stock_market_historical_data)
# write a copy to file to use in our web application
stock_market_live_data.to_csv('stock_market_live_data.csv', index=None)

In [None]:
stock_market_live_data.head()

In [None]:
import os

ROLLING_PERIOD = 5
PREDICT_OUT_PERIOD = 5
BASE_DIR = ''
FEATURES = [str(id) for id in range(0,ROLLING_PERIOD)]

stock_market_historical_data = None
stock_market_live_data = None
predict_fn = None
saved_regression_model_path ='model.json'
saved_model_weights_path = "model.h5"



def prepare_data():
    global stock_market_historical_data, stock_market_live_data, predict_fn
    
    # deserialize the model
    json_file = open(saved_regression_model_path, 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    predict_fn = model_from_json(loaded_model_json)
    # load weights into new model
    predict_fn.load_weights(saved_model_weights_path)
      
    stock_market_historical_data = pd.read_csv(os.path.join(BASE_DIR, 'stock_market_historical_data.csv'))
    stock_market_live_data = pd.read_csv(os.path.join(BASE_DIR, 'stock_market_live_data.csv'))

    
prepare_data()
stock_market_live_data.head()

In [None]:
def get_stock_prediction(symbol):
 
    temp_df = stock_market_live_data[stock_market_live_data['symbol']==symbol]
    print(temp_df)
    # forecast on live data (basically the last x days where we don't have an outcome...)
    predictions = predict_fn.predict(temp_df[features_])
    forecasts =  [ item for elem in predictions.tolist() for item in elem] 
 
    return(forecasts)
 

# run a prediction test
predictions = get_stock_prediction('DUK')
np.exp(predictions)

In [None]:
# create a plot to display
def get_plot_prediction(symbol):
    
    predictions = get_stock_prediction(symbol)
    
    if (len(predictions) > 0):
        temp_df = stock_market_live_data[stock_market_live_data['symbol']==symbol]

        actuals = list(temp_df[FEATURES].values[0])
        # transform log price to price of past data
        actuals = list(np.exp(actuals))
        
        days_before = temp_df['last_market_date'].values[-1]
        days_before_list = []
        for d in range(ROLLING_PERIOD):
            days_before_list.append(str(np.busday_offset(np.datetime64(days_before,'D'),-d, roll='backward')))
        
        fig, ax = plt.subplots(figsize=(8,3))
        plt.plot(days_before_list, actuals, color='green', linewidth=4)

        for d in range(1, PREDICT_OUT_PERIOD+1):
            days_before_list.append(str(np.busday_offset(np.datetime64(days_before,'D'),d, roll='forward')))
            actuals.append(np.exp(predictions[-1]))

        plt.suptitle('Forecast for ' + str(temp_df['date'].values[-1])[0:10] + ': $' + 
                     str(np.round(np.exp(predictions[-1]),2)))
        
        plt.plot(days_before_list, actuals, color='blue', linestyle='dashed')
        plt.grid()
        plt.xticks(days_before_list, days_before_list, rotation='vertical')
        plt.show()

get_plot_prediction('DUK') 

In [None]:
# load nasdaq corollary material
stock_company_info_amex = None
stock_company_info_nasdaq = None
stock_company_info_nyse = None

def load_fundamental_company_info():
    global stock_company_info_amex, stock_company_info_nasdaq, stock_company_info_nyse
    import pandas as pd
    stock_company_info_amex = pd.read_csv('stock_company_info_amex.csv')
    
    stock_company_info_nasdaq = pd.read_csv('stock_company_info_nasdaq.csv')
    
    stock_company_info_nyse = pd.read_csv('stock_company_info_nyse.csv')
    
load_fundamental_company_info()

In [None]:
def get_fundamental_information(symbol):
    CompanyName = "No company name"
    Sector = "No sector"
    Industry = "No industry"
    MarketCap = "No market cap"
    Exchange = 'No exchange'
    
    if (symbol in list(stock_company_info_nasdaq['Symbol'])):
        data_row = stock_company_info_nasdaq[stock_company_info_nasdaq['Symbol'] == symbol]  
        CompanyName = data_row['Name'].values[0]
        Sector = data_row['Sector'].values[0]
        Industry = data_row['Industry'].values[0]
        MarketCap = data_row['MarketCap'].values[0]
        Exchange = 'NASDAQ'
        
    elif (symbol in list(stock_company_info_amex['Symbol'])):
        data_row = stock_company_info_amex[stock_company_info_amex['Symbol'] == symbol]  
        CompanyName = data_row['Name'].values[0]
        Sector = data_row['Sector'].values[0]
        Industry = data_row['Industry'].values[0]
        MarketCap = data_row['MarketCap'].values[0]
        Exchange = 'AMEX'
 
    elif (symbol in list(stock_company_info_nyse['Symbol'])):
        data_row = stock_company_info_nyse[stock_company_info_nyse['Symbol'] == symbol]  
        CompanyName = data_row['Name'].values[0]
        Sector = data_row['Sector'].values[0]
        Industry = data_row['Industry'].values[0]
        MarketCap = data_row['MarketCap'].values[0]
        Exchange = 'NYSE'
 
    return (CompanyName, Sector, Industry, MarketCap, Exchange)

fundamentals = get_fundamental_information('AAPL')
fundamentals

In [None]:
pip install wikipedia

In [None]:

def get_wikipedia_intro(symbol):
    import wikipedia
    company_name = get_fundamental_information(symbol)[0]
    description = wikipedia.page(company_name).content
    return(description.split('\n')[0])

get_wikipedia_intro('DUK')