In [47]:
# For data manipulation
from os import set_inheritable
import pandas as pd

# To extract fundamental data
from bs4 import BeautifulSoup as bs
from pandas.core.tools import numeric
import requests
import yfinance as yf
import os
import datetime
import requests
import csv
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
import schedule
import time

In [48]:
### Data Collection

def fundamental_metric(soup, metric):

    return soup.find(text = metric).find_next(class_='snapshot-td2').text

def get_fundamental_data(df):
    for symbol in df.index:
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
            url = ("http://finviz.com/quote.ashx?t=" + symbol.lower())
            r=requests.get(url,headers=headers)
            r.raise_for_status()
            try:
                soup = bs(r.content,'html.parser') 
            except:
                print('fail at parse')
            for m in df.columns:               
                df.loc[symbol,m] = fundamental_metric(soup,m)
        except:
            print(symbol, ' not found')             
    return df

def pull_wsb_data():
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
        url='https://stocks.comment.ai/trending.html'
        r=requests.get(url,headers=headers)
        r.raise_for_status()
        doc=bs(r.text,'html.parser')
        trs=doc.find_all('tr')
        tickers=[]
        sentiment=[]
        for tr in trs[1:]:
            tds=tr.find_all('td')
            td_ls=[]
            for td in tds:
                td_ls.append(td.text)
            try:
                sentiment.append(int(td_ls[0]))
            except:
                sentiment.append(None)
            
            try:
                tickers.append(str(td_ls[2]))
            except:
                tickers.append(None)
    except requests.ConnectionError as e:
        print("Unable to reach WSB trending data site",e )
    return tickers, sentiment

def first_pull():
    metric = ['P/B','P/E','Forward P/E','PEG','Debt/Eq','EPS (ttm)','Dividend %','ROE','ROI','EPS Q/Q','Insider Own']
    ret=pull_wsb_data()
    df = pd.DataFrame(index=ret[0],columns=metric)
    df = get_fundamental_data(df)
    df['Sentiment']=ret[1]
    global result
    result=df
    return result

def second_pull(df):
    index=list(df.index)
    per_change_ls=[]
    for ticker in index:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
        url = ("http://finviz.com/quote.ashx?t=" + ticker.lower())
        r=requests.get(url,headers=headers)
        try:
            r.raise_for_status()
        except:
            print(ticker, 'request failed')
            per_change_ls.append(None)
            continue
        try:
            soup = bs(r.content,'html.parser') 
        except:
            print('fail at parse')
        m="Change"
        per_change_ls.append(str(fundamental_metric(soup,m)))
    df['Percent Change']=per_change_ls
    global result
    result=df
    return result

def get_current_price(ticker_obj):
    todays_data = ticker_obj.history(period='1d')
    return todays_data['Close'][0]

def store_data(df):
    folder='Stocks'
    if not os.path.exists(folder):
        os.mkdir(folder)
    for ticker in list(df.index):
        folder1=os.path.join(folder,ticker)
        if not os.path.exists(folder1):
            os.mkdir(folder1)
        new_data_file=os.path.join(folder1,'new_data.pkl')
        if not os.path.exists(new_data_file):
            f=open(new_data_file,'w')
            f.close()
            data=pd.DataFrame(df.loc[ticker]).T
            data.to_pickle(new_data_file)
        elif os.path.exists(new_data_file):
            data=pd.read_pickle(new_data_file)
            data=pd.concat([data,pd.DataFrame(df.loc[ticker]).T])
            data.to_pickle(new_data_file)

def clean_df(df):
    for ticker in list(df.index):
        try:
            df.loc[ticker,'Percent Change']=float(df.loc[ticker,'Percent Change'].replace('%',''))
        except: 
            continue
    df = df.apply(pd.to_numeric, errors='coerce')
    df=df.replace(np.nan,0)
    global result
    result=df
    return result
    
def store_data_long_term():
    stocks_folder=os.listdir('Stocks')
    for folder in stocks_folder:
        long_data_file=os.path.join('Stocks',folder,'long_data.pkl')
        new_data_file=os.path.join('Stocks',folder,'new_data.pkl')
        data=pd.read_pickle(new_data_file)
        if not os.path.exists(long_data_file):
            f=open(long_data_file,'w')
            f.close()
            data.to_pickle(long_data_file)
            os.remove(new_data_file)
        else:
            long_data=pd.read_pickle(long_data_file)
            for row in list(data.index):
                long_data=pd.concat([long_data, pd.DataFrame(data.loc[row])])
            long_data.to_pickle(long_data_file)
            os.remove(new_data_file)

def load_data(ticker):
    filename=(os.path.join('Stocks',ticker,'new_data.pkl'))
    data=pd.read_pickle(filename)
    return data

def create_prac_data(df):
    for i in range(7):
        store_data(df)

def store_daily_df(df):
    filename=os.join('Stocks','daily.pkl')
    f=open(filename,'w')
    f.close()
    df.to_pickle(filename)

def load_daily_df():
    filename=os.join('Stocks','daily.pkl')
    df=pd.read_pickle(filename)
    os.remove(filename)
    return df

def run_data_first():
    df=first_pull()
    store_daily_df(df)

def run_data_second():
    df=load_daily_df()
    df=second_pull(df)
    df=clean_df(df)
    store_data(df)


In [49]:
df=first_pull()
df=second_pull(df)
df=clean_df(df)

In [50]:
### KERAS Modeling

def init_model(): 
    model = Sequential()
    model.add(Dense(12, input_dim=12, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile( optimizer='adam', loss='mean_squared_error')
    return model

def format_data(ticker):
    data=load_data(ticker)
    target_output_val=pd.DataFrame(data['Percent Change'])
    input_val=data.drop(columns=['Percent Change'])
    return input_val, target_output_val

def run_model(ticker):
    filename=os.path.join('Stocks',ticker,'model.h5')
    if os.path.exists(filename):
        model=load_model(ticker)
    else:
        model=init_model()
    input_val, target_output_val=format_data(ticker)
    model.fit(input_val,target_output_val,epochs=150,batch_size=1, verbose=1)
    store_model(model,ticker)
    return model

def store_model(model,ticker):
    filename=os.path.join('Stocks',ticker,'model.h5')
    if not os.path.exists(filename):
        f=open(filename,'w')
        f.close()
    model.save(filename)

def load_model(ticker):
    filename=os.path.join('Stocks',ticker,'model.h5')
    model=keras.models.load_model(filename)
    return model

def eval_model(ticker):
    model=load_model(ticker)
    input_val, target_output_val=format_data(ticker)
    _, accuracy = model.evaluate(input_val,target_output_val, verbose=0)
    return accuracy
    


In [51]:
count=1
stock_ls=list(os.listdir('Stocks'))
for ticker in stock_ls:
    start=datetime.datetime.now()
    print('Running model: ', ticker,' ',count,'/',len(stock_ls))
    try:
        run_model(ticker)
        count+=1
    except:
        print('Model failed: ', ticker)
        count+=1
        continue
    print('Time: ',datetime.datetime.now()-start)
    #print('Accuracy: ', eval_model(ticker))



Running model:  AAP   1 / 100
Time:  0:00:01.221553
Running model:  AAPL   2 / 100
Time:  0:00:01.305317
Running model:  AI   3 / 100
Time:  0:00:01.959777
Running model:  AMC   4 / 100
Time:  0:00:01.837582
Running model:  AMD   5 / 100
Time:  0:00:01.831011
Running model:  AMZN   6 / 100
Time:  0:00:01.840031
Running model:  ATVI   7 / 100
Time:  0:00:01.069660
Running model:  AUR   8 / 100
Time:  0:00:01.781613
Running model:  AXON   9 / 100
Time:  0:00:01.958777
Running model:  BA   10 / 100
Time:  0:00:03.258948
Running model:  BABA   11 / 100
Time:  0:00:05.570575
Running model:  BB   12 / 100
Time:  0:00:02.556901
Running model:  BBBY   13 / 100
Time:  0:00:02.625273
Running model:  BGFV   14 / 100
Time:  0:00:02.924382
Running model:  BITF   15 / 100
Time:  0:00:04.587432
Running model:  BODY   16 / 100
Time:  0:00:02.515743
Running model:  BROS   17 / 100
Time:  0:00:02.291065
Running model:  CC   18 / 100
Time:  0:00:02.682954
Running model:  CEI   19 / 100
Time:  0:00:02.473

In [53]:
for ticker in stock_ls:
    print(ticker, ' Accuracy: ', eval_model(ticker))

TypeError: cannot unpack non-iterable float object

In [None]:
load_data('AAPL')

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI,EPS Q/Q,Insider Own,Sentiment,Percent Change
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
AAPL,39.43,26.93,26.28,1.75,1.99,5.61,0.0,0.0,0.0,0.0,0.0,7.0,0.67
