In [2]:
import numpy as np 
import pandas as pd 
import pandas_datareader as dr
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,accuracy_score
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier
import matplotlib.pyplot as plt
import os
print("done")

done


In [3]:
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
symbols = table[0]

In [12]:
def GAP_LAP(X,windows=[1,2,5,10]):
    for window in windows:
        X['GapUp_'+str(window)] = (X['Open'].shift(-window).fillna(value = 0) > X['High']).astype(int)
        X['LapUp_'+str(window)] = (X['Open'].shift(-window).fillna(value = 0) > X['Adj_Close']).astype(int)
        X['GapUp_Sum_'+str(window)] = (X['GapUp_'+str(window)].rolling(5).aggregate(lambda x : x.sum()))
        X['LapUp_Sum_'+str(window)] = (X['LapUp_'+str(window)].rolling(5).aggregate(lambda x : x.sum()))
    return X


def retrieve_data(SYMBOL,start,stop,window_days):
    #Load & Select Data 
    stock_data = dr.DataReader(SYMBOL , 'yahoo',start=start,end=stop)
 
     
    stock_data = stock_data.fillna(0)
    stock_data = stock_data[['Open', 'High', 'Low', 'Close','Adj Close', 'Volume']]
    stock_data.columns = ['Open', 'High', 'Low', 'Close','Adj_Close', 'Volume']
    columns = stock_data.columns.tolist()
    columns.remove('Adj_Close')
    stock_data = GAP_LAP(stock_data)
    for col in columns:
        for window in [2,5,10,15,20]:
                stock_data['Δ'+col+'_'+str(window)] = pd.DataFrame(np.where(stock_data[col].diff(periods = window).fillna(value=0) >= 0,1,-1)).rolling(20).aggregate(lambda x : x.sum())

    
    stock_data['HL_Range'] = stock_data['High'] - stock_data['Low']
    stock_data['HO_Range'] = stock_data['High'] - stock_data['Open'] 
    stock_data['OL_Range'] = stock_data['Open'] - stock_data['Low'] 
    stock_data['HO/OL_Range'] =  stock_data['HO_Range']/stock_data['OL_Range']
        
    Y = pd.DataFrame(stock_data['Adj_Close'].diff(periods = window_days).values,columns=[SYMBOL]).shift(-window_days)

    stock_data.columns = [x+'_'+SYMBOL for x in list(stock_data.columns)]
    
    stock_data.drop(['Adj_Close_'+SYMBOL],axis=1, inplace=True)
    Y[SYMBOL].reset_index(drop =True, inplace =True)
    stock_data.reset_index(drop =True, inplace =True)
    
    df_out_x = stock_data.replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0)
    
    df_out_y = pd.DataFrame(Y).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0)

  
    return df_out_x, df_out_y 
def categorical(pred,real):
    pred = np.where(pred > 0 ,1,0)
    real = np.where(real > 0 ,1,0)
    return accuracy_score(real,pred)

In [13]:
# Load Data
start = '2017-10-02'
stop  = '2020-04-18'
shift_window_of_days = 10

categorical_acc = []
ticker = []
prediction_in_TIME = []
today_price = []
for unique_sub_industry in symbols['GICS Sub Industry'].unique().tolist():
    print('---------------------------------  '+unique_sub_industry+'  ---------------------------------')
    
    sub_companies = symbols.loc[(symbols['GICS Sub Industry'] == unique_sub_industry),'Symbol'].tolist()
    for index,company in enumerate(sub_companies):
        _,target = retrieve_data(company,start,stop,shift_window_of_days)

#             production_test = target[-shift_window_of_days:]
#             production_features = _[-shift_window_of_days:]
        today_price.append(dr.DataReader(company , 'yahoo','2020-04-17',)['Adj Close'])
        _ = _[:-shift_window_of_days]
        target = target[:-shift_window_of_days]
        Y_train,Y_test = train_test_split(target,test_size=0.5,shuffle=False)
        
        if index == 0:
            train_1 = pd.DataFrame()
            for unique_symbol in sub_companies:
                try:
                    train_2,y = retrieve_data(unique_symbol,start,stop,shift_window_of_days)
                    train_2 = train_2
                    train_1 = pd.concat((train_1,train_2),axis=1)
                except:
                    pass
                
            production_features = train_1[-shift_window_of_days:]
            X_train,X_test = train_test_split(train_1[:-shift_window_of_days],test_size=0.5,shuffle=False)
            
        dct = DecisionTreeRegressor()
        try:
            dct.fit(pd.DataFrame(X_train).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values,
                    pd.DataFrame(Y_train).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values.flatten())
            predictions = dct.predict(pd.DataFrame(X_test).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values)

            print('Mae'+' : '+company)
            print(mean_absolute_error(Y_test,predictions))
            print('Cat'+' : '+company)
            print(categorical(predictions,Y_test))  

            categorical_acc.append(categorical(predictions,Y_test))
            ticker.append(company)
            prediction_in_TIME.append(dct.predict(pd.DataFrame(production_features).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0))[-1])
        except:
            pass

---------------------------------  Industrial Conglomerates  ---------------------------------
Mae : MMM
5.088988337441096
Cat : MMM
0.873015873015873
Mae : GE
1.0152152167426216
Cat : GE
0.9015873015873016
Mae : HON
4.853606184702071
Cat : HON
0.9206349206349206
Mae : ROP
12.672506956070189
Cat : ROP
0.8952380952380953
---------------------------------  Health Care Equipment  ---------------------------------
Mae : ABT
2.6858257717556424
Cat : ABT
0.8539682539682539
Mae : ABMD
36.28876444498698
Cat : ABMD
0.834920634920635
Mae : A
2.468004814026848
Cat : A
0.8920634920634921
Mae : BAX
2.377502211313399
Cat : BAX
0.8603174603174604
Mae : BDX
9.573143756200396
Cat : BDX
0.8158730158730159
Mae : BSX
1.5950477024865528
Cat : BSX
0.8984126984126984
Mae : DHR
3.504499780564081
Cat : DHR
0.8571428571428571
Mae : EW
11.663208467998201
Cat : EW
0.8634920634920635
Mae : HOLX
2.5315556480771018
Cat : HOLX
0.8444444444444444
Mae : IDXX
10.490477062406994
Cat : IDXX
0.7904761904761904
Mae : ISRG
2

Mae : ADS
9.936131825522772
Cat : ADS
0.8857142857142857
Mae : BR
6.011090644957528
Cat : BR
0.9079365079365079
Mae : FIS
3.2428476242792037
Cat : FIS
0.8126984126984127
Mae : FISV
3.422205522325304
Cat : FISV
0.8380952380952381
Mae : FLT
9.33396708170573
Cat : FLT
0.8857142857142857
Mae : GPN
5.086545853387742
Cat : GPN
0.8666666666666667
Mae : JKHY
3.9111835055881077
Cat : JKHY
0.8158730158730159
Mae : MA
8.892822992234002
Cat : MA
0.8666666666666667
Mae : PAYX
2.4746551876976377
Cat : PAYX
0.7047619047619048
Mae : PYPL
3.814127483065166
Cat : PYPL
0.8571428571428571
Mae : V
4.511268979027157
Cat : V
0.8158730158730159
Mae : WU
0.5453277830093626
Cat : WU
0.8603174603174604
---------------------------------  Electric Utilities  ---------------------------------
Mae : LNT
1.4958539508637927
Cat : LNT
0.726984126984127
Mae : AEP
2.3997083512563555
Cat : AEP
0.5619047619047619
Mae : ED
2.601707470606244
Cat : ED
0.8158730158730159
Mae : D
2.2330545576791914
Cat : D
0.7523809523809524
Ma

Mae : AVB
8.802139475988964
Cat : AVB
0.45396825396825397
Mae : EQR
2.2090437631758433
Cat : EQR
0.7587301587301587
Mae : ESS
11.993003917875743
Cat : ESS
0.43492063492063493
Mae : MAA
4.9666123647538445
Cat : MAA
0.6476190476190476
Mae : UDR
1.1986727517748637
Cat : UDR
0.8571428571428571
---------------------------------  Technology Hardware, Storage & Peripherals  ---------------------------------
Mae : AAPL
6.9204777551075765
Cat : AAPL
0.9015873015873016
Mae : HPE
1.0043810677906824
Cat : HPE
0.8666666666666667
Mae : HPQ
0.9784094553145152
Cat : HPQ
0.8984126984126984
Mae : NTAP
2.8665907118055554
Cat : NTAP
0.8920634920634921
Mae : STX
2.3425113617427766
Cat : STX
0.8571428571428571
Mae : WDC
3.4106031750875805
Cat : WDC
0.9047619047619048
Mae : XRX
1.492334977407304
Cat : XRX
0.8825396825396825
---------------------------------  Semiconductor Equipment  ---------------------------------
Mae : AMAT
2.6125021677168587
Cat : AMAT
0.8031746031746032
Mae : KLAC
5.514648570711651
Cat 

KeyError: 'Date'

In [None]:
Predictions_Certainty = pd.DataFrame([*zip(ticker,categorical_acc,np.array(prediction_in_TIME).flatten()/np.array(today_price[:-1]).flatten())],columns= ['Symbols','Confidence',
                                                                                          'Prediction'])

In [None]:
for index,confidence_level in enumerate(Predictions_Certainty['Confidence'].tolist()):
    if confidence_level > 1:
        Predictions_Certainty.loc[index,'Confidence'] = .99

In [None]:
plt.scatter(np.arange(len(Predictions_Certainty['Confidence'])),Predictions_Certainty['Confidence'])

In [None]:
Predictions_Certainty.sort_values(by=['Prediction'], inplace=True,ascending=False)
positive_growth = Predictions_Certainty.loc[Predictions_Certainty['Prediction'] > 0 ,:]
if len(positive_growth) > 10:
    positive_growth = positive_growth[:10]
    
Predictions_Certainty.sort_values(by=['Prediction'], inplace=True,ascending=True)
negative_growth = Predictions_Certainty.loc[Predictions_Certainty['Prediction'] < 0 ,:]
if len(negative_growth) > 10:
    negative_growth = negative_growth[:10]

In [None]:
Stock_Changes = pd.concat((negative_growth,positive_growth),axis=0)

In [None]:
import time
import smtplib, ssl
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

sender_email = "eladwarshawsky@gmail.com"
receiver_email = "eladwarshawsky@gmail.com"
# receiver_email = "ron@memfix.com"
password = '!Elad2004'

message = MIMEMultipart("alternative")
message["Subject"] = "Lowest and Highest Predictions"
message["From"] = sender_email
message["To"] = receiver_email


# Turn these into plain/html MIMEText objects
part1 = MIMEText(Stock_Changes, "plain")


# Add HTML/plain-text parts to MIMEMultipart message
# The email client will try to render the last part first
body="Date: %s\n\nYour daily log is attached."%(time.strftime("%m/%d/%Y"))
message.attach(body)
message.attach(part1)


# Create secure connection with server and send email
context = ssl.create_default_context()
with smtplib.SMTP_SSL("eladwarshawsky@gmail.com", 465, context=context) as server:
    server.login(sender_email, password)
    server.sendmail(
        sender_email, receiver_email, message.as_string()
    )