In [None]:
import numpy as np
import pandas as pd 
import pandas_datareader as dr
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,accuracy_score
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier
import matplotlib.pyplot as plt


import os

In [None]:
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
symbols = table[0]

In [3]:
def GAP_LAP(X,windows=[1,2,5,10]):
    for window in windows:
        X['GapUp_'+str(window)] = (X['Open'].shift(-window).fillna(value = 0) > X['High']).astype(int)
        X['LapUp_'+str(window)] = (X['Open'].shift(-window).fillna(value = 0) > X['Adj_Close']).astype(int)
        X['GapUp_Sum_'+str(window)] = (X['GapUp_'+str(window)].rolling(5).aggregate(lambda x : x.sum()))
        X['LapUp_Sum_'+str(window)] = (X['LapUp_'+str(window)].rolling(5).aggregate(lambda x : x.sum()))
    return X

def comparitive_stds(X,windows = [10]):
    for window in windows:
        base = X['Open', 'High', 'Low'].mean(axis=0)
        local_means = base.rolling(window).mean()[::window].tolist()
        for mean in local_means:
            local_groups = np.array_split(base,window)

def retrieve_data(SYMBOL,start,stop,window_days):
    #Load & Select Data 
    stock_data = dr.DataReader(SYMBOL , 'yahoo',start=start,end=stop)
 
     
    stock_data = stock_data.fillna(0)
    stock_data = stock_data[['Open', 'High', 'Low', 'Close','Adj Close', 'Volume']]
    stock_data.columns = ['Open', 'High', 'Low', 'Close','Adj_Close', 'Volume']
    columns = stock_data.columns.tolist()
    columns.remove('Adj_Close')
    stock_data = GAP_LAP(stock_data)
    for col in columns:
        for window in [2,5,10,15,20]:
                stock_data['Δ'+col+'_'+str(window)] = pd.DataFrame(np.where(stock_data[col].diff(periods = window).fillna(value=0) >= 0,1,-1)).rolling(20).aggregate(lambda x : x.sum())

    
    stock_data['HL_Range'] = stock_data['High'] - stock_data['Low']
    stock_data['HO_Range'] = stock_data['High'] - stock_data['Open'] 
    stock_data['OL_Range'] = stock_data['Open'] - stock_data['Low'] 
    stock_data['HO/OL_Range'] =  stock_data['HO_Range']/stock_data['OL_Range']
        
    Y = pd.DataFrame(stock_data['Adj_Close'].diff(periods = window_days).values,columns=[SYMBOL]).shift(-window_days)

    stock_data.columns = [x+'_'+SYMBOL for x in list(stock_data.columns)]
    
    stock_data.drop(['Adj_Close_'+SYMBOL],axis=1, inplace=True)
    Y[SYMBOL].reset_index(drop =True, inplace =True)
    stock_data.reset_index(drop =True, inplace =True)
    
    df_out_x = stock_data.replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0)
    
    df_out_y = pd.DataFrame(Y).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0)

  
    return df_out_x, df_out_y 
def categorical(pred,real):
    pred = np.where(pred > 0 ,1,0)
    real = np.where(real > 0 ,1,0)
    return accuracy_score(real,pred)

In [4]:
# Load Data
start = '2017-10-02'
stop  = '2020-04-17'
shift_window_of_days = 10

categorical_acc = []
ticker = []
prediction_in_TIME = []
today_price = []
for unique_sub_industry in symbols['GICS Sub Industry'].unique().tolist():
    print('---------------------------------  '+unique_sub_industry+'  ---------------------------------')
    
    sub_companies = symbols.loc[(symbols['GICS Sub Industry'] == unique_sub_industry),'Symbol'].tolist()
    for index,company in enumerate(sub_companies):
        _,target = retrieve_data(company,start,stop,shift_window_of_days)

#             production_test = target[-shift_window_of_days:]
#             production_features = _[-shift_window_of_days:]
        today_price.append(dr.DataReader(company , 'yahoo','2020-04-17',)['Adj Close'])
        _ = _[:-shift_window_of_days]
        target = target[:-shift_window_of_days]
        Y_train,Y_test = train_test_split(target,test_size=0.5,shuffle=False)
        
        if index == 0:
            train_1 = pd.DataFrame()
            for unique_symbol in sub_companies:
                try:
                    train_2,y = retrieve_data(unique_symbol,start,stop,shift_window_of_days)
                    train_2 = train_2
                    train_1 = pd.concat((train_1,train_2),axis=1)
                except:
                    pass
                
            production_features = train_1[-shift_window_of_days:]
            X_train,X_test = train_test_split(train_1[:-shift_window_of_days],test_size=0.5,shuffle=False)
            
        dct = DecisionTreeRegressor()
        dct.fit(pd.DataFrame(X_train).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values,
                pd.DataFrame(Y_train).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values.flatten())
        predictions = dct.predict(pd.DataFrame(X_test).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0).values)

        print('Mae'+' : '+company)
        print(mean_absolute_error(Y_test,predictions))
        print('Cat'+' : '+company)
        print(categorical(predictions,Y_test))  

        categorical_acc.append(categorical(predictions,Y_test))
        ticker.append(company)
        prediction_in_TIME.append(dct.predict(pd.DataFrame(production_features).replace(to_replace=[np.NaN,np.inf,-np.inf],value = 0))[-1])

---------------------------------  Industrial Conglomerates  ---------------------------------
Mae : MMM
6.54823492140997
Cat : MMM
0.8063492063492064
Mae : GE
1.015521290188744
Cat : GE
0.9111111111111111
Mae : HON
5.041900271461124
Cat : HON
0.9301587301587302
Mae : ROP
11.434687296549479
Cat : ROP
0.8984126984126984
---------------------------------  Health Care Equipment  ---------------------------------
Mae : ABT
2.03347004885396
Cat : ABT
0.8539682539682539
Mae : ABMD
34.992480226547
Cat : ABMD
0.8253968253968254
Mae : A
2.5990140036931115
Cat : A
0.8825396825396825
Mae : BAX
2.4000323462107827
Cat : BAX
0.8603174603174604
Mae : BDX
10.01025647360181
Cat : BDX
0.819047619047619
Mae : BSX
1.6214603545173767
Cat : BSX
0.9206349206349206
Mae : DHR
3.4015072171650234
Cat : DHR
0.8857142857142857
Mae : EW
12.595305572994171
Cat : EW
0.8634920634920635
Mae : HOLX
2.5490794348338293
Cat : HOLX
0.8380952380952381
Mae : IDXX
9.950794764927455
Cat : IDXX
0.780952380952381
Mae : ISRG
23.88

ValueError: Number of labels=5 does not match number of samples=315

In [None]:
Predictions_Certainty = pd.DataFrame([*zip(ticker,categorical_acc,np.array(prediction_in_TIME).flatten()/np.array(today_price[:-1]).flatten())],columns= ['Symbols','Confidence',
                                                                                          'Prediction'])

In [None]:
for index,confidence_level in enumerate(Predictions_Certainty['Confidence'].tolist()):
    if confidence_level > 1:
        Predictions_Certainty.loc[index,'Confidence'] = .99

In [None]:
plt.scatter(np.arange(len(Predictions_Certainty['Confidence'])),Predictions_Certainty['Confidence'])

In [None]:
Predictions_Certainty.sort_values(by=['Prediction'], inplace=True,ascending=False)
positive_growth = Predictions_Certainty.loc[Predictions_Certainty['Prediction'] > 0 ,:]
if len(positive_growth) > 10:
    positive_growth = positive_growth[:10]
    
Predictions_Certainty.sort_values(by=['Prediction'], inplace=True,ascending=True)
negative_growth = Predictions_Certainty.loc[Predictions_Certainty['Prediction'] < 0 ,:]
if len(negative_growth) > 10:
    negative_growth = negative_growth[:10]

In [None]:
pd.concat((negative_growth,positive_growth),axis=0)

In [None]:
import smtplib, ssl
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

sender_email = "eladwarshawsky@gmail.com"
receiver_email = "ron@memfix.com"
password = '!Elad2004'

message = MIMEMultipart("alternative")
message["Subject"] = "Top Ten Lowest and Highest Predictions"
message["From"] = sender_email
message["To"] = receiver_email

html = growths

# Turn these into plain/html MIMEText objects
part1 = MIMEText(html, "html")


# Add HTML/plain-text parts to MIMEMultipart message
# The email client will try to render the last part first
message.attach(part1)

# Create secure connection with server and send email
context = ssl.create_default_context()
with smtplib.SMTP_SSL("eladwarshawsky@gmail.com", 465, context=context) as server:
    server.login(sender_email, password)
    server.sendmail(
        sender_email, receiver_email, message.as_string()
    )