In [98]:
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from matplotlib import pyplot as plt
import seaborn as sns
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Locations of the ticker list (input) and of the cleaned data sets (output).
# NOTE(review): these are absolute paths on one specific Windows machine; the
# notebook will not run elsewhere without editing them.
importpath = r'C:\Users\nmur1\Google Drive\Springboard\Capstone2\Stock Import Lists'
importfile = 'SandP.csv'
exportpath = r'C:\Users\nmur1\Google Drive\Springboard\Capstone2\CleanData'
# exportfile is not referenced again in the cells visible here.
exportfile = 'Momentum'
# Change the process working directory so the relative file name below resolves.
os.chdir(importpath)

# Ticker universe; later cells read its `Symbol` column.
stocks = pd.read_csv(importfile, encoding= 'unicode_escape')


#Quantitative Analysis Functions



def TR(row, axis = 1):
    """Return the true range for one row of price data.

    The true range is the largest of three distances: today's high minus
    today's low, |high - previous close| and |low - previous close|.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'High', 'Low' and 'yC' (the previous row's close).
    axis : int, optional
        Unused. Kept so existing calls that pass it keep working.

    Returns
    -------
    number
        The largest of the three distances. When 'yC' is NaN, the result is
        the plain high-low range (see the comment on the return line).
    """
    H = row['High']
    L = row['Low']
    yC = row['yC']

    # (H - L) must stay the first argument: max() only replaces its current
    # candidate when a later value compares greater, and comparisons against
    # NaN are always False, so a NaN previous close falls back to H - L.
    return max((H-L), abs(H-yC), abs(L-yC))



def DM(row, axis = 1, d = 'PDM'):
    """Return the positive or negative directional movement for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'High', 'yH' (previous high), 'Low' and 'yL'
        (previous low).
    axis : int, optional
        Unused by this function.
    d : str, optional
        'PDM' selects the positive movement; any other value selects the
        negative movement.

    Returns
    -------
    number
        The upward move (high - previous high) when it is positive and
        strictly larger than the downward move, for 'PDM'; the downward move
        (previous low - low) under the mirrored condition otherwise. 0 when
        the condition does not hold (which includes NaN inputs, because
        comparisons against NaN are False).
    """
    today_high, prior_high = row['High'], row['yH']
    today_low, prior_low = row['Low'], row['yL']

    up_move = today_high - prior_high
    down_move = prior_low - today_low

    if d == 'PDM':
        return up_move if (up_move > 0 and up_move > down_move) else 0

    return down_move if (down_move > 0 and down_move > up_move) else 0

    
def Smoothed(Metric, period, ADX = False):
    """Smooth a series with a recursive (Wilder-style) moving average.

    The output is 0 for the first ``period - 1`` positions, a seed value at
    position ``period - 1``, and afterwards
    ``(previous * (period - 1) + Metric[i]) / period``.

    Parameters
    ----------
    Metric : pandas.Series
        Values to smooth. Any index is accepted; values are always read by
        position.
    period : int
        Smoothing length.
    ADX : bool, optional
        If False, the seed is the mean of the first ``period`` values.
        If True, the seed is the ``period``-row rolling mean that ends at
        position ``2 * period - 1``.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``Metric``.

    Raises
    ------
    ValueError
        If ``Metric`` has too few rows to compute the seed.
    """
    seed_pos = period * 2 - 1 if ADX else period - 1
    if len(Metric) <= seed_pos:
        raise ValueError(
            'Smoothed needs at least %d rows, got %d' % (seed_pos + 1, len(Metric))
        )

    # .iloc reads by position. Plain [] on a Series is label-based, which
    # fails for integer indexes that do not start at 0 and only works on
    # date indexes through a positional fallback.
    Base = Metric.rolling(window = period).mean().iloc[seed_pos]

    values = list(Metric)
    start = period - 1
    lstSmoothed = np.zeros(len(values))

    # NOTE(review): with ADX=True the seed is computed from rows up to
    # position 2*period-1 but is written at position period-1, so the first
    # outputs depend on later inputs. Left as is to keep results unchanged --
    # confirm whether this placement is intended.
    lstSmoothed[start] = Base

    for i in range(start + 1, len(values)):
        lstSmoothed[i] = (lstSmoothed[i-1] * start + values[i]) / period

    return lstSmoothed

def Slope(Metric, lookback):
    """Rolling least-squares slope of a series against time steps 0..lookback-1.

    For every position ``i`` the slope of the straight line fitted through the
    ``lookback`` values ending at ``i`` is returned.

    Parameters
    ----------
    Metric : array-like (e.g. pandas.Series)
        Values to fit; read by position.
    lookback : int
        Number of consecutive values in each fit. Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``Metric``. Positions whose window
        is incomplete, or contains NaN, are 0.

    Raises
    ------
    ValueError
        If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError('lookback must be at least 1')

    values = np.asarray(Metric, dtype = float)
    lstlen = len(values)

    # Zero-initialised so every position that is skipped below has a defined value.
    sl = np.zeros(lstlen)

    # For a single regressor the least-squares slope is
    #   sum((t - mean(t)) * y) / sum((t - mean(t)) ** 2)
    # and the time axis is the same for every window, so both pieces that
    # depend only on t are computed once.
    time = np.arange(0, lookback, 1)
    t_centered = time - time.mean()
    denom = (t_centered ** 2).sum()

    # denom is 0 only when lookback == 1; a one-point window has no slope.
    if denom == 0:
        return sl

    for i in range(lookback - 1, lstlen):
        window = values[i - (lookback - 1):(i + 1)]

        if np.isnan(window).any():
            continue

        sl[i] = (t_centered * window).sum() / denom

    return sl

In [99]:
def Momentum(tickers, sdate):
    """Download daily prices for each ticker and derive momentum features.

    For every ticker the directional indicators (DI+, DI-), ADX, the 7-row
    slopes of DI+ and DI-, a 20-row simple moving average with +/- 2 standard
    deviation bands, and the percentage distance of the close from the average
    and from the lower band are computed.

    Parameters
    ----------
    tickers : iterable of str
        Symbols passed one at a time to ``yf.download``.
    sdate : str
        Start date passed to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        The per-ticker frames stacked on top of each other, with columns
        Close, DI_Plus, DI_Neg, ADX, DI_Neg_Slope, DI_Plus_Slope, SMA, UpperB,
        LowerB, Off_SMA, Off_LB and Symbol. Tickers that fail are reported on
        stdout and left out.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when no ticker could be processed.
    """
    m = []

    for t in tickers:
        try:
            df = yf.download(t, sdate, progress = False, group_by = 'Ticker')

            # Presumably a failed download comes back as an empty frame
            # (yfinance prints its own "Failed download" notice) -- skip it
            # explicitly instead of letting the indicator code fail on it.
            if df.empty:
                print(t, ' not found')
                continue

            # Previous row's high / low / close, needed by DM and TR.
            df['yH'] = df['High'].shift(1)
            df['yL'] = df['Low'].shift(1)
            df['yC'] = df['Close'].shift(1)

            df['PDM'] = df.apply(DM, axis = 1, d = 'PDM')
            df['NDM'] = df.apply(DM, axis = 1, d = 'NDM')
            df['TR'] = df.apply(TR, axis = 1)

            ATR = Smoothed(df['TR'], 14)
            PDM_Smooth = Smoothed(df['PDM'], 14)
            NDM_Smooth = Smoothed(df['NDM'], 14)

            # ATR is 0 during the warm-up rows, so these divisions produce NaN
            # there; the later dropna() on the stacked frame removes those rows.
            DI_Plus = PDM_Smooth / ATR * 100
            DI_Neg = NDM_Smooth / ATR * 100
            DI_Index = abs(DI_Plus - DI_Neg) / abs(DI_Plus + DI_Neg) * 100
            ADX = Smoothed(pd.Series(DI_Index), 14, ADX = True)

            # Build the result as a fresh frame so the column assignments
            # below never write into a slice of the downloaded data.
            dfNew = pd.DataFrame(
                {
                    'Close': df['Close'],
                    'DI_Plus': DI_Plus,
                    'DI_Neg': DI_Neg,
                    'ADX': ADX,
                },
                index = df.index,
            )
            dfNew['DI_Neg_Slope'] = Slope(dfNew.DI_Neg, 7)
            dfNew['DI_Plus_Slope'] = Slope(dfNew.DI_Plus, 7)

            # 20-row moving average with bands at +/- 2 population standard
            # deviations (ddof = 0); the band width is computed once.
            closes = dfNew['Close'].rolling(window = 20)
            dfNew['SMA'] = closes.mean()
            band = closes.std(ddof = 0) * 2
            dfNew['UpperB'] = dfNew.SMA + band
            dfNew['LowerB'] = dfNew.SMA - band

            dfNew['Off_SMA'] = (dfNew.Close - dfNew.SMA)/dfNew.SMA * 100
            dfNew['Off_LB'] = (dfNew.Close - dfNew.LowerB)/dfNew.LowerB * 100
            dfNew['Symbol'] = t
            m.append(dfNew)
        except Exception as exc:
            # Best effort: one bad ticker must not stop the run, but say why it
            # was skipped. Exception (not a bare except) lets Ctrl-C through.
            print(t, ' not found', '(%s: %s)' % (type(exc).__name__, exc))

    return pd.concat(m)

In [None]:
import warnings
# Silences every warning for the rest of the session, including pandas and
# numpy warnings raised inside Momentum (e.g. the divisions by a zero ATR).
warnings.filterwarnings('ignore')

# Show floats with four decimals; a later cell resets this option.
pd.options.display.float_format = '{:.4f}'.format

# Build the indicator table for every symbol in the ticker list.
# This downloads one ticker at a time, so the cell is slow.
stocklist = stocks.Symbol
Quant = Momentum(stocklist, '2019-06-01')



1 Failed download:
- BRK.B: No data found, symbol may be delisted
BRK.B  not found

1 Failed download:
- BF.B: No data found for this date range, symbol may be delisted
BF.B  not found
KIM  not found
KMI  not found
KLAC  not found
KSS  not found


In [None]:
# Order each symbol newest-first, so that "7 rows earlier" in this ordering
# is the row 7 trading days *later* in time.
QuantRev = Quant.sort_values(by = ['Symbol', 'Date'], ascending = [False, False])

# D = -(Close_t / Close_{t+7} - 1): the 7-row forward change, relative to the
# later close. It is computed per symbol; a plain pct_change over the stacked
# frame would compare the newest 7 rows of each symbol with the prices of the
# symbol sorted before it. to_numpy() assigns by position because the Date
# index repeats across symbols.
QuantRev['D'] = QuantRev.groupby('Symbol')['Close'].pct_change(periods = 7).to_numpy() * -1

Quant = QuantRev.sort_values(by = ['Symbol', 'Date'], ascending = [False, True])

# Drop indicator warm-up rows (NaN in any feature). D is excluded from the
# check: it is unknown for the newest 7 rows of every symbol by construction,
# and those are exactly the rows the live prediction needs.
Quant = Quant.dropna(subset = [c for c in Quant.columns if c != 'D'])

# Later cells read their inputs relative to this directory.
os.chdir(exportpath)
Quant.to_csv('Live_Quant.csv')

In [None]:
# Cleaned input files, read relative to the current working directory.
fund = 'Fundamental_Final.csv'
analyst = 'Analysts.csv'
quant = 'Quant_Pricing.csv'

dfFund = pd.read_csv(fund).drop(columns = ['Unnamed: 0'])
dfanalyst = pd.read_csv(analyst).drop(columns = ['Unnamed: 0'])

# The indicator table comes from the in-memory Quant frame, with its index
# turned back into a column and Symbol renamed to match the other sources.
dfquant = Quant.reset_index().rename(columns = {'Symbol': 'Ticker'})

# Join key "<index>_<Ticker>" on the fundamentals side ...
dfFund['Key'] = dfFund[['index', 'Ticker']].astype(str).apply('_'.join, axis = 1)

# ... and "<year of Date minus one>_<Ticker>" on the pricing side, so each
# price row is matched with the key of the preceding year.
dfquant['Year'] = pd.DatetimeIndex(dfquant['Date']).year - 1
dfquant['Key'] = dfquant[['Year', 'Ticker']].astype(str).apply('_'.join, axis = 1)

# Keep only the fundamental fields used downstream; infinite ROE values
# (from the upstream ROE calculation) are replaced by 0.
fields = ['Key', 'eps', 'ROE', 'Sector', 'D2C', 'epsgrowth', 'Sales', 'Shares']
dfFund = dfFund[fields].assign(ROE = lambda frame: frame.ROE.replace([np.inf, -np.inf], 0))

# Analyst data: ticker and share of buy ratings only.
dfRating = dfanalyst[['Symbol', 'Percent_Buy']].rename(columns = {'Symbol': 'Ticker'})

# Inner joins: pricing + fundamentals on Key, then analyst rating on Ticker.
df = pd.merge(dfquant, dfFund, on = 'Key')
df = pd.merge(df, dfRating, on = 'Ticker')

In [None]:
# --- Price / earnings, relative to the sector ------------------------------
# The 20-row moving average (SMA) is used as the price. Infinite ratios
# (eps of 0) are replaced by 0.
df['PE_Ratio'] = df.SMA/df.eps
df['PE_Ratio'] = df.PE_Ratio.replace([np.inf, -np.inf], 0)

pe_groups = df.groupby('Sector')['PE_Ratio']
AverageSectorPE = pd.DataFrame(pe_groups.mean())
Quartile3 = pd.DataFrame(pe_groups.quantile(.75))
Quartile1 = pd.DataFrame(pe_groups.quantile(.25))
IQRPE = Quartile3 - Quartile1

peSector = pd.concat([AverageSectorPE, IQRPE, Quartile1, Quartile3], axis = 1).reset_index()
peSector.columns = ['Sector', 'AverageSectorPE','IQRPE','Quartile1','Quartile3' ]

# Outlier fences: Q3 + 1.5 * IQR above and Q1 - 1.5 * IQR below.
peSector['Upper'] = peSector.Quartile3 + 1.5 * peSector.IQRPE
peSector['Lower'] = peSector.Quartile1 - 1.5 * peSector.IQRPE

peSector = peSector[['Sector','AverageSectorPE', 'Upper', 'Lower']]
df = pd.merge(df,peSector, on = 'Sector')
df['Relative_PE'] = (df.PE_Ratio - df.AverageSectorPE) / df.AverageSectorPE

# --- Price / sales per share, relative to the sector -----------------------
df['Sales_Ratio'] = df.SMA/ (df.Sales/df.Shares)
df['Sales_Ratio'] = df.Sales_Ratio.replace([np.inf, -np.inf], 0)

sr_groups = df.groupby('Sector')['Sales_Ratio']
AverageSectorSR = pd.DataFrame(sr_groups.mean())
Quartile3 = pd.DataFrame(sr_groups.quantile(.75))
Quartile1 = pd.DataFrame(sr_groups.quantile(.25))
IQRSR = Quartile3 - Quartile1

srSector = pd.concat([AverageSectorSR, IQRSR, Quartile1, Quartile3], axis = 1).reset_index()
srSector.columns = ['Sector', 'AverageSectorSR','IQRSR','Quartile1','Quartile3' ]

# Same fences as above; LowerSR / UpperSR are used later to filter the live rows.
srSector['UpperSR'] = srSector.Quartile3 + 1.5 * srSector.IQRSR
srSector['LowerSR'] = srSector.Quartile1 - 1.5 * srSector.IQRSR

SR = srSector[['Sector', 'AverageSectorSR', 'UpperSR', 'LowerSR']]
df = pd.merge(df,SR, on = 'Sector')
df['Relative_SR'] = (df.Sales_Ratio - df.AverageSectorSR) / df.AverageSectorSR
df['Relative_SR'] = df['Relative_SR'].fillna(0)
df['month'] = pd.DatetimeIndex(df['Date']).month


In [None]:
def dummy(df, drop):
    """Replace the `drop` columns of `df` with their indicator (dummy) columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    drop : list of str
        Columns to remove and encode with ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of `df` followed by the indicator columns.
    """
    remaining = df.drop(columns = drop)
    indicators = pd.get_dummies(df[drop])

    return pd.concat([remaining, indicators], axis = 1)

# Model is the merged frame with Sector replaced by its indicator columns.
Model = dummy(df, ['Sector'])

In [None]:
# df2 is a second name for the same DataFrame object (no copy is made).
# It is not referenced again in the cells visible here.
df2 = Model

In [None]:
# Columns that are metadata or intermediate values rather than model features.
todrop = [
    'Ticker', 'Date', 'Key', 'D', 'AverageSectorSR', 'Year', 'Sales_Ratio',
    'DI_Neg_Slope', 'eps', 'LowerB', 'UpperSR', 'LowerSR', 'month',
    'Sales', 'Shares', 'UpperB', 'DI_Plus', 'DI_Neg', 'SMA', 'Close',
    'AverageSectorPE', 'PE_Ratio', 'Upper', 'Lower',
]

# Keep rows for the prediction date, with positive eps, whose Sales_Ratio lies
# strictly between the sector's lower and upper bounds.
on_live_date = Model.Date == '2020-08-07'
positive_eps = Model.eps > 0
within_bounds = (Model.Sales_Ratio > Model.LowerSR) & (Model.Sales_Ratio < Model.UpperSR)
Model = Model.loc[on_live_date & positive_eps & within_bounds]

# X_Meta refers to the same filtered frame and keeps every column.
X_Meta = Model

# X_Live is the feature matrix handed to the classifier.
X_Live = Model.drop(columns = todrop)





In [None]:
# Training set, read relative to the current working directory.
df_Sales = pd.read_csv('Final_Final_Model_Set.csv')
# Features: everything except identifiers, the two target columns and the
# unnamed index column written by an earlier to_csv.
# NOTE(review): the remaining columns must match the columns of X_Live --
# confirm against the file, which is not visible here.
X = df_Sales.drop(columns = ['Date','Ticker', 'Dir_Binary','Dir_Multi' ,'Key','Unnamed: 0'])
# Target: the binary direction label.
y = df_Sales['Dir_Binary']

In [None]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from matplotlib import pyplot as plt
import seaborn as sns
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Random forest with a fixed seed; the positional 100 is presumably the number
# of trees (first parameter of RandomForestClassifier) -- TODO confirm.
model = RandomForestClassifier(100, n_jobs = -1, random_state = 42, min_samples_leaf = 1, criterion = "gini")
# Random 75 / 25 split with a fixed seed.
# NOTE(review): the rows are dated observations and the split ignores Date --
# confirm that a random split is intended for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .25, random_state = 42)
model.fit(X_train, y_train)
# Hard class predictions for the held-out rows, used by the metrics cell.
ypred = model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Row 0, column 1 of the confusion matrix: rows whose true label is the first
# class but which were predicted as the second.
false_positives = confusion_matrix(y_test, ypred)[0][1]
print('False Positives in Test:', false_positives)
# Scale to a percentage before rounding, so the printed value has no
# floating-point tail.
print("Chance of FP's ", round(false_positives / len(y_test) * 100, 2))

# ROC AUC ranks rows by score, so it is given the predicted probability of
# the second class instead of the thresholded labels.
# Assumes Dir_Binary has exactly two classes -- TODO confirm.
yscore = model.predict_proba(X_test)[:, 1]

print('ROC:', round(roc_auc_score(y_test, yscore),3))
print('Accuracy', round(accuracy_score(y_test,ypred),3))
print('Recall', round(recall_score(y_test, ypred),3))
print('Precision', round(precision_score(y_test, ypred),3))
print('F1', round(f1_score(y_test, ypred),3))

# Undo the four-decimal float format set earlier before drawing the matrix.
pd.reset_option('display.float_format')
confmat = plot_confusion_matrix(model, X_test, y_test, cmap="Blues",values_format='.0f')

In [None]:
# Refit the same estimator on the full training set (train and test rows)
# before scoring the live data.
model.fit(X,y)


In [None]:
# 'Ticker' is normally removed together with the other non-feature columns,
# so the drop is expected to find nothing. Only the "column is missing" case
# is treated as harmless; any other error is allowed to surface.
try:
    X_Live = X_Live.drop(columns = 'Ticker')
except KeyError:
    print('Already Dropped')

In [None]:
# Display the live feature matrix for a visual check before predicting.
X_Live

In [None]:
# Score the live rows with exactly the columns, in exactly the order, the
# model was fitted on (X). A missing feature raises a KeyError naming it
# instead of being scored against the wrong column.
live_features = X_Live[X.columns]

# One row per live stock, one column per class probability; the frame gets a
# fresh 0..n-1 index because it is built from a plain array.
predictions = pd.DataFrame(model.predict_proba(live_features))


In [None]:
# reset_index() gives the metadata a fresh 0..n-1 index so it lines up row by
# row with `predictions`; the old index is kept as an extra 'index' column.
Final = pd.concat([X_Meta.reset_index(), predictions], axis = 1).set_index('Ticker')
# NOTE(review): the file name is misspelled ("Fianl_Predicitons"); it is left
# unchanged here because other tools may read the file under this name.
Final.to_csv('Fianl_Predicitons.csv')

In [None]:
# Copy the result table to the system clipboard (e.g. to paste it into a
# spreadsheet).
Final.to_clipboard()