In [14]:
import sys
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import yfinance
import requests
import stock_utils
import config
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.initializers import Constant
from requests.exceptions import ConnectionError
import warnings
warnings.filterwarnings('ignore')

In [15]:
def dummy_featuring(series_df, prefix):
    series_df['per_changed'] =  round((series_df.close - series_df.shift(-1).close)*100 / series_df.shift(-1).close, 2)
    series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99 , -5, -4.5 , -4, -3.5, -3, -2.5, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5,  np.inf], labels=list(range(21))) 
    #series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99,-3 ,-0.5, 0.5, 3, np.inf], labels=list(range(5)))
    #series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99, -0.5, 0.5, np.inf], labels=list(range(3)))
    dummy_df = pd.get_dummies(series_df['cluster'], prefix=prefix, prefix_sep='_')
    return pd.concat([series_df[['date']], dummy_df], axis=1)
    #return dummy_df
    

def df_featuring_v2(series_df):
    series_df['per_changed'] =  round((series_df.close - series_df.shift(-1).close)*100 / series_df.shift(-1).close, 2)
    #series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99,-6, -4, -2, -0.5, 0.5, 2, 4, 6, np.inf], labels=list(range(9)))
    #series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99, -2, -0.5, 0.5, 2, np.inf], labels=list(range(5)))
    series_df['cluster'] = pd.cut(series_df.per_changed, bins=[-99, -0.7, 0.7, np.inf], labels=list(range(3)))
    series_df['gap'] =  (series_df.open > series_df.shift(-1).high) | (series_df.open < series_df.shift(-1).low)
    return series_df[['date', 'open', 'high', 'low', 'close', 'cluster', 'gap']]


def get_ticker_data(ticker, period='1y'):
    res = yfinance.Ticker(ticker)
    res =  res.history(period=period)
    res.reset_index(inplace=True)
    res.columns = res.columns.str.lower()
    res = res.iloc[::-1,:].reset_index(drop=True)
    return res

def get_ticker_data_finmodprep(ticker):
    URL = f"https://financialmodelingprep.com/api/v3/historical-price-full/{ticker}"
    params = {
        'timeseries' : 4,
        'apikey' : 'f410520b1e0acb4dfae3ea0549101774'
    }
    res = requests.get(URL, params=params)
    data = res.json()['historical']
    df = pd.DataFrame(data)
    df['date'] = pd.to_datetime(df['date'])
    return df[['date','open', 'high', 'low', 'close']]

def model_df_v2(series_df, days=3):
    # model columns
    c =  []
    model_columns = []
    for n in range(days,0,-1):
        model_columns.extend([#'o_'+str(n),
                  'h_'+str(n),
                  'l_'+str(n),
                  'c_'+str(n)])
    model_columns.append('o_0')
    c.extend(model_columns)
    spy_dummy_columns= ['spy_'+str(n) for n in range(21)]
    vix_dummy_columns= ['vix_'+str(n) for n in range(21)]
    #qqq_dummy_columns= ['qqq_'+str(n) for n in range(21)]
    dummy_columns = spy_dummy_columns + vix_dummy_columns# + qqq_dummy_columns 
    c.extend(dummy_columns)
    c.append('cluster')
    df = pd.DataFrame(columns=c)
    for n in range(days,0,-1):
        #df['o_' + str(n)] = series_df.shift(-n)['open']
        df['h_' + str(n)] = series_df.shift(-n).high
        df['l_' + str(n)] = series_df.shift(-n).low
        df['c_' + str(n)] = series_df.shift(-n)['close']
    df['o_0'] = series_df.open
    df['cluster'] = series_df['cluster']   
    df[dummy_columns] = series_df[dummy_columns]
    df.dropna(inplace=True)
    # opened without gap
    df = df[series_df['gap'] == False]
    #df = df[(df['o_0'] > 10) & (df['o_0'] < 50)] #test
    #normalization
    scaler = MinMaxScaler()
    df[model_columns] = scaler.fit_transform(df[model_columns].T).T
    return df 

In [16]:
vix = get_ticker_data('^VIX', period='4d')
vix_dummy = dummy_featuring(vix, 'vix')
vix_dummy

Unnamed: 0,date,vix_0,vix_1,vix_2,vix_3,vix_4,vix_5,vix_6,vix_7,vix_8,...,vix_11,vix_12,vix_13,vix_14,vix_15,vix_16,vix_17,vix_18,vix_19,vix_20
0,2021-01-20,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2021-01-19,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2021-01-15,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2021-01-14,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
spy = get_ticker_data('SPY', period='4d')
spy

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits
0,2021-01-20,381.109985,382.890015,380.690002,382.640015,14063549,0,0
1,2021-01-19,378.339996,379.230011,376.75,378.649994,51014200,0,0
2,2021-01-15,376.720001,377.579987,373.700012,375.700012,107160000,0,0
3,2021-01-14,380.589996,381.130005,378.100006,378.459991,49989100,0,0


In [18]:
sq = get_ticker_data('QCOM', period='4d'
                               )
sq

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits
0,2021-01-20,164.720001,167.940002,164.440002,166.639999,2370041,0,0
1,2021-01-19,158.940002,164.100006,158.449997,163.770004,8199700,0,0
2,2021-01-15,160.770004,160.860001,156.880005,157.089996,8607400,0,0
3,2021-01-14,159.399994,163.179993,158.5,160.850006,10102400,0,0


In [19]:
spy_dummy = dummy_featuring(spy, 'spy')
spy_dummy = spy_dummy.merge(vix_dummy, on='date', how='left')
#spy_dummy = spy_dummy.merge(qqq_dummy, on='date', how='left')
spy_dummy

Unnamed: 0,date,spy_0,spy_1,spy_2,spy_3,spy_4,spy_5,spy_6,spy_7,spy_8,...,vix_11,vix_12,vix_13,vix_14,vix_15,vix_16,vix_17,vix_18,vix_19,vix_20
0,2021-01-20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2021-01-19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2021-01-15,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2021-01-14,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
filtered_tickers = ['FIT','MDLZ',
 'FUV',
 'ROKU',
 'CVX',
 'CRM',
 'BMY',
 'MTDR',
 'ZS',
 'CBAY',
 'AGNC',
 'CNK',
 'FB',
 'WTRH',
 'VBIV',
 'PEP',
 'SPWH',
 'NTNX',
 'PSTG',
 'CRK',
 'OSS',
 'MRNA',
 'SQ',
 'COF',
 'NYMT',
 'SFIX',
 'EOG',
 'ACAM',
 'WMB',
 'ETSY',
 'GCAP',
 'KEY',
 'BYND',
 'WBA',
 'SWI',
 'GME',
 'FE',
 'RDFN',
 'RLGY',
 'T',
 'INSG',
 'EBAY',
 'CLDR',
 'AQB',
 'AKBA',
 'EROS',
 'ZUO',
 'DELL',
 'BBY',
 'COTY',
 'ENPH',
 'AAL',
 'TSLA',
 'CIM',
 'CCL',
 'FAST',
 'SPPI',
 'NTGN',
 'TRVN',
 'IPG',
 'CPE',
 'GNW',
 'AMD',
 'JNPR',
 'FCEL',
 'MS',
 'CLNE',
 'WELL',
 'CLRB',
 'SLB',
 'WDC',
 'BWEN',
 'NLSN',
 'DHR',
 'SMPL',
 'PK',
 'RF',
 'GPS',
 'BCRX',
 'MIK',
 'VER',
 'ANGI',
 'HPE',
 'PENN',
 'NOVA',
 'C',
 'RUBI',
 'ATVI',
 'SIRI',
 'AEO',
 'NYCB',
 'ET',
 'ADT',
 'PBF',
 'AIG',
 'JNJ',
 'CNC',
 'HDS',
 'OVID',
 'MNKD',
 'EQT',
 'PPL',
 'NVDA',
 'NGHC',
 'UNH',
 'HBI',
 'AMZN',
 'MPLX',
 'FHN',
 'NKE',
 'SABR',
 'FEYE',
 'RCL',
 'TWTR',
 'CRIS',
 'V',
 'INFN',
 'KHC',
 'CNX',
 'MTG',
 'CVNA',
 'PLAN',
 'SBRA',
 'PINS',
 'BLDR',
 'NIO',
 'ELAN',
 'HAL',
 'TDOC',
 'CVS',
 'WFC',
 'BSX',
 'NLY',
 'D',
 'SRNE',
 'ATUS',
 'QEP',
 'RIGL',
 'MAC',
 'KKR',
 'THCB',
 'PYPL',
 'IVZ',
 'EDIT',
 'HRB',
 'PEIX',
 'JCI',
 'WMT',
 'MVIS',
 'DKS',
 'CRWD',
 'LTHM',
 'NFLX',
 'MPC',
 'SSYS',
 'AVTR',
 'UNIT',
 'DISH',
 'GPRO',
 'COST',
 'SM',
 'LLNW',
 'MU',
 'WISA',
 'HMHC',
 'YELP',
 'NK',
 'IBM',
 'NVTA',
 'GEVO',
 'BK',
 'NVAX',
 'JWN',
 'INTC',
 'TAP',
 'SE',
 'BLNK',
 'MDT',
 'BAC',
 'KO',
 'AKAM',
 'PAVM',
 'KALA',
 'ON',
 'FSLR',
 'RRC',
 'VERU',
 'AA',
 'DD',
 'DGLY',
 'FITB',
 'GLUU',
 'CLVS',
 'WTI',
 'DXC',
 'SUNW',
 'SO',
 'MBIO',
 'FCX',
 'PAGP',
 'PRTY',
 'AMRS',
 'DS',
 'VSTM',
 'STOR',
 'INO',
 'KNSL',
 'MSFT',
 'GIX',
 'EVRG',
 'VLO',
 'OSTK',
 'VST',
 'BEN',
 'PM',
 'NI',
 'VUZI',
 'SCHW',
 'GEO',
 'ENG',
 'TWO',
 'SAVE',
 'IVR',
 'SNAP',
 'AR',
 'NOV',
 'AMCX',
 'APPS',
 'CTVA',
 'DVAX',
 'CDE',
 'CNP',
 'WWR',
 'PIXY',
 'SBUX',
 'VICI',
 'FPAY',
 'TWLO',
 'DIS',
 'VZ',
 'MO',
 'AMAT',
 'WU',
 'CTXS',
 'LQDA',
 'CHK',
 'FANG',
 'COG',
 'ZIOP',
 'AM',
 'OKE',
 'LOW',
 'IMMR',
 'OPK',
 'ZNGA',
 'MOS',
 'HST',
 'VECO',
 'HD',
 'CSCO',
 'HRTX',
 'HPQ',
 'TPX',
 'HL',
 'GE',
 'VXRT',
 'AQMS',
 'LCA',
 'EAF',
 'WPX',
 'UAA',
 'FIS',
 'DAL',
 'KODK',
 'MAT',
 'SPG',
 'MGI',
 'PACB',
 'OSMT',
 'PDLI',
 'IDEX',
 'PFE',
 'RAD',
 'PE',
 'TRIP',
 'COP',
 'ORCL',
 'TJX',
 'BE',
 'ETRN',
 'MRK',
 'KOS',
 'AIV',
 'AAPL',
 'NEM',
 'TMUS',
 'STL',
 'FDX',
 'NRZ',
 'EVFM',
 'FTI',
 'MIC',
 'EXEL',
 'AES',
 'FOXA',
 'PCG',
 'UAL',
 'GM',
 'ONCT',
 'ENLC',
 'LVS',
 'SLM',
 'CPRX',
 'NTLA',
 'WBT',
 'ZM',
 'UBER',
 'AI',
 'CFG',
 'ABT',
 'BGCP',
 'KSS',
 'AMC',
 'AGEN',
 'STWD',
 'FLDM',
 'RMG',
 'MCD',
 'PSEC',
 'FISV',
 'KMI',
 'SGMO',
 'USB',
 'SPWR',
 'JBLU',
 'GSX',
 'KPTI',
 'EXC',
 'BJ',
 'KR',
 'ABUS',
 'TXN',
 'WY',
 'ARCC',
 'SWN',
 'MFA',
 'HBAN',
 'QCOM',
 'NCLH',
 'CHWY',
 'OPTT',
 'MARA',
 'LXRX',
 'XOM',
 'DBX',
 'TGT',
 'DDD',
 'AFL',
 'MA',
 'MGM',
 'RESN',
 'CLF',
 'WKHS',
 'PLUG',
 'ABBV',
 'Z',
 'DOCU',
 'W',
 'GPN',
 'LUV',
 'MUR',
 'CCX',
 'ALLY',
 'M',
 'FSLY',
 'TCO',
 'CAG',
 'F',
 'MTCH',
 'KDP',
 'DOW',
 'PG',
 'ALXN',
 'DVN',
 'NLS',
 'BA',
 'BTU',
 'ADMA',
 'AHT',
 'MARK',
 'OXY',
 'PAA',
 'MPW',
 'MRO',
 'IRM',
 'NEE',
 'ORC',
 'APA',
 'BBBY',
 'GS',
 'RIOT',
 'LYFT',
 'TPR',
 'REAL',
 'PBCT',
 'GILD',
 'GNPX',
 'GSAH',
 'JPM',
 'EPD',
 'MET',
 'WORK',
 'WATT',
 'GIS',
 'DBI',
 'PSX',
 'LYV',
 'RCMT',
 'AMRH',
 'ED',
 'KOPN',
 'X',
 'RUN',
 'CLNY',
 'TTD']

In [21]:
data = {}
for t in filtered_tickers:
    try:
        data[t] = get_ticker_data(t, period='4d')
    except ConnectionError:
        print('No internet')
        break
    except :
        continue

- NTGN: No data found, symbol may be delisted
- HDS: No data found for this date range, symbol may be delisted
- NGHC: No data found for this date range, symbol may be delisted
- LCA: No data found for this date range, symbol may be delisted
- WPX: No data found for this date range, symbol may be delisted
- PDLI: No data found for this date range, symbol may be delisted
- PE: No data found for this date range, symbol may be delisted
- RMG: No data found for this date range, symbol may be delisted


In [22]:
loaded_model = tf.keras.models.load_model('12_23_20.model')

In [23]:
up, down = [], []
for key in data.keys():
    stock = data[key].iloc[:4]
    df = df_featuring_v2(stock).merge(spy_dummy, on='date', how='left')
    try:
        mt = model_df_v2(df)
    except:
        continue
    pred = loaded_model.predict(mt.drop(['cluster'], axis=1))
    signal = np.argmax(pred)
    if signal == 0:
        down.append((key, pred[0][signal]))
    elif signal == 2:
        up.append((key, pred[0][signal]))

In [24]:
up.sort(key= lambda s : s[1], reverse=True)
up

[('MS', 0.9124866),
 ('PAA', 0.862702),
 ('FEYE', 0.85570806),
 ('AA', 0.85293245),
 ('CDE', 0.84518456),
 ('AKBA', 0.8443908),
 ('ET', 0.84410286),
 ('FUV', 0.84236234),
 ('PAGP', 0.8418361),
 ('AAPL', 0.8415587),
 ('ZS', 0.83617586),
 ('HL', 0.83390516),
 ('QEP', 0.83235633),
 ('X', 0.83208084),
 ('AAL', 0.8315442),
 ('PSTG', 0.8302991),
 ('GE', 0.82715106),
 ('SM', 0.82575524),
 ('OSMT', 0.8204703),
 ('MPLX', 0.81894505),
 ('APA', 0.8149374),
 ('WATT', 0.8123574),
 ('ACAM', 0.81186545),
 ('ANGI', 0.8118387),
 ('EAF', 0.81120074),
 ('LUV', 0.8107453),
 ('WFC', 0.8094364),
 ('SQ', 0.8079397),
 ('EOG', 0.8076952),
 ('NKE', 0.80575556),
 ('KOS', 0.80385),
 ('DOW', 0.80242705),
 ('FDX', 0.80235314),
 ('ADMA', 0.8017616),
 ('NLS', 0.8004747),
 ('GPN', 0.8003298),
 ('MO', 0.79964036),
 ('HPQ', 0.7989954),
 ('FANG', 0.7988139),
 ('GPS', 0.79439825),
 ('ENPH', 0.7937826),
 ('LLNW', 0.7918121),
 ('CVX', 0.79134065),
 ('TRIP', 0.7890507),
 ('AM', 0.78861916),
 ('CPE', 0.7884378),
 ('JBLU', 0.7

In [25]:
down.sort(key= lambda s : s[1], reverse=True)
down

[('GEVO', 0.6961666),
 ('DGLY', 0.60744447),
 ('ED', 0.56513935),
 ('WWR', 0.55331856),
 ('CNP', 0.5428582),
 ('VLO', 0.5265746),
 ('BE', 0.50435627),
 ('FAST', 0.47983485),
 ('SBRA', 0.46309447),
 ('NYMT', 0.44549412),
 ('BYND', 0.4298771)]

In [26]:
URL = f"https://financialmodelingprep.com/api/v3/historical-price-full/{ticker}"
params = {
    'timeseries' : 4,
    'apikey' : 'f410520b1e0acb4dfae3ea0549101774'
}
res = requests.get(URL, params=params)
data = res.json()['historical']
df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])
df[['date','open', 'high', 'low', 'close']]

NameError: name 'ticker' is not defined

In [None]:
def num_to_roman(input_number):
    result = ''
    roman_map = [
        (1 , 'I'),
        (4 , 'IV'),
        (5 , 'V'),
        (9 , 'IX'),
        (10 , 'X'),
        (40 , 'XL'),
        (50 , 'L'),
        (90 , 'XC'),
        (100 , 'C'),
        (400 , 'CD'),
        (500 , 'D'),
        (900 , 'CM'),
        (1000 ,'M'),
        ]
    
    while input_number != 0 :
        for num , roman in sorted(roman_map, key = lambda x : x[0], reverse=True):
            if input_number - num >= 0:
                result += roman
                input_number -= num
                break
            
    
    return result

num_to_roman(65)

In [32]:
roman_map = [
    (1 , 'I'),
    (4 , 'IV'),
    (5 , 'V'),
    (9 , 'IX'),
    (10 , 'X'),
    (40 , 'XL'),
    (50 , 'L'),
    (90 , 'XC'),
    (100 , 'C'),
    (400 , 'CD'),
    (500 , 'D'),
    (900 , 'CM')
    (1000 ,'M')
    ]

*

1000
500
100
50
10
9
5
4
1


In [11]:
1000 // 100

10

In [25]:
9  1000

0