In [391]:
import pandas as pd
import os

In [392]:
header_names = ["ticker","price","time","meta_signal","meta_previous",
                "4h","1d","1w","1m",
                "4hp","1dp","1wp","1mp",
                "4h_rsi","4h_stochrsi","4h_stoch","4h_ult","4h_macd","4h_hull",
                "1d_rsi","1d_stochrsi","1d_stoch","1d_ult","1d_macd","1d_hull",
                "1w_rsi","1w_stochrsi","1w_stoch","1w_ult","1w_macd","1w_hull",
                "1m_rsi","1m_stochrsi","1m_stoch","1m_ult","1m_macd","1m_hull"
                ]

In [393]:
# first, combine dates into single files
files = os.listdir('./')
csv_files = [f for f in files if "signals.20" in f]
csv_files.sort()
csv_files

frames={}
lists={}

# first build the list of dataframes
for file_ in csv_files:
    # split date out of file names
    file_split=file_.split('.', 1)
    year_split=file_split[1].split('-', 1)
    month_split=year_split[1].split('-', 1)
    day_split=month_split[1].split('-', 1)
    year=year_split[0]
    month=month_split[0]
    day=day_split[0]
    
    date=year+'-'+month+'-'+day
    if (date not in lists):
        lists[date] = []
        
    lists[date].append(pd.read_csv(file_,index_col=None, header=None))
    
for date in lists:
    print(date)
    frame = pd.concat(lists[date])
    frame.columns = header_names
    frame.sort_values(['ticker', 'time'], inplace=True)
    frame.to_csv('./signals-'+date+'.csv', header=None, index=None)

In [394]:
files = os.listdir('./')
csv_files = [f for f in files if "signals-20" in f]
csv_files

['signals-2018-08-28.csv',
 'signals-2018-08-29.csv',
 'signals-2018-08-22.csv',
 'signals-2018-08-23.csv',
 'signals-2018-08-24.csv',
 'signals-2018-08-30.csv',
 'signals-2018-08-27.csv']

In [395]:
frame = pd.DataFrame()
list_ = []
for file_ in csv_files:
    df = pd.read_csv(file_,index_col=None, header=None)
    list_.append(df)
frame = pd.concat(list_)
frame.columns = header_names
frame.sort_values(['ticker', 'time'], inplace=True)
frame.head(5)

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull
0,AMEX-BTG,2.31,1534976694686,-1,0,-1,-1,-1,-1,0.0,...,-1,45.19,0.0,24.02,45.06,-1.0,-1,1,1,-1.0
0,AMEX-BTG,2.24,1535032150741,-1,0,-1,-1,-1,-1,0.0,...,-1,44.23,0.0,22.22,43.23,-1.0,-1,-1,-1,-1.0
1,AMEX-BTG,2.235,1535036020573,-1,0,-1,-1,-1,-1,0.0,...,-1,44.08,0.0,21.94,42.94,-1.0,-1,-1,-1,-1.0
2,AMEX-BTG,2.21,1535039443967,-1,0,-1,-1,-1,-1,0.0,...,-1,43.73,0.0,21.24,42.24,-1.0,-1,-1,-1,-1.0
3,AMEX-BTG,2.215,1535042714300,-1,0,-1,-1,-1,-1,0.0,...,-1,43.73,0.0,21.24,42.24,-1.0,-1,-1,-1,-1.0


In [396]:
print('shape', frame.shape)
print("unique tickers: {} ".format(frame.ticker.nunique()))
tickers = set(frame.ticker.unique())

shape (5802, 37)
unique tickers: 169 


In [397]:
# split ticker into market-name
# frame['market'], frame['name'] = frame.ticker.str.split('-', 1).str
# frame.sort_values(['name', 'time'], inplace=True)
# frame.head(2)

In [398]:
# failures to get price (numeric conversion)
frame[frame.price.isnull()]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull


In [399]:
# drop any NaN/null values
# this will happen the first time we run queries against a ticker, until we get diffs on the prev meta signals
# frame = frame.dropna()

# replace NaN values (nicer)
frame = frame.replace('NaN', '/')
print('shape', frame.shape)
print("unique tickers: {} ".format(frame.ticker.nunique()))
print("missing", set(tickers) - set(frame.ticker.unique()))
abb = frame[frame.ticker.astype(str).str.match("NYSE-ABB")]
abb.head(1)

shape (5802, 37)
unique tickers: 169 
missing set()


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull
718,NYSE-ABB,23.72,1535494347788,1,0,1,1,1,1,/,...,1,51.56,17.15,21.75,44.71,-1.0,-1,-1,1,1.0


In [400]:
test = frame[frame.ticker.astype(str).str.match("TSX-ENB")]
test.head(1)
test.head(1).values

array([['TSX-ENB', 36.18, 1534978556364, 1, 1, 1, 1, 1, -1, 2.0, 2.0, 0.0,
        0.0, 54.37, 63.14, 72.66, 50.25, 1, 57.01, 61.58, 78.65, 51.75, -1,
        59.1, 94.41, 93.96, 58.4, 1, 47.25, 94.92, 55.01, 48.3, -1.0, -1,
        -1, -1, 1.0]], dtype=object)

In [401]:
# newer tickers (e.g. IGRO, BOTZ, etc) will not have enough data for monthly signals
# script adds "/" in these cases
# we need to convert those to 0 for the ML engine
frame = frame.replace('/', 0)
print('shape', frame.shape)
print("unique tickers: {} ".format(frame.ticker.nunique()))
print("missing", set(tickers) - set(frame.ticker.unique()))
botz = frame[frame.ticker.astype(str).str.match("NASDAQ-BOTZ")]
botz.head(1)

shape (5802, 37)
unique tickers: 169 
missing set()


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull
33,NASDAQ-BOTZ,22.29,1534976650054,1,0,1,1,-1,1,-1.0,...,-1,64.35,0.0,43.31,0.0,0.0,-1,1,1,1.0


In [402]:
# more failures to get price
frame[frame.price == 0]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull


In [403]:
# remove them if they exist
frame = frame[frame.price!=0]
print('shape', frame.shape)
print("unique tickers: {} ".format(frame.ticker.nunique()))
print("missing", set(tickers) - set(frame.ticker.unique()))

shape (5802, 37)
unique tickers: 169 
missing set()


In [404]:
abbv = frame[frame.ticker.astype(str).str.match("NYSE-ABBV")]
abbv

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull
80,NYSE-ABBV,97.64,1534986940463,1,1,1,1,1,1,2.0,...,-1,61.21,4.18,46.51,44.62,-1.0,1,-1,-1,1.0
685,NYSE-ABBV,97.3,1535031845738,1,1,1,1,1,1,2.0,...,-1,61.01,3.93,46.31,44.36,-1.0,-1,-1,-1,1.0
686,NYSE-ABBV,97.4,1535035671968,1,1,1,1,1,1,2.0,...,-1,61.08,4.02,46.38,44.45,-1.0,1,-1,-1,1.0
687,NYSE-ABBV,97.05,1535039099282,1,1,0,1,1,1,1.0,...,-1,60.87,3.75,46.16,44.17,-1.0,-1,-1,-1,1.0
688,NYSE-ABBV,96.9,1535042394468,1,1,0,1,0,1,1.0,...,-1,60.78,3.65,46.07,44.06,-1.0,-1,-1,-1,1.0
689,NYSE-ABBV,97.16,1535045639362,1,1,1,1,1,1,0.0,...,-1,60.93,3.83,46.22,44.25,-1.0,1,-1,-1,1.0
690,NYSE-ABBV,97.08,1535048868582,1,1,-1,1,1,1,1.0,...,-1,60.87,3.75,46.16,44.17,-1.0,-1,-1,-1,1.0
691,NYSE-ABBV,97.11,1535052105565,1,0,1,1,1,1,-1.0,...,-1,60.91,3.8,46.2,44.22,-1.0,-1,-1,-1,1.0
692,NYSE-ABBV,97.15,1535055309868,1,0,1,1,1,1,-1.0,...,-1,60.93,3.83,46.22,44.25,-1.0,1,-1,-1,1.0
693,NYSE-ABBV,97.15,1535058535964,1,0,1,1,1,1,-1.0,...,-1,60.93,3.83,46.22,44.25,-1.0,1,-1,-1,1.0


In [405]:
# remove duplicates
df = frame.drop_duplicates(["ticker","price","meta_signal","meta_previous",
                "4h","1d","1w","1m",
                "4hp","1dp","1wp","1mp",
                "4h_rsi","4h_stochrsi","4h_stoch","4h_ult","4h_macd","4h_hull",
                "1d_rsi","1d_stochrsi","1d_stoch","1d_ult","1d_macd","1d_hull",
                "1w_rsi","1w_stochrsi","1w_stoch","1w_ult","1w_macd","1w_hull",
                "1m_rsi","1m_stochrsi","1m_stoch","1m_ult","1m_macd","1m_hull"
            ])
df[df.ticker.astype(str).str.match("NYSE-ABBV")]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_stoch,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull
80,NYSE-ABBV,97.64,1534986940463,1,1,1,1,1,1,2.0,...,-1,61.21,4.18,46.51,44.62,-1.0,1,-1,-1,1.0
685,NYSE-ABBV,97.3,1535031845738,1,1,1,1,1,1,2.0,...,-1,61.01,3.93,46.31,44.36,-1.0,-1,-1,-1,1.0
686,NYSE-ABBV,97.4,1535035671968,1,1,1,1,1,1,2.0,...,-1,61.08,4.02,46.38,44.45,-1.0,1,-1,-1,1.0
687,NYSE-ABBV,97.05,1535039099282,1,1,0,1,1,1,1.0,...,-1,60.87,3.75,46.16,44.17,-1.0,-1,-1,-1,1.0
688,NYSE-ABBV,96.9,1535042394468,1,1,0,1,0,1,1.0,...,-1,60.78,3.65,46.07,44.06,-1.0,-1,-1,-1,1.0
689,NYSE-ABBV,97.16,1535045639362,1,1,1,1,1,1,0.0,...,-1,60.93,3.83,46.22,44.25,-1.0,1,-1,-1,1.0
690,NYSE-ABBV,97.08,1535048868582,1,1,-1,1,1,1,1.0,...,-1,60.87,3.75,46.16,44.17,-1.0,-1,-1,-1,1.0
691,NYSE-ABBV,97.11,1535052105565,1,0,1,1,1,1,-1.0,...,-1,60.91,3.8,46.2,44.22,-1.0,-1,-1,-1,1.0
692,NYSE-ABBV,97.15,1535055309868,1,0,1,1,1,1,-1.0,...,-1,60.93,3.83,46.22,44.25,-1.0,1,-1,-1,1.0
688,NYSE-ABBV,96.94,1535120333448,0,1,-1,1,0,1,1.0,...,-1,60.8,3.67,46.09,44.09,-1.0,-1,-1,-1,1.0


In [408]:
print('shape', frame.shape)
print("unique tickers: {} ".format(frame.ticker.nunique()))

shape (5802, 37)
unique tickers: 169 


In [409]:
# add meta movement indicator
df['meta_move'] = df['meta_signal'].values - df['meta_previous'].values
df.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull,meta_move
0,AMEX-BTG,2.31,1534976694686,-1,0,-1,-1,-1,-1,0.0,...,45.19,0.0,24.02,45.06,-1.0,-1,1,1,-1.0,-1
0,AMEX-BTG,2.24,1535032150741,-1,0,-1,-1,-1,-1,0.0,...,44.23,0.0,22.22,43.23,-1.0,-1,-1,-1,-1.0,-1
1,AMEX-BTG,2.235,1535036020573,-1,0,-1,-1,-1,-1,0.0,...,44.08,0.0,21.94,42.94,-1.0,-1,-1,-1,-1.0,-1


In [410]:
# round to 3 decimals (python tweaks the numbers)
df.price = df.price.round(3)
df[df.ticker=='NASDAQ-HMNY']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,1w_ult,1w_macd,1w_hull,1m_rsi,1m_stochrsi,1m_stoch,1m_ult,1m_macd,1m_hull,meta_move
49,NASDAQ-HMNY,0.03,1534983502339,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,1,1,1,1.0,-1
425,NASDAQ-HMNY,0.03,1535033202901,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,1,1,1,1.0,-1
426,NASDAQ-HMNY,0.028,1535037142410,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,-1,1,1,1.0,-1
427,NASDAQ-HMNY,0.023,1535040526677,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,-1,-1,1,1.0,-1
428,NASDAQ-HMNY,0.022,1535043779500,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,-1,-1,1,1.0,-1
429,NASDAQ-HMNY,0.022,1535047010532,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7199,-1.0,-1,-1,1,1.0,-1
430,NASDAQ-HMNY,0.022,1535050262705,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7198,-1.0,-1,-1,1,1.0,-1
431,NASDAQ-HMNY,0.02,1535053482866,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7198,-1.0,-1,-1,1,1.0,-1
432,NASDAQ-HMNY,0.02,1535056689683,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7198,-1.0,-1,-1,1,1.0,-1
422,NASDAQ-HMNY,0.019,1535118109283,-1,0,-1,-1,-1,-2,0.0,...,41.1123,0.0,0.1139,15.7198,-1.0,-1,-1,1,1.0,-1


In [417]:
df['Meta OK'] = (df.meta_signal==1) | (df.meta_signal==2) | (df.meta_signal==0)
df['Meta Good'] = (df.meta_signal==1) | (df.meta_signal==2)
df['Meta ⬆️'] = df.meta_move > 0
df['Meta ⬆️⬆️'] = df.meta_move > 1
df['Meta Bad'] = (df.meta_signal==-1) | (df.meta_signal==-2)
df['Meta ⬇️'] = df.meta_move < 0
df['Meta ⬇️⬇️'] = df.meta_move < -1

df['RSI Over Short'] = ((df['4h_rsi'] > 70) & (df['1d_rsi'] > 70)).astype(int)
df['RSI Over Long'] = ((df['1w_rsi'] > 70) & (df['1m_rsi'] > 70)).astype(int)
df['RSI Over All'] = (df['RSI Over Short'] & df['RSI Over Long']).astype(int)
df['RSI Under Short'] = ((df['4h_rsi'] < 30) & (df['1d_rsi'] < 30)).astype(int)
df['RSI Under Long'] = ((df['1w_rsi'] < 30) & (df['1m_rsi'] < 30)).astype(int)
df['RSI Under All'] = (df['RSI Under Short'] & df['RSI Under Long']).astype(int)

df['StochRSI Over Short'] = ((df['4h_stochrsi'] > 70) & (df['1d_stochrsi'] > 70)).astype(int)
df['StochRSI Over Long'] = ((df['1w_stochrsi'] > 70) & (df['1m_stochrsi'] > 70)).astype(int)
df['StochRSI Over All'] = (df['StochRSI Over Short'] & df['StochRSI Over Long']).astype(int)
df['StochRSI Under Short'] = ((df['4h_stochrsi'] < 30) & (df['1d_stochrsi'] < 30)).astype(int)
df['StochRSI Under Long'] = ((df['1w_stochrsi'] < 30) & (df['1m_stochrsi'] < 30)).astype(int)
df['StochRSI Under All'] = (df['StochRSI Under Short'] & df['StochRSI Under Long']).astype(int)

df['Stoch Over Short'] = ((df['4h_stoch'] > 80) & (df['1d_stoch'] > 80)).astype(int)
df['Stoch Over Long'] = ((df['1w_stoch'] > 80) & (df['1m_stoch'] > 80)).astype(int)
df['Stoch Over All'] = (df['Stoch Over Short'] & df['Stoch Over Long']).astype(int)
df['Stoch Under Short'] = ((df['4h_stoch'] < 20) & (df['1d_stoch'] < 20)).astype(int)
df['Stoch Under Long'] = ((df['1w_stoch'] < 20) & (df['1m_stoch'] < 20)).astype(int)
df['Stoch Under All'] = (df['Stoch Under Short'] & df['Stoch Under Long']).astype(int)

df['Ult Over Short'] = ((df['4h_ult'] > 70) & (df['1d_ult'] > 70)).astype(int)
df['Ult Over Long'] = ((df['1w_ult'] > 70) & (df['1m_ult'] > 70)).astype(int)
df['Ult Over All'] = (df['Ult Over Short'] & df['Ult Over Long']).astype(int)
df['Ult Under Short'] = ((df['4h_ult'] < 30) & (df['1d_ult'] < 30)).astype(int)
df['Ult Under Long'] = ((df['1w_ult'] < 30) & (df['1m_ult'] < 30)).astype(int)
df['Ult Under All'] = (df['Ult Under Short'] & df['Ult Under Long']).astype(int)

df.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,...,Stoch Over All,Stoch Under Short,Stoch Under Long,Stoch Under All,Ult Over Short,Ult Over Long,Ult Over All,Ult Under Short,Ult Under Long,Ult Under All
0,AMEX-BTG,2.31,1534976694686,-1,0,-1,-1,-1,-1,0.0,...,0,0,1,0,0,0,0,0,0,0


In [419]:
df.columns

Index(['ticker', 'price', 'time', 'meta_signal', 'meta_previous', '4h', '1d',
       '1w', '1m', '4hp', '1dp', '1wp', '1mp', '4h_rsi', '4h_stochrsi',
       '4h_stoch', '4h_ult', '4h_macd', '4h_hull', '1d_rsi', '1d_stochrsi',
       '1d_stoch', '1d_ult', '1d_macd', '1d_hull', '1w_rsi', '1w_stochrsi',
       '1w_stoch', '1w_ult', '1w_macd', '1w_hull', '1m_rsi', '1m_stochrsi',
       '1m_stoch', '1m_ult', '1m_macd', '1m_hull', 'meta_move', 'Meta OK',
       'Meta Good', 'Meta ⬆️', 'Meta ⬆️⬆️', 'Meta Bad', 'Meta ⬇️', 'Meta ⬇️⬇️',
       'RSI Over Short', 'RSI Over Long', 'RSI Over All', 'RSI Under Short',
       'RSI Under Long', 'RSI Under All', 'StochRSI Over Short',
       'StochRSI Over Long', 'StochRSI Over All', 'StochRSI Under Short',
       'StochRSI Under Long', 'StochRSI Under All', 'Stoch Over Short',
       'Stoch Over Long', 'Stoch Over All', 'Stoch Under Short',
       'Stoch Under Long', 'Stoch Under All', 'Ult Over Short',
       'Ult Over Long', 'Ult Over All', 'Ult Under

In [418]:
# export new, cleaned db file
df.to_csv('./signals.csv', header=None, index=None)