In [158]:
import pandas as pd
import os
import matplotlib as plt

In [159]:
files = os.listdir('./')
csv_files = [f for f in files if "0.csv" in f]
csv_files

['signals.2018-08-18-092222.0.csv',
 'signals.2018-08-17-222339.0.csv',
 'signals.2018-08-18-063826.0.csv']

In [160]:
frame = pd.DataFrame()
list_ = []
for file_ in csv_files:
    df = pd.read_csv(file_,index_col=None, header=None)
    list_.append(df)
frame = pd.concat(list_)
frame.columns = ["ticker", "price", "time", 
                "meta_signal", "meta_previous",
                "4h","1d","1w","1m",
                "4hp","1dp","1wp","1mp"]
frame.sort_values(['ticker', 'time'], inplace=True)
frame.head(5)

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp
17,AMEX-BTG,2.16,1534571497681,-1,0,-1,-1,-1,-1,0,0,0,0
17,AMEX-BTG,2.16,1534604009032,-1,0,-1,-1,-1,-1,0,0,0,0
29,AMEX-COPX,20.96,1534571773235,-1,0,-1,-1,-1,-1,0,0,0,0
29,AMEX-COPX,20.96,1534604312033,-1,0,-1,-1,-1,-1,0,0,0,0
35,AMEX-DGRO,36.73,1534571911008,2,0,1,1,2,2,0,0,0,0


In [161]:
frame.shape

(309, 13)

In [162]:
# split ticker into market-name
frame['market'], frame['name'] = frame.ticker.str.split('-', 1).str
frame.head(2)

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
17,AMEX-BTG,2.16,1534571497681,-1,0,-1,-1,-1,-1,0,0,0,0,AMEX,BTG
17,AMEX-BTG,2.16,1534604009032,-1,0,-1,-1,-1,-1,0,0,0,0,AMEX,BTG


In [163]:
frame.sort_values(['name', 'time'], inplace=True)
frame.head(5)

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
0,NASDAQ-AAPL,217.58,1534574531372,2,0,1,2,2,1,0,0,0,0,NASDAQ,AAPL
0,NASDAQ-AAPL,217.58,1534603573796,2,0,1,2,2,1,0,0,0,0,NASDAQ,AAPL
0,NASDAQ-AAPL,217.58,1534609342848,2,0,1,2,2,1,0,0,0,0,NASDAQ,AAPL
1,NYSE-ABBV,98.81,1534571111519,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV
1,NYSE-ABBV,98.81,1534603601535,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV


In [164]:
# failures to get price (numeric conversion)
frame[frame.price.isnull()]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
45,NYSE-EXG,,1534604715456,2,0,2,1,2,1,1,0,0,0,NYSE,EXG
146,NASDAQ-VYMI,,1534607161567,-1,0,-1,-2,-1,-1,-2,0,0,0,NASDAQ,VYMI


In [165]:
frame = frame.dropna()
frame.shape

(307, 15)

In [166]:
# more failures to get price
frame[frame.price == 0]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
57,NASDAQ-GERN,0.0,1534605014514,1,0,1,1,1,1,0,0,0,0,NASDAQ,GERN
63,AMEX-GTE,0.0,1534605165732,0,0,-1,-1,0,1,0,0,0,0,AMEX,GTE


In [167]:
frame = frame[frame.price!=0]
frame.shape

(305, 15)

In [168]:
abbv = frame[frame.ticker.astype(str).str.match("NYSE-ABBV")]
abbv

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
1,NYSE-ABBV,98.81,1534571111519,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV
1,NYSE-ABBV,98.81,1534603601535,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV


In [169]:
# only get records where something changed (price, or indicators)
df = frame.drop_duplicates(["ticker", "price", 
                "meta_signal", "meta_previous",
                "4h","1d","1w","1m",
                "4hp","1dp","1wp","1mp"])
df[df.ticker.astype(str).str.match("NYSE-ABBV")]

Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name
1,NYSE-ABBV,98.81,1534571111519,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV


In [170]:
# test that we have expected number of unique tickers (at least one entry for each)
# currently, we have 155 tickers
print("unique tickers, "+df.ticker.nunique())
print("unique tickers, "+df.ticker.nunique())

155

In [171]:
# add meta movement indicator
df['meta_move'] = df['meta_signal'].values - df['meta_previous'].values
df.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name,meta_move
0,NASDAQ-AAPL,217.58,1534574531372,2,0,1,2,2,1,0,0,0,0,NASDAQ,AAPL,2
1,NYSE-ABBV,98.81,1534571111519,1,0,1,2,1,1,0,0,0,0,NYSE,ABBV,1
2,NYSE-AIT,76.4,1534571133163,1,0,1,1,2,1,0,0,0,0,NYSE,AIT,1


In [172]:
# round to 3 decimals (python tweaks the numbers)
df.price = df.price.round(3)
df[df.name=='HMNY']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,ticker,price,time,meta_signal,meta_previous,4h,1d,1w,1m,4hp,1dp,1wp,1mp,market,name,meta_move
68,NASDAQ-HMNY,0.029,1534572683953,-1,0,-1,-1,-1,-2,0,0,0,0,NASDAQ,HMNY,-1


In [173]:
# export new, cleaned db file
df.to_csv('./signals.csv', header=None, index=None)