In [134]:
#import dash_data
import pandas as pd
import cbpro
from datetime import datetime
import pickle


In [None]:
# We vectorize the text using a bag of words model
def get_vectorizer(ngram, max_features):
    return CountVectorizer(ngram_range=(1, ngram),
                             analyzer = "word",
                             tokenizer = None,
                             preprocessor = review_cleaner,
                             stop_words = None, 
                             max_features = max_features)

# Model training
def train_predict_sentiment(reviews, vectorizer, y=train["sentiment"], ngram=1, max_features=1000, model_random_state=0):
    '''
        This function will:
            1. split data into train and test set.
            2. get n-gram counts from cleaned reviews 
            3. train a random forest model using train n-gram counts and y (labels)
            4. test the model on your test split
            5. print accuracy of sentiment prediction on test and training data
            6. print confusion matrix on test data results

            To change n-gram type, set value of ngram argument
            To change the number of features you want the countvectorizer to generate, set the value of max_features argument
            
            @cleaned_review (type:str) is preprocessed string from review_cleaner()
            @return none
    '''

    print("Creating the model!\n")
    
    # train / test split
    X_train, X_test, y_train, y_test = train_test_split(reviews, y, random_state=0, test_size=.2)

    # Then we use fit_transform() to fit the model / learn the vocabulary,
    # then transform the data into feature vectors.
    # The input should be a list of strings. .toarray() converts to a numpy array
    
    train_bag = vectorizer.fit_transform(X_train)
    if not isinstance(train_bag, np.ndarray):
        train_bag = train_bag.toarray()
    test_bag = vectorizer.transform(X_test)
    if not isinstance(test_bag, np.ndarray):
        test_bag = test_bag.toarray()

    print("Training the random forest classifier!\n")
    # Initialize a Random Forest classifier with 50 trees
    forest = RandomForestClassifier(n_estimators = 50, random_state = model_random_state) 

    # Fit the forest to the training set, using the bag of words as 
    # features and the sentiment labels as the target variable
    forest = forest.fit(train_bag, y_train)

    # predict
    train_predictions = forest.predict(train_bag)
    test_predictions = forest.predict(test_bag)
    
    # validation
    train_acc = metrics.accuracy_score(y_train, train_predictions)
    valid_acc = metrics.accuracy_score(y_test, test_predictions)
    
    print(" The training accuracy is: ", train_acc, "\n", "The validation accuracy is: ", valid_acc)
    print()
    print('CONFUSION MATRIX:')
    print('         Predicted')
    print('          neg pos')
    print(' Actual')
    c=confusion_matrix(y_test, test_predictions)
    print('     neg  ',c[0])
    print('     pos  ',c[1])

    return forest

# Print out the top features
def top_features(forest, vectorizer, n):
    #Extract feature importance
    print('\nTOP TEN IMPORTANT FEATURES:')
    feature_text = vectorizer.get_feature_names().copy()
    feature_importance = forest.feature_importances_.copy()
    
    indices = np.argsort(feature_importance)[::-1]
    
    top_n_ind = indices[:n]
    top_n = list([vectorizer.get_feature_names()[ind] for ind in top_n_ind])
    
    return top_n

# Print out whether the prediction is accurate
def check_prediction(model, vectorizer, review, expected):
    prediction = model.predict(vectorizer.transform([review]))[0]
    sentiment = "👍" if prediction else "👎"
    correct = "\x1b[92mcorrect\x1b[0m" if prediction == expected else "\x1b[31mincorrect\x1b[0m"
    print("{} ⟶ {} {}".format(review, sentiment, correct))

In [131]:
auth_client = cbpro.AuthenticatedClient(key, b64secret, passphrase,
                                  api_url="https://api-public.sandbox.pro.coinbase.com")

NameError: name 'key' is not defined

In [132]:
from collections import deque
i = deque(maxlen=20)
i.append(1)

In [166]:
data_times_prices = pickle.load(open("data_times_prices.p", "rb"))


In [179]:
all = pickle.load(open("data_all.p", "rb"))
len(all)

742

In [181]:
for i in range(740): 
    all.pop()

In [183]:
all

deque([{'type': 'ticker',
        'sequence': 24116767881,
        'product_id': 'BTC-USD',
        'price': '50797.6',
        'open_24h': '52903',
        'volume_24h': '44451.60924808',
        'low_24h': '47464.65',
        'high_24h': '52915.66',
        'volume_30d': '476191.37204228',
        'best_bid': '50797.60',
        'best_ask': '50797.61',
        'side': 'sell',
        'time': '2021-04-23T19:52:25.996611Z',
        'trade_id': 160454588,
        'last_size': '0.01883569'},
       {'type': 'ticker',
        'sequence': 24116767964,
        'product_id': 'BTC-USD',
        'price': '50797.61',
        'open_24h': '52903',
        'volume_24h': '44451.62855228',
        'low_24h': '47464.65',
        'high_24h': '52915.66',
        'volume_30d': '476191.39134648',
        'best_bid': '50797.60',
        'best_ask': '50797.61',
        'side': 'buy',
        'time': '2021-04-23T19:52:26.600817Z',
        'trade_id': 160454589,
        'last_size': '0.0193042'}])

In [193]:
def tfi(dall):
    tfi=0
    for el in dall:
        if el['side']=='buy':
            tfi+=float(el['last_size'])
        else:
            tfi-=float(el['last_size'])
    #res=sum(elem == item for elem in dq)
    return(tfi)

In [194]:
tfi(all)

0.00046851000000000184

In [195]:
-0.01883569+0.0193042

0.00046851000000000184

In [None]:
np.where(pre_sample_df['side'] == 'buy', pre_sample_df['last_size'], -pre_sample_df['last_size'])

In [169]:
all[0]

{'type': 'ticker',
 'sequence': 24116208859,
 'product_id': 'BTC-USD',
 'price': '50713.11',
 'open_24h': '52903',
 'volume_24h': '43978.48711591',
 'low_24h': '47464.65',
 'high_24h': '52915.66',
 'volume_30d': '475718.24991011',
 'best_bid': '50708.30',
 'best_ask': '50713.11',
 'side': 'buy',
 'time': '2021-04-23T19:29:40.762711Z',
 'trade_id': 160444492,
 'last_size': '0.00188742'}

In [160]:
sides = pickle.load(open("data_sides.p", "rb"))


In [161]:
sides.count_values()

AttributeError: 'collections.deque' object has no attribute 'count_values'

In [163]:
sides[-10]

'sell'

In [164]:
def count(dq, item):
    return sum(elem == item for elem in dq)

In [165]:
count(sides, 'buy' )

4

In [151]:
len(filter(lambda : x == 1, sides))

TypeError: object of type 'filter' has no len()

In [142]:
0.9*float(max(data_times_prices[1]))

45000.0

In [13]:
df=pd.read_csv('data_03_13_dash.csv')
df['time'][i*1000]

1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
1    2021-03-13T00:00:00.594420Z
Name: time, dtype: object

In [124]:
class websocket_trades(cbpro.WebsocketClient):
    def on_open(self):
        print("-- wassup --")
        self.url = "wss://ws-feed.pro.coinbase.com/"
        #self.firebase= firebase.FirebaseApplication(
        self.products = ["BTC-USD"]
        self.message_count = 0
        self.channels=['ticker']
        self.prices=deque(maxlen=1000)
        self.times=deque(maxlen=1000)

    def on_message(self, msg):
        print(msg)
        print(type(msg))
        

        if msg['type']=='ticker':
            self.prices.append(msg['price'])
            self.times.append(
                datetime.strptime(msg['time'], '%Y-%m-%dT%H:%M:%S.%fZ')
                )
          #self.message_count += 1
          #today = datetime.now(timezone.utc).date()
          #postt_string='/websocket_trades_v1_'+str(t
          #self.firebase.post(postt_string, msg)
          
        print(msg)

{'type': 'ticker', 'sequence': 24090434704, 'product_id': 'BTC-USD', 'price': '48462.66', 'open_24h': '54158.9', 'volume_24h': '39159.26612537', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457961.98409019', 'best_bid': '48462.65', 'best_ask': '48462.66', 'side': 'buy', 'time': '2021-04-23T07:33:16.751360Z', 'trade_id': 160057924, 'last_size': '0.00227632'}
{'type': 'ticker', 'sequence': 24090434704, 'product_id': 'BTC-USD', 'price': '48462.66', 'open_24h': '54158.9', 'volume_24h': '39159.26612537', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457961.98409019', 'best_bid': '48462.65', 'best_ask': '48462.66', 'side': 'buy', 'time': '2021-04-23T07:33:16.751360Z', 'trade_id': 160057924, 'last_size': '0.00227632'}
{'type': 'ticker', 'sequence': 24090435067, 'product_id': 'BTC-USD', 'price': '48462.66', 'open_24h': '54158.9', 'volume_24h': '39159.26704898', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457961.98501380', 'best_bid': '48462.65

In [125]:
wsClientTrades = websocket_trades()

{'type': 'ticker', 'sequence': 24090435795, 'product_id': 'BTC-USD', 'price': '48466.36', 'open_24h': '54158.9', 'volume_24h': '39159.34290904', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457962.06087386', 'best_bid': '48466.35', 'best_ask': '48466.36', 'side': 'buy', 'time': '2021-04-23T07:33:18.246959Z', 'trade_id': 160057934, 'last_size': '0.00950995'}
{'type': 'ticker', 'sequence': 24090435795, 'product_id': 'BTC-USD', 'price': '48466.36', 'open_24h': '54158.9', 'volume_24h': '39159.34290904', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457962.06087386', 'best_bid': '48466.35', 'best_ask': '48466.36', 'side': 'buy', 'time': '2021-04-23T07:33:18.246959Z', 'trade_id': 160057934, 'last_size': '0.00950995'}
{'type': 'ticker', 'sequence': 24090435804, 'product_id': 'BTC-USD', 'price': '48466.36', 'open_24h': '54158.9', 'volume_24h': '39159.34409089', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457962.06205571', 'best_bid': '48466.35

In [126]:
wsClientTrades.start()

-- wassup --
{'type': 'subscriptions', 'channels': [{'name': 'ticker', 'product_ids': ['BTC-USD']}]}
<class 'dict'>
{'type': 'subscriptions', 'channels': [{'name': 'ticker', 'product_ids': ['BTC-USD']}]}
{'type': 'ticker', 'sequence': 24090437528, 'product_id': 'BTC-USD', 'price': '48470.06', 'open_24h': '54158.9', 'volume_24h': '39101.23060747', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457903.94857229', 'best_bid': '48470.05', 'best_ask': '48470.06', 'side': 'buy', 'time': '2021-04-23T07:33:20.683395Z', 'trade_id': 160057958, 'last_size': '0.08104227'}
<class 'dict'>
{'type': 'ticker', 'sequence': 24090437528, 'product_id': 'BTC-USD', 'price': '48470.06', 'open_24h': '54158.9', 'volume_24h': '39101.23060747', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457903.94857229', 'best_bid': '48470.05', 'best_ask': '48470.06', 'side': 'buy', 'time': '2021-04-23T07:33:20.683395Z', 'trade_id': 160057958, 'last_size': '0.08104227'}
{'type': 'ticker', 'sequenc

In [129]:
wsClientTrades.prices

deque(['48470.06',
       '48470.06',
       '48470.06',
       '48470.06',
       '48470.06',
       '48470.05',
       '48470.05',
       '48470.04',
       '48470.04',
       '48463.32',
       '48463.32',
       '48463.32',
       '48470.05',
       '48463.11',
       '48463.03',
       '48469.23',
       '48463.12',
       '48463.03',
       '48463.03',
       '48462.66',
       '48464.27',
       '48462.65',
       '48462.65',
       '48456.59',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.65',
       '48461.64',
       '48461.64',
       '48461.65',
       '48461.64',
       '48455',
       '48453.11',
       '48453.11',
       '48453.11',
       '48451.84',
       '48451.51',
       '48450',

In [121]:
wsClientTrades.prices

{'type': 'ticker', 'sequence': 24090365938, 'product_id': 'BTC-USD', 'price': '48500', 'open_24h': '54158.9', 'volume_24h': '39095.12821365', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457897.84617847', 'best_bid': '48499.99', 'best_ask': '48500.00', 'side': 'buy', 'time': '2021-04-23T07:31:51.528005Z', 'trade_id': 160056966, 'last_size': '1.72'}
{'type': 'ticker', 'sequence': 24090365938, 'product_id': 'BTC-USD', 'price': '48500', 'open_24h': '54158.9', 'volume_24h': '39095.12821365', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457897.84617847', 'best_bid': '48499.99', 'best_ask': '48500.00', 'side': 'buy', 'time': '2021-04-23T07:31:51.528005Z', 'trade_id': 160056966, 'last_size': '1.72'}
{'type': 'ticker', 'sequence': 24090365938, 'product_id': 'BTC-USD', 'price': '48500', 'open_24h': '54158.9', 'volume_24h': '39110.88016391', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457913.59812873', 'best_bid': '48499.99', 'best_ask': '48500

deque(['48494.97',
       '48490.27',
       '48494.97',
       '48490.05',
       '48490.05',
       '48490.05',
       '48495',
       '48499.96',
       '48499.96',
       '48499.96',
       '48500',
       '48500',
       '48500',
       '48500',
       '48499.91',
       '48500',
       '48500',
       '48500',
       '48500',
       '48500',
       '48500',
       '48500',
       '48500',
       '48500',
       '48509.21',
       '48517.89',
       '48528.7',
       '48538.8',
       '48538.8',
       '48538.79',
       '48516.6',
       '48509.72',
       '48509.24',
       '48509.21',
       '48509.2',
       '48504.84',
       '48504.68',
       '48502.8',
       '48500.52',
       '48517.13',
       '48511.75',
       '48509.21',
       '48506.05',
       '48504.6',
       '48504.61',
       '48503.64',
       '48500',
       '48500',
       '48500',
       '48505.7',
       '48505.69',
       '48502.42'])

In [66]:
wsClientTrades.all[0]['price']

'49200'

In [84]:
len(wsClientTrades.all)


1000

In [86]:
wsClientTrades.close()

{'type': 'ticker', 'sequence': 24090124754, 'product_id': 'BTC-USD', 'price': '49076.85', 'open_24h': '54158.9', 'volume_24h': '38971.32809981', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457774.04606463', 'best_bid': '49076.84', 'best_ask': '49093.31', 'side': 'buy', 'time': '2021-04-23T07:26:27.933008Z', 'trade_id': 160054480, 'last_size': '0.0000275'}
{'type': 'ticker', 'sequence': 24090124754, 'product_id': 'BTC-USD', 'price': '49076.85', 'open_24h': '54158.9', 'volume_24h': '38971.32809981', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457774.04606463', 'best_bid': '49076.84', 'best_ask': '49093.31', 'side': 'buy', 'time': '2021-04-23T07:26:27.933008Z', 'trade_id': 160054480, 'last_size': '0.0000275'}
{'type': 'ticker', 'sequence': 24090124756, 'product_id': 'BTC-USD', 'price': '49093.31', 'open_24h': '54158.9', 'volume_24h': '38971.33003791', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457774.04800273', 'best_bid': '49076.84',

In [88]:
wsClientTrades.all[0]

{'type': 'ticker',
 'sequence': 24090004820,
 'product_id': 'BTC-USD',
 'price': '49168.4',
 'open_24h': '54158.9',
 'volume_24h': '38905.85159071',
 'low_24h': '48381.05',
 'high_24h': '55469.98',
 'volume_30d': '457708.56955553',
 'best_bid': '49168.40',
 'best_ask': '49182.17',
 'side': 'sell',
 'time': '2021-04-23T07:23:31.416220Z',
 'trade_id': 160053472,
 'last_size': '0.1322'}

In [83]:
for el in wsClientTrades.all:
    print(el['price'])

49234.64
49236.92
49238.14
49238.38
49238.62
49240.19
49240.26
49241.27
49242.36
49246.08
49250
49250.78
49251.81
49252.64
49252.64
49254.35
49262.18
49263.08
49263.5
49264.19
49264.19
49204.14
49209.75
49209.74
49207.24
49204.21
49204.19
49209.75
49209.75
49228.94
49229.95
49229.95
49229.95
49229.95
49229.95
49229.95
49229.94
49229.94
49225.35
49225.35
49225.35
49225.34
49207.78
49207.78
49207.19
49207.78
49207.78
49207.78
49205.71
49205.71
49202.7
49204.58
49204.58
49202.77
49202.73
49204.59
49204.59
49204.59
49204.59
49204.59
49204.59
49204.59
49204.59
49204.59
49204.58
49204.58
49204.58
49204.57
49202.69
49179.68
49179.68
49179.68
49179.68
49179.69
49179.69
49178.79
49172.71
49176.1
49176.1
49176.06
49176.03
49173.76
49173.78
49173.78
49173.78
49180.28
49179.64
49181.51
49181.51
49181.51
49184.15
49184.15
49184.29
49200.8
49206.79
49207.39
49200
49208.41
49208.41
49208.41
49210.38
49211.01
49210.81
49214.37
49214.36
49214.37
49214.1
49210.08
49210.08
49210.08
49216.22
49216.02
4921

RuntimeError: deque mutated during iteration

In [74]:
datetime.strptime(wsClientTrades.all[0]['time'], '%Y-%m-%dT%H:%M:%S.%fZ')
print(datetime.strptime(wsClientTrades.all[0]['time'], '%Y-%m-%dT%H:%M:%S.%Z'))

{'type': 'ticker', 'sequence': 24089797546, 'product_id': 'BTC-USD', 'price': '49165.6', 'open_24h': '54158.9', 'volume_24h': '38798.73716123', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457601.45512605', 'best_bid': '49165.60', 'best_ask': '49168.84', 'side': 'sell', 'time': '2021-04-23T07:17:59.150024Z', 'trade_id': 160051615, 'last_size': '0.01392919'}
{'type': 'ticker', 'sequence': 24089797546, 'product_id': 'BTC-USD', 'price': '49165.6', 'open_24h': '54158.9', 'volume_24h': '38798.73716123', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457601.45512605', 'best_bid': '49165.60', 'best_ask': '49168.84', 'side': 'sell', 'time': '2021-04-23T07:17:59.150024Z', 'trade_id': 160051615, 'last_size': '0.01392919'}
{'type': 'ticker', 'sequence': 24089797546, 'product_id': 'BTC-USD', 'price': '49165.6', 'open_24h': '54158.9', 'volume_24h': '38799.75829611', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457602.47626093', 'best_bid': '49165.60'

ValueError: time data '2021-04-23T07:15:31.497929Z' does not match format '%Y-%m-%dT%H:%M:%S.%Z'

In [27]:
wsClientTrades.close()

{'type': 'ticker', 'sequence': 24089390859, 'product_id': 'BTC-USD', 'price': '49008.78', 'open_24h': '54158.9', 'volume_24h': '38604.13338407', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457406.85134889', 'best_bid': '49008.77', 'best_ask': '49008.78', 'side': 'buy', 'time': '2021-04-23T07:07:32.213362Z', 'trade_id': 160047875, 'last_size': '0.00981361'}
{'type': 'ticker', 'sequence': 24089390859, 'product_id': 'BTC-USD', 'price': '49008.78', 'open_24h': '54158.9', 'volume_24h': '38604.13338407', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457406.85134889', 'best_bid': '49008.77', 'best_ask': '49008.78', 'side': 'buy', 'time': '2021-04-23T07:07:32.213362Z', 'trade_id': 160047875, 'last_size': '0.00981361'}
{'type': 'ticker', 'sequence': 24089390859, 'product_id': 'BTC-USD', 'price': '49008.78', 'open_24h': '54158.9', 'volume_24h': '38600.39370526', 'low_24h': '48381.05', 'high_24h': '55469.98', 'volume_30d': '457403.11167008', 'best_bid': '49008.77