In [1]:
import time
import datetime as dt
import json
from kafka import KafkaConsumer
import numpy as np
import pandas as pd

#river useful packages 
from river.linear_model import LinearRegression
from river import optim
from river.optim import Adam
from river.neural_net import MLPRegressor, activations
from river import metrics
from river.optim import losses
from river import preprocessing, evaluate, neighbors, tree

# Three online regressors are compared in this notebook: a linear model,
# an MLP and a Hoeffding tree.
# Linear baseline: inputs are standardised on the fly by the scaler, then fed
# to a linear regression optimised with SGD(1e-4).
sgd_linear = LinearRegression(optimizer=optim.SGD(1e-4))
linear = preprocessing.StandardScaler() | sgd_linear

# Online multi-layer perceptron: standardised inputs feed a network with a
# single hidden layer of 10 units. Three activations are supplied for that one
# hidden layer; the last one is applied to the network output.
# The output activation is Identity rather than ReLU: the regression target is
# a centred, rescaled price that can be negative, which a ReLU output can never
# produce (and a ReLU output stops learning whenever its pre-activation is
# negative).
MLP = (
    preprocessing.StandardScaler() |
    MLPRegressor(
        hidden_dims=(10,),
        activations=(
            activations.ReLU(),
            activations.ReLU(),
            activations.Identity()
        ),
        optimizer=optim.SGD(1e-4),
        seed=42
    )
)

# Tree-based online model: a Hoeffding tree regressor placed behind the same
# on-the-fly standardisation step as the other two pipelines.
hoeffding_tree = tree.HoeffdingTreeRegressor(
    grace_period=100,
    max_depth=30,
    leaf_prediction='adaptive',
    model_selector_decay=0.9,
)
tr = preprocessing.StandardScaler() | hoeffding_tree

In [None]:
# Stream order-book snapshots from Kafka and evaluate the three online models
# progressively: for every snapshot each model first predicts, the prediction
# is scored against the observed target, and only then does the model learn
# from that snapshot.
topic_name = "depth20-btc"
consumer = KafkaConsumer(topic_name, bootstrap_servers="localhost:9092")

# Fixed constants used to centre and rescale raw prices and quantities.
price_norm_factor = 45000.0
qtity_norm_factor = 5.0

# One running MSE per online model.
metric = {'linear': metrics.MSE(), 'MLP': metrics.MSE(),
          "tr": metrics.MSE()}
feature_rows = []  # one feature dict per message; turned into `data` once, after the loop
Y = []  # the targets we retrieved, aligned with `feature_rows`
k = 0
try:
    for message in consumer:
        # The raw record is intentionally not printed: echoing every message
        # floods the cell output with thousands of order-book dumps.
        json_obj = json.loads(message.value)
        #timestamp = json_obj["time"]
        bids = json_obj["bids"]
        asks = json_obj["asks"]
        # Flat feature vector: ask prices, ask quantities, bid prices, bid
        # quantities, each centred and scaled by its normalisation constant.
        # Assumes every level is a [price, quantity] pair -- TODO confirm
        # against the producer.
        features = [(float(ask[0]) - price_norm_factor)/price_norm_factor for ask in asks] + \
                   [(float(ask[1]) - qtity_norm_factor)/qtity_norm_factor for ask in asks] + \
                   [(float(bid[0]) - price_norm_factor)/price_norm_factor for bid in bids] + \
                   [(float(bid[1]) - qtity_norm_factor)/qtity_norm_factor for bid in bids]

        # Target: the first ask price (minimum selling price); it is removed
        # from the feature vector so the models cannot read it directly.
        y = features.pop(0)
        X = {str(i): value for i, value in enumerate(features)}

        # Alternative target (not used): pop index 40 instead, i.e. the
        # maximum buying price. The vector is already normalised by hand, and
        # the StandardScaler inside each pipeline standardises it again.

        # Make online predictions with the streaming models.
        # `update` and `learn_one` mutate their object in place. Their return
        # value is deliberately NOT assigned back: recent River releases
        # return None from both, which would replace the model / metric by
        # None after the first message.
        y_linear = linear.predict_one(X)
        metric["linear"].update(y, y_linear)
        linear.learn_one(X, y)

        y_MLP = MLP.predict_one(X)
        metric["MLP"].update(y, y_MLP)
        MLP.learn_one(X, y)

        y_tr = tr.predict_one(X)
        metric["tr"].update(y, y_tr)
        tr.learn_one(X, y)

        # Keep the sample for the batch (scikit-learn) comparison done later.
        feature_rows.append(X)
        Y.append(y)
        k += 1
        if k > 2000:
            print(f"We have passed throught {k} examples")
            break
except KeyboardInterrupt as e:
    print("The user has stopped the process")
finally:
    # Always release the broker connection, even on interruption or error.
    consumer.close()

# Build the batch dataset in one go: DataFrame.append was removed in pandas 2.0
# and growing a frame row by row is quadratic. An empty stream still yields
# the 79 expected (empty) columns.
if feature_rows:
    data = pd.DataFrame(feature_rows)  # That's the data we use to predict Y
else:
    data = pd.DataFrame(columns=[str(i) for i in range(79)])

In [6]:
# Report the running MSE accumulated by each online model, in the order
# linear, tree, MLP.
online_report = (
    ("The final metric of the linear model is :", "linear"),
    ("The final metric of the tree model is :", "tr"),
    ("The final metric of the MLP model is :", "MLP"),
)
for caption, model_key in online_report:
    print(caption, metric[model_key].get())

The final metric of the linear model is : 0.0
The final metric of the tree model is : 0.0
The final metric of the MLP model is : 0.0


In [408]:
# Here we try to use the batch algorithms on the same dataset "data"
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [402]:
# Batch counterparts of the online models. The random forest and the MLP are
# stochastic, so both are seeded to make the reported scores reproducible
# across runs (42 matches the seed used for the online MLP).
linear_sklearn = LinearRegression()
rf_sklearn = RandomForestRegressor(n_estimators=100, random_state=42)
MLP_sklearn = MLPRegressor(random_state=42)

In [407]:
# Train every batch estimator on the full set of collected samples.
for batch_estimator in (linear_sklearn, rf_sklearn):
    batch_estimator.fit(data, Y)
# Kept as the final expression so the notebook still displays the fitted MLP.
MLP_sklearn.fit(data, Y)

MLPRegressor()

In [409]:
# In-sample evaluation: each batch model is scored on the same data it was
# fitted on, so these numbers measure fit quality, not generalisation.
mse_linear = mean_squared_error(Y, linear_sklearn.predict(data))
mse_rf = mean_squared_error(Y, rf_sklearn.predict(data))
y_pred = MLP_sklearn.predict(data)  # ends up holding the MLP predictions
mse_mlp = mean_squared_error(Y, y_pred)

batch_report = (
    ("The final metric of the linear model is :", mse_linear),
    ("The final metric of the random forest model is :", mse_rf),
    ("The final metric of the MLP model is :", mse_mlp),
)
for caption, score in batch_report:
    print(caption, score)

The final metric of the linear model is : 1.4456561996401147e-11
The final metric of the random forest model is : 2.3921493771601245e-12
The final metric of the MLP model is : 0.0005617775512273568
