In [None]:
from datetime import datetime, timedelta
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline
%config IPCompleter.greedy = True
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import xgboost as xgb

# Global plot theme: white figure, transparent axes, no grid, no tick marks,
# no top/right spines, and grey text/labels.
_theme_rc = {
    "axes.axisbelow": False,
    "axes.edgecolor": "lightgrey",
    "axes.facecolor": "None",
    "axes.grid": False,
    "axes.labelcolor": "dimgrey",
    "axes.spines.right": False,
    "axes.spines.top": False,
    "figure.facecolor": "white",
    "lines.solid_capstyle": "round",
    "patch.edgecolor": "w",
    "patch.force_edgecolor": True,
    "text.color": "dimgrey",
    "xtick.bottom": False,
    "xtick.color": "dimgrey",
    "xtick.direction": "out",
    "xtick.top": False,
    "ytick.color": "dimgrey",
    "ytick.direction": "out",
    "ytick.left": False,
    "ytick.right": False,
}
sns.set(font="Franklin Gothic Book", rc=_theme_rc)

# Font sizes used on top of seaborn's "notebook" context.
_context_rc = {"font.size": 16, "axes.titlesize": 20, "axes.labelsize": 18}
sns.set_context("notebook", rc=_context_rc)
import requests
from creds import api_key

import tensorflow as tf
import tensorflow_probability as tfp
from pandasgui import show

# NOTE(review): the strategy object created here is discarded — it is never
# assigned to a name or entered via `.scope()` anywhere in the visible
# notebook, so this line presumably has no effect on where the models below
# are placed. Confirm intent; to pin work to the GPU, keep a reference and
# build/compile the models inside `strategy.scope()`.
tf.distribute.OneDeviceStrategy(device="/gpu:0")

In [None]:
def compile_lc_data(num_days=180, read_csv=False, write_csv=False, coins="ETH"):
    """Load LunarCrush time-series data for ``coins`` into a DataFrame.

    Parameters
    ----------
    num_days : int
        How many days back from now the download starts. Not used when
        ``read_csv`` is True.
    read_csv : bool
        If True, skip the download and return the contents of
        ``lunar_histdata.csv`` in the current working directory.
    write_csv : bool
        If True, write the downloaded frame to that same CSV file.
    coins : str
        Value sent to the API as the ``symbol`` parameter.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (a DatetimeIndex on both the CSV and the download
        path), with ``asset_id``/``search_average`` removed and ``month``,
        ``day`` and ``hour`` columns derived from the index. An empty frame is
        returned when the requested window contains no requests or no data.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    ValueError
        If the downloaded windows contain duplicate timestamps
        (``verify_integrity=True``).
    """
    file = Path.cwd() / "lunar_histdata.csv"
    if read_csv is True:
        # parse_dates keeps the cached frame's index type identical to the
        # DatetimeIndex produced by a fresh download.
        return pd.read_csv(file, index_col=0, parse_dates=True)

    # One request returns this many points and the window then advances by the
    # same number of hours — presumably one data point per hour (TODO confirm).
    points_per_request = 720
    intervals = ["1d", "1w", "1m", "3m", "6m", "1y", "2y"]
    finish = datetime.now()
    start = finish - timedelta(days=num_days)
    delta = timedelta(hours=points_per_request)

    frames = []
    while finish > start:
        payload = {
            "key": api_key,
            "symbol": coins,
            "change": intervals,
            "data_points": str(points_per_request),
            "start": start.timestamp(),
        }
        r = requests.get(
            "https://api.lunarcrush.com/v2?data=assets", params=payload, timeout=30
        )
        # Fail loudly on HTTP errors instead of crashing later on a missing key.
        r.raise_for_status()
        start = start + delta

        new = pd.DataFrame(r.json()["data"][0]["timeSeries"])
        if new.empty:
            continue
        new = new.drop(columns=["asset_id", "search_average"], errors="ignore")
        new["time"] = pd.to_datetime(new["time"], unit="s")
        new = new.set_index("time").sort_index(ascending=True)
        new["month"] = new.index.month
        new["day"] = new.index.day
        new["hour"] = new.index.hour
        # numeric_only avoids a TypeError on non-numeric columns in pandas >= 2.
        new = new.fillna(new.mean(numeric_only=True))
        frames.append(new)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.
    df = pd.concat(frames, verify_integrity=True) if frames else pd.DataFrame()

    if write_csv is True:
        df.to_csv(file)

    return df


# Load the cached dataset, then print a few sanity checks: total NaN count,
# newest and oldest index value, and the number of rows.
df = compile_lc_data(read_csv=True)

print("NaN Check ")
for _summary_value in (
    df.isna().sum().sum(),
    df.index.max(),
    df.index.min(),
    len(df),
):
    print(_summary_value)

df.tail()


In [None]:
# Random 80/20 split; the test set is every row whose index label was not
# sampled into the training set.
train_ds = df.sample(frac=0.8, random_state=0)
test_ds = df.drop(index=train_ds.index)

# Separate the regression target ("close") from the input features.
train_features, test_features = train_ds.copy(), test_ds.copy()
train_label, test_label = train_features.pop("close"), test_features.pop("close")

# The normalization layer learns its statistics from the training features only.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(train_features.to_numpy().astype("float32"))

# Linear baseline: normalized inputs feeding a single Dense unit.
linear_model = tf.keras.models.Sequential()
linear_model.add(normalizer)
linear_model.add(tf.keras.layers.Dense(1))

optimizer = tf.keras.optimizers.Adam()
linear_model.compile(loss="mse", optimizer=optimizer)


In [None]:
# Display the kernel (weight) variable of the Dense layer, which sits at
# index 1 after the normalizer.
# NOTE(review): no data has been passed through the model at this point in the
# visible notebook; if the Dense layer is not built yet this attribute access
# may fail on a fresh kernel — confirm with Restart & Run All.
linear_model.layers[1].kernel

In [None]:
%%time
# Stop once validation loss has gone 4 epochs without improving; the epoch
# count below is therefore only an upper bound.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", mode="min", patience=4
)

# 20% of the training rows are held out for validation.
history = linear_model.fit(
    x=train_features,
    y=train_label,
    validation_split=0.2,
    epochs=2000,
    callbacks=[early_stopping],
)

In [None]:
# Per-epoch training metrics as a table, with the epoch number as a column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()


In [None]:
# Collects test-set scores per model; seeded with the linear baseline's loss.
test_results = {
    "linear_baseline": linear_model.evaluate(test_features, test_label, verbose=0)
}


In [None]:
def build_and_compile_model(norm):
    """Build and compile a small fully connected regression network.

    The network is ``norm`` followed by two 64-unit ReLU Dense layers and a
    single-unit output layer. It is compiled with MSE loss, the Adam
    optimizer, and mean absolute error as an extra metric.

    Args:
        norm: layer placed first in the model (the notebook passes its
            adapted normalization layer).

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    hidden_units = 64

    model = tf.keras.models.Sequential()
    model.add(norm)
    for _ in range(2):
        model.add(tf.keras.layers.Dense(hidden_units, activation="relu"))
    model.add(tf.keras.layers.Dense(1))

    model.compile(
        optimizer="adam", loss="mse", metrics=[tf.metrics.MeanAbsoluteError()]
    )
    return model


In [None]:
# Build the DNN regressor on top of the already-adapted `normalizer` layer
# (the same layer object the linear baseline uses) and print its summary.
dnn_model = build_and_compile_model(normalizer)

dnn_model.summary()

In [None]:
%%time
# Reuses the `early_stopping` callback created for the linear model; 20% of
# the training rows are held out for validation. Overwrites `history`.
history = dnn_model.fit(
    x=train_features,
    y=train_label,
    epochs=2000,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Training loss per epoch for the DNN run.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.plot(y="loss", x="epoch")


In [None]:
# Score the DNN on the held-out test split. The model was compiled with an
# MSE loss plus a MeanAbsoluteError metric, so this entry holds both values
# (unlike the linear baseline, which was compiled with a loss only).
test_results["Simple DNN"] = dnn_model.evaluate(test_features, test_label)

print(test_results)


In [None]:
# Collects test-set predictions per model as flat 1-D arrays.
test_predictions = {"dnn_model": dnn_model.predict(test_features).flatten()}


In [None]:
# Wrap the feature frames and targets in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(train_features, label=train_label)
dtest = xgb.DMatrix(test_features, label=test_label)

# "reg:linear" is the deprecated name of the squared-error objective; use the
# current name so newer XGBoost releases accept it without warnings/errors.
# NOTE(review): max_depth=50, eta=1 and only 2 boosting rounds are unusual
# settings — confirm they are intentional rather than leftover experiments.
param = {
    "max_depth": 50,
    "eta": 1,
    "objective": "reg:squarederror",
    "booster": "gbtree",
}
# Datasets whose metric is reported after every boosting round.
evallist = [(dtest, "eval"), (dtrain, "train")]

num_round = 2
# Pass the optional arguments by keyword: newer XGBoost versions expect
# `evals` as a keyword argument.
xg = xgb.train(param, dtrain, num_boost_round=num_round, evals=evallist)

In [None]:
# A Booster has no `evaluate` method (that is the Keras API), so the original
# call raised AttributeError. Compute the test-set mean squared error from the
# booster's predictions instead, which is the same quantity the Keras models
# report as their "mse" loss. Booster.predict needs a DMatrix, hence `dtest`.
test_results["xgb"] = float(
    np.mean((xg.predict(dtest) - test_label.to_numpy()) ** 2)
)

print(test_results)

In [None]:
# Booster.predict only accepts a DMatrix; passing the raw feature DataFrame
# raised a TypeError. `dtest` wraps the same test features.
test_predictions["xgb"] = xg.predict(dtest).flatten()

print(test_predictions)


In [None]:
# def conv_model(normalizer):
#     CONV_WIDTH = 3

#     model = tf.keras.models.Sequential(
#         [
#             normalizer,
#             # tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
#             # tf.keras.layers.Conv1D(
#             #     filters=64,
#             #     kernel_size=(CONV_WIDTH),
#             #     strides=1,
#             #     padding="causal",
#             #     # activation="relu",
#             # ),
#             tf.keras.layers.Bidirectional(
#                 tf.keras.layers.LSTM(128, return_sequences=False)
#             ),
#             # tf.keras.layers.Bidirectional(
#             #     tf.keras.layers.LSTM(64, return_sequences=True)),
#             tf.keras.layers.Dense(128),
#             tf.keras.layers.Dropout(0.2),
#             tf.keras.layers.Dense(1),
#         ]
#     )

#     model.compile(
#         loss="mse", optimizer="adam", metrics=[tf.metrics.MeanAbsoluteError()]
#     )

#     return model


In [None]:
# conv_model = conv_model(normalizer)

# conv_model.summary()


In [None]:
# %%time
# history = conv_model.fit(
#     train_features,
#     train_label,
#     validation_split=0.2,
#     epochs=2000, callbacks=[early_stopping])

In [None]:
# test_results["conv_model"] = dnn_model.evaluate(test_features, test_label)

# print(test_results)


In [None]:
# test_predictions["conv_model"] = dnn_model.predict(test_features).flatten()

# print(test_predictions)


In [None]:
def plot_result(labels, preds, title="Real data (red) vs. predictions (blue)"):
    """Scatter-plot true values and model predictions against the label index.

    Args:
        labels: pandas Series of true values; its index is used as the x axis.
        preds: predicted values, one per entry of ``labels`` and in the same
            order.
        title: figure title. The default matches the colours actually drawn
            (the previous hard-coded title had red and blue swapped).
    """
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.scatter(x=labels.index, y=labels, color="r", marker=".", label="real data")
    ax.scatter(x=labels.index, y=preds, color="b", marker="X", label="predictions")
    ax.set_xlabel("time")
    ax.set_ylabel("price")
    ax.set_title(title)
    # The series were labelled but never shown; draw the legend so the plot
    # is self-explanatory.
    ax.legend()
    plt.show()


In [None]:
# Compare the DNN's test-set predictions with the true "close" values.
plot_result(test_label, test_predictions["dnn_model"])

In [None]:
# The conv_model cells are commented out, so test_predictions has no
# "conv_model" entry and indexing it raised KeyError. Plot whichever other
# models actually produced predictions instead (the DNN is plotted in the
# previous cell).
for model_name, model_preds in test_predictions.items():
    if model_name != "dnn_model":
        print(model_name)
        plot_result(test_label, model_preds)
