In [10]:
from datetime import datetime, timedelta
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline
%config IPCompleter.greedy = True
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
import cbpro
from pathlib import Path
import seaborn as sns
import requests
from creds import api_key

import tensorflow as tf
import tensorflow_probability as tfp
from pandasgui import show

tf.distribute.OneDeviceStrategy(device="/gpu:0")
policy = tf.keras.mixed_precision.Policy("mixed_float16")
tf.keras.mixed_precision.experimental.set_policy(policy)

public_client = cbpro.PublicClient()

In [11]:
class MinerMeta(type):
    # def __init__(self):
    #     result = getattr(self, "df", None)
    #     if result is None:
    #         self.df = self.compile_historic(read_csv=True)

    def compile_historic(self, num_days=100, write_csv=False, read_csv=False):
        file = Path.cwd() / f"{self.coin}_histdata.csv"
        if read_csv is True:
            df = pd.read_csv(file, index_col="time", infer_datetime_format=True)
            return df
        else:
            finish = datetime.now()
            start = finish - timedelta(num_days)
            delta = timedelta(hours=300)
            df = pd.DataFrame()

            while finish > start:
                historic = public_client.get_product_historic_rates(
                    f"{self.coin}-USD",
                    granularity=3600,
                    start=start,
                    end=start + delta,
                )
                start += delta
                df = df.append(historic, ignore_index=True, verify_integrity=True)
            df.columns = ["time", "low", "high", "open", "close", "volume"]

            # timestamp_s = date_time.map(pd.Timestamp.timestamp)
            # day = 24 * 60 * 60
            # year = (365.2425) * day
            # df["Day sin"] = np.sin(timestamp_s * (2 * np.pi / day))
            # df["Day cos"] = np.cos(timestamp_s * (2 * np.pi / day))
            # df["Year sin"] = np.sin(timestamp_s * (2 * np.pi / year))
            # df["Year cos"] = np.cos(timestamp_s * (2 * np.pi / year))

            df["time"] = pd.to_datetime(df["time"], unit="s")
            df.set_index("time", inplace=True, verify_integrity=False)
            df.sort_index(ascending=False, inplace=True)

            if write_csv is True:
                df.to_csv(file, index=True)

            return df

    def get_day_stats(self):
        result = getattr(self, "day_stats", None)
        if result is None:
            ticker = public_client.get_product_24hr_stats(f"{self.coin}-USD")
            df = pd.DataFrame.from_dict(ticker, orient="index")
            self.day_stats = df
        return df

    def year_day_fft(self, col):
        df = self.compile_historic()
        today = datetime.today()
        ylim = int(df[col].max())
        plt.figure(figsize=(15, 10))

        fft = tf.signal.rfft(df[col])
        f_per_dataset = np.arange(0, len(fft))

        n_samples_h = 1
        hours_per_year = 24 * 365.2524
        hours_per_week = 24 * 7
        years_per_dataset = n_samples_h / (hours_per_year)
        hours_per_dataset = n_samples_h / (hours_per_week)

        f_per_year = f_per_dataset / years_per_dataset
        f_per_week = f_per_dataset / hours_per_dataset
        plt.step(f_per_week, np.abs(fft))
        plt.xscale("log")
        # plt.ylim(1000, ylim)
        # plt.xlim([0.1, max(plt.xlim())])
        plt.xticks([1, 7], labels=["1/Week", "1/day"])
        plt.xlabel("Frequency (log scale)")

        return plt.show()

    def ttsplit_norm(self, df, split_time=0.7, feature_plot=False):
        # train_df Test Split
        n = len(df)
        train_df = df[0 : int(n * 0.7)]
        val_df = df[int(n * 0.7) : int(n * 0.9)]
        test_df = df[int(n * 0.9) :]
        # Normalize the Data
        train_df_mean = train_df.mean()
        train_df_std = train_df.std()

        train_df = (train_df - train_df_mean) / train_df_std
        val_df = (val_df - train_df_mean) / train_df_std
        test_df = (test_df - train_df_mean) / train_df_std

        # Create Feature Plot if wanted
        if feature_plot is True:
            df_std = (df - train_df_mean) / train_df_std
            df_std = df_std.melt(var_name="Column", value_name="Normalized")
            plt.figure(figsize=(12, 6))
            ax = sns.violinplot(x="Column", y="Normalized", data=df_std)
            ax.set_xticklabels(df.keys(), rotation=90)
            ax.set_title("train_dfing Data Feature Dist with whole DF Mean")

            return train_df, val_df, test_df

        return train_df, val_df, test_df

    def __call__(self, *args, **kwargs):

        cls = type.__call__(self, *args)

        # setattr(cls, "compile_historic", self.compile_historic)
        # setattr(cls, "year_day_fft", self.year_day_fft)
        # setattr(cls, "ttsplit_norm", self.ttsplit_norm)
        # setattr(cls, "get_day_stats", self.get_day_stats)
        # setattr(cls, "day_stats", self.get_day_stats())

        # for key, value in historic.items():
        #     setattr(cls, "hist_" + key, value)
        # for key, value in ticker.items():
        #     setattr(cls, "tick_" + key, value)

        return cls


In [12]:
# # api_key = r"ru1zaf0ssaa29394mb4ahp"
# lc_file = Path.cwd() / "lc_data.csv"
# coins = "ETH"


# def lc_data(read_csv=False):
#     file = Path.cwd() / "lunardata.csv"
#     if read_csv is True:
#         data = pd.read_csv(file, index_col="id")
#         return data
#     else:
#         assets_url = (
#             b"https://api.lunarcrush.com/v2?data=assets&key="
#             + api_key
#             + "&symbol="
#             + coins
#             + +"&data_points=720"
#         )
#         assets = json.loads(urllib.request.urlopen(assets_url).read())
#         # config = pd.DataFrame.from_dict(assets["config"], orient="index")
#         # usage = pd.DataFrame.from_dict(assets["usage"], orient="index")
#         data = pd.DataFrame.from_dict(assets["data"])
#         return data


# lc = lc_data()

# lc.tail()


In [13]:
# lc_file = Path.cwd() / "lc_data.csv"
# coins = "ETH"
# intervals = ["1d", "1w", "1m", "3m", "6m", "1y", "2y"]
# now = datetime.now()
# delta = timedelta(30)
# start = now - delta

# payload = {
#     "key": api_key,
#     "symbol": coins,
#     "change": intervals,
#     "data_points": "720",
#     # "start": datetime.timestamp(start),
#     # "end": datetime.timestamp(start + timedelta(30)),
# }

# r = requests.get("https://api.lunarcrush.com/v2?data=assets", params=payload)

# data = pd.DataFrame.from_dict(r.json()["data"][0])

# ts = data.timeSeries.to_dict()
# new = pd.DataFrame.from_dict(ts, orient="index")
# new.pop("asset_id")
# new["time"] = pd.to_datetime(new["time"], unit="s")
# new.set_index("time", inplace=True)
# new.sort_index(ascending=False)
# print("Min", new.index.min())
# print("Max", new.index.max())
# print(new.index.max() - new.index.min())
# print(list(new.columns))
# new.tail()


In [None]:
class WindowGenerator:
    def __init__(
        self,
        input_width,
        label_width,
        shift,
        df=df,
        label_columns=None,
    ):
        # Store the raw data.
        train_df, val_df, test_df = eth.ttsplit_norm(df)

        self.df = df
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {
                name: i for i, name in enumerate(label_columns)
            }
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
                [
                    labels[:, :, self.column_indices[name]]
                    for name in self.label_columns
                ],
                axis=-1,
            )

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def make_ds(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32,
        )

        ds = ds.map(self.split_window)

        return ds

    def plot(self, model=None, plot_col="close", max_subplots=3):
        inputs, labels = self.example
        plt.figure(figsize=(15, 10))
        plot_col_index = self.column_indices[plot_col]
        max_n = min(max_subplots, len(inputs))
        for n in range(max_n):
            plt.subplot(max_n, 1, n + 1)
            plt.ylabel(f"{plot_col} [normed]")
            plt.plot(
                self.input_indices,
                inputs[n, :, plot_col_index],
                label="Inputs",
                marker=".",
                zorder=-10,
            )

            if self.label_columns:
                label_col_index = self.label_columns_indices.get(plot_col, None)
            else:
                label_col_index = plot_col_index

            if label_col_index is None:
                continue

            plt.scatter(
                self.label_indices,
                labels[n, :, label_col_index],
                # edgecolors="k",
                # label="Labels",
                # c="#2ca02c",
                s=64,
            )
            if model is not None:
                predictions = model(inputs)
                plt.scatter(
                    self.label_indices,
                    predictions[n, :, label_col_index],
                    marker="X",
                    # edgecolors="k",
                    # label="Predictions",
                    # c="#ff7f0e",
                    s=64,
                )

            if n == 0:
                plt.legend()

        plt.xlabel("Time [h]")

    @property
    def train(self):
        return self.make_ds(self.train_df)

    @property
    def val(self):
        return self.make_ds(self.val_df)

    @property
    def test(self):
        return self.make_ds(self.test_df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, "_example", None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result

    def __repr__(self):
        return "\n".join(
            [
                f"Total window size: {self.total_window_size}",
                f"Input indices: {self.input_indices}",
                f"Label indices: {self.label_indices}",
                f"Label column name(s): {self.label_columns}",
            ]
        )


In [14]:
def compile_lc_data(coins="ETH"):
    intervals = ["1d", "1w", "1m", "3m", "6m", "1y", "2y"]
    now = datetime.now()
    # delta = timedelta(num_days_in_past)
    # ut = datetime.timestamp((now - delta))

    payload = {
        "key": api_key,
        "symbol": coins,
        "change": intervals,
        "data_points": "720",
        # "start": ut,
        # "end": datetime.timestamp(now),
    }

    r = requests.get("https://api.lunarcrush.com/v2?data=assets", params=payload)

    data = pd.DataFrame.from_dict(r.json()["data"][0])
    ts = data.timeSeries.to_dict()
    new = pd.DataFrame.from_dict(ts, orient="index")
    new.pop("asset_id")
    new["time"] = pd.to_datetime(new["time"], unit="s")
    new.sort_values(by="time", inplace=True)
    time_index = new.pop("time")

    return time_index, new


index, df = compile_lc_data()
df.head()


Unnamed: 0,open,close,high,low,volume,market_cap,url_shares,unique_url_shares,reddit_posts,reddit_posts_score,...,medium,youtube,social_contributors,social_volume,price_btc,social_volume_global,social_dominance,market_cap_global,market_dominance,percent_change_24h
0,3077.143867,3084.552444,3085.51074,3056.500672,1242564000.0,362168319659,701,340,38.0,410.0,...,1.0,1.0,2367,2608,0.070607,49898,5.226662,2263989254025,15.99691,11.491972
1,3081.35725,3047.252229,3096.831669,3045.709234,1766392000.0,359161074399,787,466,25.0,369.0,...,1.0,1.0,2327,2918,0.07043,37398,7.802556,2239342341542,16.038685,9.670836
2,3047.928587,3074.104613,3079.927281,3042.025838,1308292000.0,361385465846,657,309,42.0,54.0,...,,,2315,2606,0.070303,38703,6.733328,2263895860023,15.96299,7.055718
3,3074.999395,3080.821023,3089.692187,3059.841029,1076057000.0,362236309044,954,292,64.0,500.0,...,,,2425,2532,0.070303,40666,6.226332,2268473359925,15.968286,7.555985
4,3080.531461,3122.525901,3123.213418,3075.871316,1764341000.0,366711441897,821,240,35.0,903.0,...,,,2369,2294,0.070703,33638,6.819668,2291805284595,16.000986,8.71209


In [15]:
targets = df[["open", "close"]]
inputs = df.drop(["high", "low"], axis=1)

targets = targets.values.astype("float32")
inputs = inputs.values.astype("float32")
SPLIT_RATIO = 0.8
WINDOW_SIZE = 60
BATCH_SIZE = 32


In [16]:
split_index = int(SPLIT_RATIO * len(df))

trainx = inputs[:split_index]
trainy = targets[:split_index]

testx = inputs[split_index:]
testy = targets[split_index:]


In [17]:
train_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    trainx, trainy, sequence_length=168
)

test_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    testx, testy, sequence_length=168
)


In [21]:
baseline = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])

baseline.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

baseline.fit(train_ds, epochs=20)


Epoch 1/20


InvalidArgumentError:  required broadcastable shapes
	 [[node mean_squared_error/SquaredDifference (defined at C:\Users\josephdavis\AppData\Local\Temp/ipykernel_44744/3674018120.py:9) ]] [Op:__inference_train_function_20061]

Function call stack:
train_function


In [19]:
CONV_WIDTH = 3
multi_val_performance = {}
multi_performance = {}

dev_model = tf.keras.models.Sequential(
    [
        # tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
        tf.keras.layers.Conv1D(
            filters=64,
            kernel_size=(CONV_WIDTH),
            strides=1,
            padding="causal",
            # activation="relu",
        ),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(128, return_sequences=False)
        ),
        # tf.keras.layers.Bidirectional(
        #     tf.keras.layers.LSTM(64, return_sequences=True)),
        tf.keras.layers.Dense(128),
        # tf.keras.layers.Dropout(.2),
        tf.keras.layers.Dense(1),
    ]
)

dev_model.compile(
    loss="mse", optimizer="adam", metrics=[tf.metrics.MeanAbsoluteError()]
)
dev_model.fit(train_ds, epochs=20)


Epoch 1/20


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential_2/conv1d_2/conv1d (defined at C:\Users\josephdavis\AppData\Local\Temp/ipykernel_44744/1080473186.py:29) ]] [Op:__inference_train_function_19256]

Function call stack:
train_function
