In [11]:
from datetime import datetime, timedelta
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline
%config IPCompleter.greedy = True
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
import cbpro
import os
from pathlib import Path
import seaborn as sns
import json
import urllib
import requests

# os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"
# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
import tensorflow_probability as tfp

tf.distribute.OneDeviceStrategy(device="/gpu:0")
policy = tf.keras.mixed_precision.Policy("mixed_float16")
tf.keras.mixed_precision.experimental.set_policy(policy)

public_client = cbpro.PublicClient()


In [12]:
class MinerMeta(type):
    # def __init__(self):
    #     result = getattr(self, "df", None)
    #     if result is None:
    #         self.df = self.compile_historic(read_csv=True)

    def compile_historic(self, num_days=100, write_csv=False, read_csv=False):
        file = Path.cwd() / f"{self.coin}_histdata.csv"
        if read_csv is True:
            df = pd.read_csv(file, index_col="time", infer_datetime_format=True)
            return df
        else:
            finish = datetime.now()
            start = finish - timedelta(num_days)
            delta = timedelta(hours=300)
            df = pd.DataFrame()

            while finish > start:
                historic = public_client.get_product_historic_rates(
                    f"{self.coin}-USD",
                    granularity=3600,
                    start=start,
                    end=start + delta,
                )
                start += delta
                df = df.append(historic, ignore_index=True, verify_integrity=True)
            df.columns = ["time", "low", "high", "open", "close", "volume"]

            # timestamp_s = date_time.map(pd.Timestamp.timestamp)
            # day = 24 * 60 * 60
            # year = (365.2425) * day
            # df["Day sin"] = np.sin(timestamp_s * (2 * np.pi / day))
            # df["Day cos"] = np.cos(timestamp_s * (2 * np.pi / day))
            # df["Year sin"] = np.sin(timestamp_s * (2 * np.pi / year))
            # df["Year cos"] = np.cos(timestamp_s * (2 * np.pi / year))

            df["time"] = pd.to_datetime(df["time"], unit="s")
            df.set_index("time", inplace=True, verify_integrity=False)
            df.sort_index(ascending=False, inplace=True)

            if write_csv is True:
                df.to_csv(file, index=True)

            return df

    def get_day_stats(self):
        result = getattr(self, "day_stats", None)
        if result is None:
            ticker = public_client.get_product_24hr_stats(f"{self.coin}-USD")
            df = pd.DataFrame.from_dict(ticker, orient="index")
            self.day_stats = df
        return df

    def year_day_fft(self, col):
        df = self.compile_historic()
        today = datetime.today()
        ylim = int(df[col].max())
        plt.figure(figsize=(15, 10))

        fft = tf.signal.rfft(df[col])
        f_per_dataset = np.arange(0, len(fft))

        n_samples_h = 1
        hours_per_year = 24 * 365.2524
        hours_per_week = 24 * 7
        years_per_dataset = n_samples_h / (hours_per_year)
        hours_per_dataset = n_samples_h / (hours_per_week)

        f_per_year = f_per_dataset / years_per_dataset
        f_per_week = f_per_dataset / hours_per_dataset
        plt.step(f_per_week, np.abs(fft))
        plt.xscale("log")
        # plt.ylim(1000, ylim)
        # plt.xlim([0.1, max(plt.xlim())])
        plt.xticks([1, 7], labels=["1/Week", "1/day"])
        plt.xlabel("Frequency (log scale)")

        return plt.show()

    def ttsplit_norm(self, df, split_time=0.7, feature_plot=False):
        # train_df Test Split
        n = len(df)
        train_df = df[0 : int(n * 0.7)]
        val_df = df[int(n * 0.7) : int(n * 0.9)]
        test_df = df[int(n * 0.9) :]
        # Normalize the Data
        train_df_mean = train_df.mean()
        train_df_std = train_df.std()

        train_df = (train_df - train_df_mean) / train_df_std
        val_df = (val_df - train_df_mean) / train_df_std
        test_df = (test_df - train_df_mean) / train_df_std

        # Create Feature Plot if wanted
        if feature_plot is True:
            df_std = (df - train_df_mean) / train_df_std
            df_std = df_std.melt(var_name="Column", value_name="Normalized")
            plt.figure(figsize=(12, 6))
            ax = sns.violinplot(x="Column", y="Normalized", data=df_std)
            ax.set_xticklabels(df.keys(), rotation=90)
            ax.set_title("train_dfing Data Feature Dist with whole DF Mean")

            return train_df, val_df, test_df

        return train_df, val_df, test_df

    def __call__(self, *args, **kwargs):

        cls = type.__call__(self, *args)

        # setattr(cls, "compile_historic", self.compile_historic)
        # setattr(cls, "year_day_fft", self.year_day_fft)
        # setattr(cls, "ttsplit_norm", self.ttsplit_norm)
        # setattr(cls, "get_day_stats", self.get_day_stats)
        # setattr(cls, "day_stats", self.get_day_stats())

        # for key, value in historic.items():
        #     setattr(cls, "hist_" + key, value)
        # for key, value in ticker.items():
        #     setattr(cls, "tick_" + key, value)

        return cls


In [13]:
# # api_key = r"ru1zaf0ssaa29394mb4ahp"
# lc_file = Path.cwd() / "lc_data.csv"
# coins = "ETH"


# def lc_data(read_csv=False):
#     file = Path.cwd() / "lunardata.csv"
#     if read_csv is True:
#         data = pd.read_csv(file, index_col="id")
#         return data
#     else:
#         assets_url = (
#             b"https://api.lunarcrush.com/v2?data=assets&key="
#             + api_key
#             + "&symbol="
#             + coins
#             + +"&data_points=720"
#         )
#         assets = json.loads(urllib.request.urlopen(assets_url).read())
#         # config = pd.DataFrame.from_dict(assets["config"], orient="index")
#         # usage = pd.DataFrame.from_dict(assets["usage"], orient="index")
#         data = pd.DataFrame.from_dict(assets["data"])
#         return data


# lc = lc_data()

# lc.tail()


In [14]:
from pandasgui import show
api_key = r"ru1zaf0ssaa29394mb4ahp"
lc_file = Path.cwd() / "lc_data.csv"
coins = "ETH"
intervals = ["1d", "1w", "1m", "3m", "6m", "1y", "2y"]
now = datetime.now()
delta = timedelta(60)
start = now - delta

payload = {
    "key": api_key,
    "symbol": coins,
    "change": intervals[:2],
    "data_points": "720",
    "start": datetime.timestamp(start),
    # "end": datetime.timestamp(start + timedelta(30)),
}

r = requests.get("https://api.lunarcrush.com/v2?data=assets", params=payload)

data = pd.DataFrame.from_dict(r.json()["data"][0])
ts = data.timeSeries.to_dict()
new = pd.DataFrame.from_dict(ts, orient="index")
new.pop("asset_id")
new["time"] = pd.to_datetime(new["time"], unit="s")
new.set_index("time", inplace=True)
new.sort_index(ascending=False)
print("Min", new.index.min())
print("Max", new.index.max())
print(new.index.max() - new.index.min())
show(new)

In [15]:
import requests


def compile_lc_data(num_days=100):
    intervals = ["1d", "1w", "1m", "3m", "6m", "1y", "2y"]
    now = datetime.now()
    delta = timedelta(num_days)
    ut = datetime.timestamp((now - delta))

    payload = {
        "key": api_key,
        "symbol": coins,
        "change": intervals,
        "data_points": "720",
        # "start": ut,
        # "end": datetime.timestamp(now),
    }

    r = requests.get("https://api.lunarcrush.com/v2?data=assets", params=payload)

    data = pd.DataFrame.from_dict(r.json()["data"][0])
    ts = data.timeSeries.to_dict()
    new = pd.DataFrame.from_dict(ts, orient="index")
    new.pop("asset_id")
    new["time"] = pd.to_datetime(new["time"], unit="s")
    new.set_index("time", inplace=True)
    new.sort_index(ascending=False)

    return new


df = compile_lc_data(100)
df.head()


Unnamed: 0_level_0,open,close,high,low,volume,market_cap,url_shares,unique_url_shares,reddit_posts,reddit_posts_score,...,medium,youtube,social_contributors,social_volume,price_btc,social_volume_global,social_dominance,market_cap_global,market_dominance,percent_change_24h
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-19 20:00:00,3818.766839,3808.128496,3823.310686,3806.714822,854524000.0,449614465954,809,485,25.0,90.0,...,,,3200.0,4498,0.059246,89927,5.001835,2771285000000.0,16.224044,1.797468
2021-10-19 21:00:00,3809.562649,3816.20545,3821.774204,3806.085254,715269900.0,450184948757,861,448,41.0,239.0,...,,,3243.0,4093,0.059363,81450,5.025169,2777479000000.0,16.208403,1.622131
2021-10-19 22:00:00,3817.012169,3881.681895,3881.681895,3817.012169,1457581000.0,457174478119,729,406,37.0,144.0,...,1.0,,2892.0,3735,0.06039,71322,5.236813,2796157000000.0,16.350101,3.361324
2021-10-19 23:00:00,3879.870574,3881.275588,3886.424027,3864.508652,588191600.0,457575692489,695,372,27.0,597.0,...,,,1948.0,3233,0.060237,64030,5.049196,2794339000000.0,16.375096,3.462496
2021-10-20 00:00:00,,3883.520742,,,,458241471728,636,360,23.0,639.0,...,,,,3325,0.060343,61341,5.420518,,,3.41


In [16]:
def plot_loss(history):
    plt.plot(history.history["loss"], label="loss")
    plt.plot(history.history["val_loss"], label="val_loss")
    plt.ylim([0, 10])
    plt.xlabel("Epoch")
    # plt.ylabel("Error [MPG]")
    plt.legend()
    plt.grid(True)


In [17]:
cols = list(df.columns)

# sns.pairplot(df[cols], diag_kind="kde")

In [21]:
def tt_split(df, train_percent=0.6, val_percent=0.2):
    m = len(df.index)
    train_end = int(train_percent * m)
    val_end = int(val_percent * m) + train_end
    train = df.iloc[:train_end]
    # val = df.iloc[train_end:val_end]
    # test = df.iloc[val_end:]
    test = df.iloc[train_end:]
    return train, test


train, test = tt_split(df)

label = train.pop("close")
test_label = test.pop("close")

numpy.ndarray

In [18]:
def make_ds(x, y):
    ds = (
        tf.data.Dataset.from_tensor_slices((x.values, y.values))
        .batch(24)
        .prefetch(tf.data.AUTOTUNE)
    )
    return ds


ds = make_ds(train, label)
# val_ds = make_ds(val, val_label)
test_ds = make_ds(test, test_label)

for features_tensor, target_tensor in ds.take(1):
    print(f"features:{features_tensor} target:{target_tensor}")


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [None]:
normalizer = tf.keras.layers.experimental.preprocessing.Normalization(axis=-1)
normalizer.adapt(train.values)


In [None]:
linear_model = tf.keras.Sequential([normalizer, tf.keras.layers.Dense(units=1)])
linear_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1), loss="mean_absolute_error"
)

linear_model.summary()


In [None]:
%time
history = linear_model.fit(
    train.values, label, 
    epochs=100,
    validation_split = 0.2)

In [None]:
hist = pd.DataFrame(history.history)
hist["epoch"] = history.epoch
hist.tail()


In [None]:
plot_loss(history)


In [None]:
model = tf.keras.Sequential(
    [
        normalizer,
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1),
    ]
)
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1), loss="mean_absolute_error"
)


model.summary()


In [None]:
%%time
history = model.fit(train.values, label.values,epochs=100, validation_split=.2)

In [None]:
hist = pd.DataFrame(history.history)
hist["epoch"] = history.epoch
hist.tail()


In [None]:
plot_loss(history)


In [None]:
yhat2 = model.predict(test[:1])
print(yhat2)