In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from urllib.request import Request, urlopen

In [None]:
# Etherscan chart endpoints. Every URL shares the same pattern and asks for
# CSV output; only the chart slug differs.
_ETHERSCAN_CSV = "https://etherscan.io/chart/{}?output=csv"

# Price, capitalisation and supply
price_url = _ETHERSCAN_CSV.format("etherprice")
market_capitalization_url = _ETHERSCAN_CSV.format("marketcap")
supply_growth_url = _ETHERSCAN_CSV.format("ethersupplygrowth")

# Transactions and addresses
daily_transactions_url = _ETHERSCAN_CSV.format("tx")
daily_token_transfer_url = _ETHERSCAN_CSV.format("tokenerc-20txns")
unique_address_url = _ETHERSCAN_CSV.format("address")

# Blocks and gas
avg_block_size_url = _ETHERSCAN_CSV.format("blocksize")
avg_block_time_url = _ETHERSCAN_CSV.format("blocktime")
avg_gas_price_url = _ETHERSCAN_CSV.format("gasprice")
avg_gas_limit_url = _ETHERSCAN_CSV.format("gaslimit")
daily_gas_used_url = _ETHERSCAN_CSV.format("gasused")
daily_block_reward_url = _ETHERSCAN_CSV.format("blockreward")
block_count_rewards_url = _ETHERSCAN_CSV.format("blocks")
uncle_count_rewards_url = _ETHERSCAN_CSV.format("uncles")

# Network-level series
net_hashrte_url = _ETHERSCAN_CSV.format("hashrate")
net_difficulty_url = _ETHERSCAN_CSV.format("difficulty")
pending_transactions_url = _ETHERSCAN_CSV.format("pendingtx")
net_transaction_fee_url = _ETHERSCAN_CSV.format("transactionfee")
net_utilization_url = _ETHERSCAN_CSV.format("networkutilization")
daily_verified_contracts_url = _ETHERSCAN_CSV.format("verified-contracts")

In [None]:
def read_data(url):
  """Fetch a CSV resource and parse it into a DataFrame.

  Args:
    url: Address of the CSV resource, passed to ``urllib.request.Request``.

  Returns:
    pandas.DataFrame parsed from the response body by ``pd.read_csv``.

  Raises:
    urllib.error.URLError: Propagated from ``urlopen`` when the request fails.
  """
  req = Request(url)
  # The request is sent with a browser-style User-Agent header.
  req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0')

  # Close the response as soon as pandas has consumed it, so that calling this
  # once per chart does not leave a connection open for every download.
  with urlopen(req) as content:
    return pd.read_csv(content)
# (label, url) pairs consumed by the download loop. The label becomes the name
# of the series' value column; entries that are commented out are skipped.
url_list = [
    ("price", price_url),
    ("market cap.", market_capitalization_url),
    ("supply growth", supply_growth_url),
    ("daily trans.", daily_transactions_url),
    ("daily token trans.", daily_token_transfer_url),
    ("uniq. address", unique_address_url),
    ("avg blk size", avg_block_size_url),
    ("avg blk time", avg_block_time_url),
    ("avg gas price", avg_gas_price_url),
    ("avg gas limit", avg_gas_limit_url),
    ("daily gas used", daily_gas_used_url),
    ("daily blk reward", daily_block_reward_url),
    ("blk count reward", block_count_rewards_url),
    ("uncle count reward", uncle_count_rewards_url),
    ("net hashrte", net_hashrte_url),
    ("net diff.", net_difficulty_url),
    # ("pending trans.", pending_transactions_url),
    ("net trans. fee", net_transaction_fee_url),
    ("net util.", net_utilization_url),
    # ("daily verified contracts", daily_verified_contracts_url),
]


In [None]:
# Download every configured series and bring each frame to a common shape:
# a "Date(UTC)" string key plus one value column named after the series label.
df_list = []
for column_name, url in url_list:
  df = read_data(url)
  # Re-format the date as MM/DD/YYYY text; it is the join key for the merge.
  df["Date(UTC)"] = pd.to_datetime(df["Date(UTC)"]).dt.strftime('%m/%d/%Y')

  # Name the axis through the `columns=` keyword: pandas >= 2.0 rejects a
  # positional axis argument to drop(). `errors="ignore"` keeps frames that
  # have no "UnixTimeStamp" column untouched.
  df = df.drop(columns="UnixTimeStamp", errors="ignore")

  # rename() leaves the frame unchanged when there is no "Value" column.
  df = df.rename(columns={"Value": column_name})

  df_list.append(df)
from functools import reduce


In [None]:
def _join_on_date(left, right):
  """Merge two per-series frames on their shared "Date(UTC)" column."""
  return left.merge(right, on='Date(UTC)')


# Fold all downloaded frames into one table, then give the gas-price column
# (delivered as "Value (Wei)") its series label.
df_total = reduce(_join_on_date, df_list).rename(columns={"Value (Wei)": "avg gas price"})
df_total

In [None]:
# Print a summary of the merged frame (columns, dtypes, non-null counts).
df_total.info()

In [None]:
# Load the dataset from a local CSV file. This rebinds `df_total`, so the frame
# produced by the merge is no longer referenced after this cell.
# NOTE(review): nothing visible in this notebook writes "ethereum_data.csv" —
# confirm where the file comes from, otherwise a fresh run stops here.
df_total = pd.read_csv("ethereum_data.csv")
df_total

In [None]:
# Summary statistics for the columns of the loaded frame.
df_total.describe()

In [None]:
import matplotlib.pyplot as plt

# Line plot of the "Price" column against the frame index.
# NOTE(review): the download loop names the price series "price" (lower case);
# confirm that the CSV really has a capitalised "Price" column, otherwise this
# lookup raises KeyError.
df_total["Price"].plot()
plt.show()

In [None]:
# Line plot of the "price" column against the frame index.
df_total["price"].plot()
plt.show()

In [None]:
import tensorflow as tf

# Calendar features: one-hot encode month-of-year and day-of-month.
# The date column is parsed once and the integer parts are read directly.
dates = pd.to_datetime(df_total["Date(UTC)"])
df_months = dates.dt.month.astype("int16")
df_days = dates.dt.day.astype("int16")

# Pin num_classes: by default to_categorical sizes its output from the largest
# value present, so data without a December or a 31st would yield fewer columns
# than the 12 / 31 labels below and the DataFrame constructor would fail.
# Column 0 (class "0") never occurs for months or days and is sliced off.
month_onehot = tf.keras.utils.to_categorical(df_months, num_classes=13)[:, 1:]
day_onehot = tf.keras.utils.to_categorical(df_days, num_classes=32)[:, 1:]

# Reuse df_total's index so the concat below lines rows up by position even
# when the frame does not carry a default 0..n-1 index.
months = pd.DataFrame(month_onehot, columns=["Month "+str(i) for i in range(1, 13)], index=df_total.index).astype("int")
days = pd.DataFrame(day_onehot, columns=["Day "+str(i) for i in range(1, 32)], index=df_total.index).astype("int")
df_total = pd.concat([df_total, months, days], axis=1)
df_total

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Feature matrix: every column except the date key.
feature_columns = [column for column in df_total.columns if column != "Date(UTC)"]
series = df_total[feature_columns]

# Strip thousands separators from string cells (numeric cells are left alone by
# the regex replace), then convert explicitly to float instead of relying on
# the scaler to coerce an object array. This also avoids DataFrame.applymap,
# which is deprecated in recent pandas releases.
series = series.replace(",", "", regex=True).astype("float64")
series = series.to_numpy()

# Scale every feature to [0, 1].
# NOTE(review): the scaler is fitted on all rows, including the rows that are
# later sliced off as the validation set — confirm that this is intended.
scaler = MinMaxScaler()
series = scaler.fit_transform(series)

In [None]:
# Windowing / input-pipeline settings.
window_size = 90
batch_size = 512
shuffle_buffer_size = 1000

# Positional hold-out split: the first `split_time` rows are used for training,
# the remaining rows for validation.
split_time = 1500
time = np.array(df_total.index)
time_train, time_valid = time[:split_time], time[split_time:]
x_train, x_valid = series[:split_time], series[split_time:]

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
  """Build a shuffled, batched tf.data pipeline of sliding windows.

  Each example pairs `window_size` consecutive rows of `series` with a label
  taken from element 0 of the row that immediately follows them.

  Args:
    series: Array-like sliced along its first axis into individual rows.
    window_size: Number of consecutive rows in each input window.
    batch_size: Number of examples per emitted batch.
    shuffle_buffer: Buffer size handed to `Dataset.shuffle`.

  Returns:
    A `tf.data.Dataset` yielding `(windows, labels)` batches.
  """
  span = window_size + 1  # input rows plus the one row that supplies the label

  def as_tensor(window):
    # Collapse the nested window dataset into a single tensor of `span` rows.
    return window.batch(span)

  def split_inputs_and_label(window):
    return window[:-1], window[-1][0]

  rows = tf.data.Dataset.from_tensor_slices(series)
  windows = rows.window(span, shift=1, drop_remainder=True).flat_map(as_tensor)
  examples = windows.shuffle(shuffle_buffer).map(split_inputs_and_label)
  return examples.batch(batch_size).prefetch(1)

In [None]:
# Reset Keras' global state before a new model is built.
tf.keras.backend.clear_session()

# Training pipeline: shuffled, batched sliding windows over the training slice.
train_set = windowed_dataset(x_train, window_size, batch_size=batch_size, shuffle_buffer=shuffle_buffer_size)

In [None]:
# Sequence model: two causal Conv1D layers, three stacked bidirectional LSTMs
# and a dense head that emits a single value per input window.
model = tf.keras.models.Sequential([
  # tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), batch_input_shape=[None, window_size, series.shape[1]]),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.Conv1D(filters=32, kernel_size=3), name="Conv1d_1"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPool1D(pool_size=2), name="maxpool_1"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.Dropout(0.2), name="dropout_1"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.Conv1D(filters=32, kernel_size=3), name="Conv1d_2"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPool1D(pool_size=2), name="maxpool_2"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.Dropout(0.2), name="dropout_2"),
  # tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten(), name="flatten"),
  tf.keras.layers.Conv1D(filters=128, kernel_size=3, padding="causal", activation="relu", input_shape=[None, series.shape[1]]),
  # tf.keras.layers.MaxPool1D(pool_size=2),
  tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding="causal", activation="relu"),
  # tf.keras.layers.MaxPool1D(pool_size=2),
  # tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
  # tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
  tf.keras.layers.Dense(128, activation="relu"),
  tf.keras.layers.Dense(1),
  # tf.keras.layers.Lambda(lambda x: x * 100.0)
])

lr = 0.1
# Step decay: the rate is multiplied by 0.6 each time (1 + epoch) crosses a
# multiple of 20. The result is cast to a plain float because the expression
# otherwise evaluates to a numpy scalar.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: float(lr * np.power(0.6, np.floor((1 + epoch)/20))))
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="mae", patience=5, restore_best_weights=True)
# `learning_rate` is the supported keyword; the short `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9)
model.compile(loss="mse",
              optimizer=optimizer,
              metrics=["mae"])

model.summary()

# The callback list must use the name bound above (`lr_schedule`); the
# previously referenced `learning_rate_schedule` was never defined.
history = model.fit(train_set, epochs=200, callbacks=[
                                                      lr_schedule,
                                                      # early_stopping,
                                                      ])

In [None]:
# Plot the step-decay curve 0.1 * 0.6 ** floor((1 + epoch) / 20) for epoch
# indices 0..199 (the same formula the learning-rate scheduler applies).
plt.plot(np.arange(200), 0.1*np.power(0.6, np.floor((1 + np.arange(200))/20)))

In [None]:
# Training loss plotted against the per-epoch learning rate on a log-scaled
# x axis.
# NOTE(review): the key under which the learning rate is recorded in
# `history.history` may depend on the Keras version ("lr" here) — confirm that
# it is present before relying on this cell.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.show()

In [None]:
def plot_series(time, series, label, format="-", start=0, end=None):
    """Draw one labelled series on the current matplotlib axes.

    Args:
        time: Sequence of x values.
        series: Sequence of y values, sliced with the same bounds as `time`.
        label: Legend entry for the line.
        format: Matplotlib format string for the line (default "-").
        start: First index to plot (inclusive).
        end: Last index to plot (exclusive); None plots to the end.
    """
    window = slice(start, end)
    axes = plt.gca()
    axes.plot(time[window], series[window], format, label=label)
    axes.set_xlabel("Time")
    axes.set_ylabel("Value")
    axes.grid(True)
    axes.legend()

In [None]:
# One input window per validation row: the `window_size` rows that end just
# before row t are used to predict row t, for t in [split_time, len(series)).
# Only these windows are built; predictions for training rows were discarded
# anyway.
first_start = split_time - window_size
window_starts = range(first_start, len(series) - window_size)
windows = np.stack([series[start:start + window_size] for start in window_starts])

# A single batched predict call replaces one call per row, and no loop variable
# overwrites the global `time` array any more.
forecast = model.predict(windows)
results = forecast[:, 0]

# Compare the first feature column of the validation slice with the forecasts
# (both are in the scaler's normalised units).
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid[:, 0], "real")
plot_series(time_valid, results, "predicted")

In [None]:
# Mean absolute error between the first feature column of the validation slice
# and the forecasts; both are in the scaler's normalised units.
tf.keras.metrics.mean_absolute_error(x_valid[:, 0], results).numpy()

In [None]:
# Difference between the model output for the most recent window and the output
# for the window that ends one row earlier (scaled units).
model.predict(series[-window_size:][np.newaxis])[0, 0] - model.predict(series[-window_size-1:-1][np.newaxis])[0, 0]

In [None]:
# Single-feature scaler that borrows the fitted offset and scale of column 0
# from `scaler`, so model outputs can be mapped back to that column's original
# units (presumably the price column — confirm against the column order).
scaler2 = MinMaxScaler()
scaler2.min_, scaler2.scale_ = scaler.min_[0], scaler.scale_[0]

# First the window that ends one row before the last row, then the most recent
# window; the un-scaled prediction for each is printed in that order.
for window in (series[-window_size-1:-1], series[-window_size:]):
  scaled_pred = model.predict(window[np.newaxis])
  print(scaler2.inverse_transform(scaled_pred)[0, 0])