In [176]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import numpy as np
import os
import datetime

In [177]:
bt = pd.read_csv("Bitcoin Historical Data (2014-2024).csv")
bt

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,03/24/2024,67211.9,64036.5,67587.8,63812.9,65.59K,4.96%
1,03/23/2024,64037.8,63785.6,65972.4,63074.9,35.11K,0.40%
2,03/22/2024,63785.5,65501.5,66633.3,62328.3,72.43K,-2.62%
3,03/21/2024,65503.8,67860.0,68161.7,64616.1,75.26K,-3.46%
4,03/20/2024,67854.0,62046.8,68029.5,60850.9,133.53K,9.35%
...,...,...,...,...,...,...,...
3649,03/28/2014,482.6,460.5,515.0,453.8,4.11K,4.81%
3650,03/27/2014,460.5,562.5,567.8,460.5,3.78K,-18.13%
3651,03/26/2014,562.5,562.9,575.4,546.3,3.71K,-0.08%
3652,03/25/2014,562.9,567.6,569.7,550.4,3.87K,-0.82%


In [178]:
bt.dtypes

Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object

In [179]:
#reformatting to drop k and m, change to integers

#function loop  to drop and multiply
def km_to_number(value):
    if isinstance(value, str):
        if 'K' in value:
            return float(value.replace('K', '')) * 1000
        elif 'M' in value:
            return float(value.replace('M', '')) * 1000000
        elif 'B' in value:
            return float(value.replace('B', '')) * 1000000000
    else:
        return value

bt["Vol."] = bt["Vol."].apply(km_to_number)

bt

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,03/24/2024,67211.9,64036.5,67587.8,63812.9,65590.0,4.96%
1,03/23/2024,64037.8,63785.6,65972.4,63074.9,35110.0,0.40%
2,03/22/2024,63785.5,65501.5,66633.3,62328.3,72430.0,-2.62%
3,03/21/2024,65503.8,67860.0,68161.7,64616.1,75260.0,-3.46%
4,03/20/2024,67854.0,62046.8,68029.5,60850.9,133530.0,9.35%
...,...,...,...,...,...,...,...
3649,03/28/2014,482.6,460.5,515.0,453.8,4110.0,4.81%
3650,03/27/2014,460.5,562.5,567.8,460.5,3780.0,-18.13%
3651,03/26/2014,562.5,562.9,575.4,546.3,3710.0,-0.08%
3652,03/25/2014,562.9,567.6,569.7,550.4,3870.0,-0.82%


In [180]:
#convert to date to datetime, price/open/high/low to float, change% to float
bt["Date"] = pd.to_datetime(bt["Date"])
bt[["Price", "Open", "High", "Low"]] = bt[["Price", "Open", "High", "Low"]].applymap(lambda x: float(x.replace(',', '')))
bt['Change %'] = bt['Change %'].str.rstrip('%').astype(float)
bt

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2024-03-24,67211.9,64036.5,67587.8,63812.9,65590.0,4.96
1,2024-03-23,64037.8,63785.6,65972.4,63074.9,35110.0,0.40
2,2024-03-22,63785.5,65501.5,66633.3,62328.3,72430.0,-2.62
3,2024-03-21,65503.8,67860.0,68161.7,64616.1,75260.0,-3.46
4,2024-03-20,67854.0,62046.8,68029.5,60850.9,133530.0,9.35
...,...,...,...,...,...,...,...
3649,2014-03-28,482.6,460.5,515.0,453.8,4110.0,4.81
3650,2014-03-27,460.5,562.5,567.8,460.5,3780.0,-18.13
3651,2014-03-26,562.5,562.9,575.4,546.3,3710.0,-0.08
3652,2014-03-25,562.9,567.6,569.7,550.4,3870.0,-0.82


In [181]:
bt.dtypes

Date        datetime64[ns]
Price              float64
Open               float64
High               float64
Low                float64
Vol.               float64
Change %           float64
dtype: object

In [182]:
# bt['Date'] = bt['Date'].astype('int64')
# bt['Date'] = bt['Date'].astype(float)

In [183]:
#adding a classification for softmax activation function
# conditions = [
#     (bt['Change %'] > 0),
#     (bt['Change %'] < 0),
#     (bt['Change %'] == 0)
# ]
# values = [1, 0, 2]

# bt['Inertia'] = np.where(conditions[0], values[0],
#                 np.where(conditions[1], values[1], values[2]))
bt

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2024-03-24,67211.9,64036.5,67587.8,63812.9,65590.0,4.96
1,2024-03-23,64037.8,63785.6,65972.4,63074.9,35110.0,0.40
2,2024-03-22,63785.5,65501.5,66633.3,62328.3,72430.0,-2.62
3,2024-03-21,65503.8,67860.0,68161.7,64616.1,75260.0,-3.46
4,2024-03-20,67854.0,62046.8,68029.5,60850.9,133530.0,9.35
...,...,...,...,...,...,...,...
3649,2014-03-28,482.6,460.5,515.0,453.8,4110.0,4.81
3650,2014-03-27,460.5,562.5,567.8,460.5,3780.0,-18.13
3651,2014-03-26,562.5,562.9,575.4,546.3,3710.0,-0.08
3652,2014-03-25,562.9,567.6,569.7,550.4,3870.0,-0.82


In [184]:
bit = bt.copy()
# bit.drop(columns=["Date"])
# bit["Date"] = bit.index

In [186]:
#bit["Date"] = bit["Date"].astype("string")
bit['Date'] = pd.to_datetime(bit['Date'])

bit['Date'] = bit['Date'].astype('int64')
bit['Date'] = bit['Date'].astype(float)
bit

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,1.711238e+18,67211.9,64036.5,67587.8,63812.9,65590.0,4.96
1,1.711152e+18,64037.8,63785.6,65972.4,63074.9,35110.0,0.40
2,1.711066e+18,63785.5,65501.5,66633.3,62328.3,72430.0,-2.62
3,1.710979e+18,65503.8,67860.0,68161.7,64616.1,75260.0,-3.46
4,1.710893e+18,67854.0,62046.8,68029.5,60850.9,133530.0,9.35
...,...,...,...,...,...,...,...
3649,1.395965e+18,482.6,460.5,515.0,453.8,4110.0,4.81
3650,1.395878e+18,460.5,562.5,567.8,460.5,3780.0,-18.13
3651,1.395792e+18,562.5,562.9,575.4,546.3,3710.0,-0.08
3652,1.395706e+18,562.9,567.6,569.7,550.4,3870.0,-0.82


In [144]:
# for i in k
#     if i[price] - i-1[price] >0
#         i[inertia] = 1
#     elseif i[price] - i-1[price] <0
#         i[inertia] = 0
#     elseif i[price] - i-1[price] == 0
#         i[inertia] = 2

#either do it this way, or calc amount change from one day to the next
#or predict price using date
    #linear activation function in output layer

#using price circumvents the key issue of using inertia -- accounting for volatility

In [145]:
#JUSTIN PSEUDOCODE
    #predicting price for future day given previouys day
    #could modify window to predict next x days
    #essentially implementing single step model
    
# BASIC IMPLEMENTATION - PREDICT 100th day using first 99:

# X = df['Price'][:-1] # all values except last day
# y = df['Price'][1:] # All values except 1st day

# xscaled = scaler.fit_transform(X)

# model = tf.keras.Sequential([
#     tf.keras.layers.Dense(1, input_shape=(1,))
# ])
# model.compile(optimizer='adam', loss='mean_squared_error')
# model.fit(X_scaled[:-(1)], y[:-(1)])  # fit model on first 99 days

# last_day_price = model.predict(X_scaled[-1]) #Predict 100th day using first 99

#output layer has no activation function -- unspecified means it defaults to linear
    #implementing linear regression using a neural networks
        #previously we were trying to do classification
    

In [168]:

# # y = bit["Inertia"].values
# # X = bit.drop(columns="Inertia").values

# y = bit["Price"].values  
# X = bit[["Date"]].values

# #try to predict price based on date
# #one series time forecast
# # https://www.tensorflow.org/tutorials/structured_data/time_series#single_step_models
#     #go back to datetime to use this
#     #


# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# scaler = StandardScaler()

# X_scaler = scaler.fit(X_train)

# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

ValueError: could not convert string to float: '2022-12-23'

In [147]:
#X.reshape(-1, 1)
X

array([['2024-03-24T00:00:00.000000000'],
       ['2024-03-23T00:00:00.000000000'],
       ['2024-03-22T00:00:00.000000000'],
       ...,
       ['2014-03-26T00:00:00.000000000'],
       ['2014-03-25T00:00:00.000000000'],
       ['2014-03-24T00:00:00.000000000']], dtype='datetime64[ns]')

In [187]:
#making all these functions (this cell + next few) make code extremely reusable
#always remember we're doing a forecast

column_indices = {name: i for i, name in enumerate(bit.columns)}

n = len(bit)
train_df = bit[0:int(n*0.7)]
val_df = bit[int(n*0.7):int(n*0.9)]
test_df = bit[int(n*0.9):]

num_features = bit.shape[1]


In [193]:
class WindowGenerator():
  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
    # Store the raw data.
    self.train_df = train_df
    self.val_df = val_df
    self.test_df = test_df

    # Work out the label column indices.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {name: i for i, name in
                                    enumerate(label_columns)}
    self.column_indices = {name: i for i, name in
                           enumerate(train_df.columns)}

    # Work out the window parameters.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = input_width + shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]


In [194]:
def split_window(self, features):
  inputs = features[:, self.input_slice, :]
  labels = features[:, self.labels_slice, :]
  if self.label_columns is not None:
    labels = tf.stack(
        [labels[:, :, self.column_indices[name]] for name in self.label_columns],
        axis=-1)

  # Slicing doesn't preserve static shape information, so set the shapes
  # manually. This way the `tf.data.Datasets` are easier to inspect.
  inputs.set_shape([None, self.input_width, None])
  labels.set_shape([None, self.label_width, None])

  return inputs, labels

WindowGenerator.split_window = split_window

In [195]:
def make_dataset(self, data):
  data = np.array(data, dtype=np.float32)
  ds = tf.keras.utils.timeseries_dataset_from_array(
      data=data,
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=True,
      batch_size=32,)

  ds = ds.map(self.split_window)

  return ds

WindowGenerator.make_dataset = make_dataset

In [196]:
@property
def train(self):
  return self.make_dataset(self.train_df)

@property
def val(self):
  return self.make_dataset(self.val_df)

@property
def test(self):
  return self.make_dataset(self.test_df)

@property
def example(self):
  """Get and cache an example batch of `inputs, labels` for plotting."""
  result = getattr(self, '_example', None)
  if result is None:
    # No example batch was found, so get one from the `.train` dataset
    result = next(iter(self.train))
    # And cache it for next time
    self._example = result
  return result

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

In [155]:
#bit["Date"] = bit["Date"].astype(str)
#bit["Date"] = bit["Date"].strftime('%Y-%m-%d')

# bit['Date'] = pd.to_datetime(bit['Date'])

# bit['Date'] = bit['Date'].astype('int64')

# bit['Date'] = bit['Date'].astype(float)
# bit.head()
#bit["Date"] = bit["Date"].dt.strftime('%Y-%m-%d')
#bit["Date"] = bit["Date"].astype("string")

In [191]:
bit.dtypes

Date        float64
Price       float64
Open        float64
High        float64
Low         float64
Vol.        float64
Change %    float64
dtype: object

In [198]:
w2 = WindowGenerator(input_width=6, label_width=1, shift=1,
                     label_columns=['Price'])



w2.train.element_spec

(TensorSpec(shape=(None, 6, 7), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))

In [199]:
single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=1,
    label_columns=['Price'])
single_step_window

<__main__.WindowGenerator at 0x241cb929c30>

In [200]:
class Baseline(tf.keras.Model):
  def __init__(self, label_index=None):
    super().__init__()
    self.label_index = label_index

  def call(self, inputs):
    if self.label_index is None:
      return inputs
    result = inputs[:, :, self.label_index]
    return result[:, :, tf.newaxis]


In [201]:
#Instantiate and evaluate this model:
baseline = Baseline(label_index=column_indices['Price'])

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.val, return_dict=True)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0, return_dict=True)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 324.2934 - mean_absolute_error: 9.2261 


In [70]:
# # Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# number_input_features = len(X_train[0])
# hidden_nodes_layer1 =  80
# hidden_nodes_layer2 = 30
# #num_classes = 3

# nn = tf.keras.models.Sequential()

# # First hidden layer
# nn.add(
#     tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
# )

# # Second hidden layer
# nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# # Output layer
# #nn.add(tf.keras.layers.Dense(units=num_classes, activation="softmax"))



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [71]:
# #Compile
# nn.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# #Fit
# nn.fit(X_train_scaled, y_train, epochs=50, verbose=1)

Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "c:\Users\lucas\anaconda3\envs\dev\lib\runpy.py", line 196, in _run_module_as_main

  File "c:\Users\lucas\anaconda3\envs\dev\lib\runpy.py", line 86, in _run_code

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\kernelapp.py", line 736, in start

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "c:\Users\lucas\anaconda3\envs\dev\lib\asyncio\base_events.py", line 603, in run_forever

  File "c:\Users\lucas\anaconda3\envs\dev\lib\asyncio\base_events.py", line 1909, in _run_once

  File "c:\Users\lucas\anaconda3\envs\dev\lib\asyncio\events.py", line 80, in _run

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\ipykernel\zmqshell.py", line 546, in run_cell

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code

  File "C:\Users\lucas\AppData\Local\Temp\ipykernel_25460\494426354.py", line 5, in <module>

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 325, in fit

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 118, in one_step_on_iterator

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 106, in one_step_on_data

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 60, in train_step

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\trainers\trainer.py", line 322, in compute_loss

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\trainers\compile_utils.py", line 605, in __call__

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\trainers\compile_utils.py", line 641, in call

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\losses\loss.py", line 42, in __call__

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\losses\losses.py", line 22, in call

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\losses\losses.py", line 1714, in sparse_categorical_crossentropy

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\ops\nn.py", line 1554, in sparse_categorical_crossentropy

  File "c:\Users\lucas\anaconda3\envs\dev\lib\site-packages\keras\src\backend\tensorflow\nn.py", line 633, in sparse_categorical_crossentropy

Received a label value of 62467 which is outside the valid range of [0, 3).  Label values: 30382 23474 16613 325 2836 44420 35595 20594 15180 5320 11468 273 620 3815 60582 36544 9729 9580 667 4161 377 27298 17127 62467 1008 381 31007 40715 60866 19134 7504 57996
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_one_step_on_iterator_14764]

In [16]:
# model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
# print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

29/29 - 0s - 11ms/step - accuracy: 0.9891 - loss: 0.0278
Loss: 0.02776368334889412, Accuracy: 0.9890590906143188


In [20]:
# nn.predict(bit)

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 7, but received input with shape (32, 8)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 8), dtype=float32)
  • training=False
  • mask=None